In [None]:
!pip install -q scipy==1.11

In [None]:
# Installing necessary dependencies
!pip install -q ultralytics pycocotools

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
from tqdm.notebook import tqdm

# **Datasets**

In [None]:
# Load the COCO-format training annotations.
# Uses the absolute Kaggle input path, like every other dataset path in this
# notebook, so the cell does not depend on the current working directory.
coco = COCO("/kaggle/input/dlsprint2/badlad/labels/coco_format/train/badlad-train-coco.json")

In [None]:
# Load every annotation once and derive three parallel Series (one entry per
# annotation): the annotation id, its category id and the id of its image.
ann_ids = coco.getAnnIds()
anns = coco.loadAnns(ann_ids)

ann_ids = pd.Series(ann_ids)
cat_ids = pd.Series([ann["category_id"] for ann in anns])
img_ids = pd.Series([ann["image_id"] for ann in anns])

# Only the last expression of a cell is rendered, so a bare `img_ids` followed
# by a bare `cat_ids` shows just the latter. Preview both side by side instead.
pd.DataFrame({"image_id": img_ids, "category_id": cat_ids}).head()

In [None]:
def organize_coco_data(data_dict: dict) -> tuple[list[str], list[dict]]:
    """Pull the class names and the image records out of a COCO-style dict.

    Args:
        data_dict: Mapping with a ``'categories'`` list (each entry has a
            ``'name'`` key) and an ``'images'`` list.

    Returns:
        A 2-tuple ``(thing_classes, images_metadata)`` where ``thing_classes``
        holds the category names in the order they appear in
        ``data_dict['categories']`` and ``images_metadata`` is the
        ``data_dict['images']`` list itself (not a copy).

    Raises:
        KeyError: If ``'categories'`` or ``'images'`` is missing, or a category
            entry has no ``'name'``.
    """
    # Category names only; the category ids are not used here.
    thing_classes: list[str] = [cat['name'] for cat in data_dict['categories']]

    # Image records are passed through untouched.
    images_metadata: list[dict] = data_dict['images']
    return thing_classes, images_metadata

In [None]:
### Data Load ###

import json
from pathlib import Path

# Read the test-set metadata JSON and split it into class names and image records.
TEST_METADATA_PATH = Path("/kaggle/input/dlsprint2/badlad/badlad-test-metadata.json")
with TEST_METADATA_PATH.open() as f:
    test_dict = json.load(f)
thing_classes_test, images_metadata_test = organize_coco_data(test_dict)

# One row per test image: keep only the columns needed downstream and expose
# the image id under the name `image_id`.
test_metadata = (
    pd.DataFrame(images_metadata_test)[['id', 'file_name', 'width', 'height']]
    .rename(columns={"id": "image_id"})
)
print("test_metadata size=", len(test_metadata))
test_metadata.head(5)

**Creating Validation Data**

In [None]:
import os
from sklearn.model_selection import StratifiedGroupKFold

FOLDS = 5
SEED = 3000

# Splitter used in the next cell: stratified on the annotation category and
# grouped by image id. It is seeded from SEED (previously a duplicated literal)
# so that editing the constant really changes the split.
sgkf = StratifiedGroupKFold(n_splits=FOLDS, shuffle=True, random_state=SEED)

# Number of annotations per category id, most frequent first.
counts = cat_ids.value_counts()

print(f"Number of images: {len(img_ids.unique())}")

for cls, count in counts.items():
    print(f"Number of instances of class {cls}: {count}")

In [None]:
import shutil

# Materialise each validation fold as val_<fold>/images and val_<fold>/labels,
# filled with symlinks into the read-only input dataset, and collect per-fold
# statistics for the summary table shown at the end of the cell.
IMAGE_SRC_DIR = "/kaggle/input/dlsprint2/badlad/images/train/"
LABEL_SRC_DIR = "/kaggle/input/dlsprint2/badlad/labels/yolov8_format/train/"

folds = []
number_of_images = []
paragraph = []
text_box = []
image = []
table = []

for fold, (_, val_idx) in enumerate(sgkf.split(ann_ids, cat_ids, img_ids)):
    folds.append(fold)
    val_cat_ids = cat_ids[val_idx]
    val_img_ids = set(img_ids[val_idx])

    # Start from an empty fold directory so the cell can be re-run: without
    # this, os.makedirs/os.symlink raise FileExistsError on the second run, and
    # links left over from an earlier (different) split could leak into the fold.
    # Only the symlinks are removed, never the files they point to.
    fold_dir = f"val_{fold}"
    shutil.rmtree(fold_dir, ignore_errors=True)
    os.makedirs(f"{fold_dir}/images")
    os.makedirs(f"{fold_dir}/labels")

    for img in tqdm(coco.loadImgs(val_img_ids)):
        file_name = img["file_name"]
        # Strip the real extension instead of assuming it is three characters long.
        label_name = os.path.splitext(file_name)[0] + ".txt"
        os.symlink(IMAGE_SRC_DIR + file_name, f"{fold_dir}/images/" + file_name)
        os.symlink(LABEL_SRC_DIR + label_name, f"{fold_dir}/labels/" + label_name)

    number_of_images.append(len(val_img_ids))

    # Annotation counts per category id within this fold (vectorised count).
    paragraph.append(int((val_cat_ids == 0).sum()))
    text_box.append(int((val_cat_ids == 1).sum()))
    image.append(int((val_cat_ids == 2).sum()))
    table.append(int((val_cat_ids == 3).sum()))

df = pd.DataFrame({
    "Fold": folds,
    "Number of Images": number_of_images,
    "Paragraph": paragraph,
    "text_box": text_box,
    "image": image,
    "table": table,
})

df.set_index("Fold")

In [None]:
import wandb

# Start a Weights & Biases run in "disabled" mode.
# NOTE(review): presumably this keeps the training cell further down from
# trying to log to W&B — confirm that this is the intent and that it has that effect.
wandb.init(mode="disabled")

In [None]:
%%writefile badlad.yaml
# Dataset description file; the training cell passes it as `data=`.
# NOTE(review): `path` is presumably the root that the train/val entries are
# resolved against — confirm against the ultralytics dataset-YAML docs.
path: /kaggle/working/
# Folds 0-3 (the val_<fold> directories built earlier) are used for training.
train:
    - val_0/
    - val_1/
    - val_2/
    - val_3/
# Fold 4 is held out for validation.
val: val_4/

# Class index -> class name.
names:
    0: paragraph
    1: text_box
    2: image
    3: table

In [None]:
from ultralytics import YOLO

# Build the model from the "yolov8m.yaml" architecture definition (a .yaml, not a .pt checkpoint).
# NOTE(review): presumably this means randomly initialised weights — confirm.
model = YOLO("yolov8m.yaml")

In [None]:
# Train on the dataset described by badlad.yaml and keep whatever
# `model.train` returns in `result`.
result = model.train(
    data="/kaggle/working/badlad.yaml",
    epochs=7,
    pretrained=False,
    imgsz=512,
    device=[0, 1],  # presumably two GPUs (indices 0 and 1) — confirm for the runtime in use
)

# **Training Result**

In [None]:
import pandas as pd

# Load the results.csv written under the training run directory and preview
# its first rows. Note that this rebinds `df`, which earlier held the fold summary.
results_csv_path = '/kaggle/working/runs/detect/train/results.csv'
df = pd.read_csv(results_csv_path)
df.head()

In [None]:
# First five rows of the training log; `head()` defaults to 5 rows, so this
# repeats the preview shown by the previous cell.
df.head(5)

In [None]:
# Display four of the diagnostic plots saved by the training run in one 2x2 figure.

#import libraries
import cv2
from matplotlib import pyplot as plt

TRAIN_RUN_DIR = '/kaggle/working/runs/detect/train/'

# (file name inside the run directory, subplot title), in display order.
training_plots = [
    ('P_curve.png', "Precision curve"),
    ('confusion_matrix_normalized.png', "Normalized confusion matrix"),
    ('F1_curve.png', "F1 curve"),
    ('PR_curve.png', "Precision-recall curve"),
]

# setting values to rows and column variables
rows = 2
columns = 2

# create figure
fig = plt.figure(figsize=(10, 7))

for position, (file_name, title) in enumerate(training_plots, start=1):
    plot_path = TRAIN_RUN_DIR + file_name
    image_bgr = cv2.imread(plot_path)
    # cv2.imread signals a missing/unreadable file by returning None rather than raising.
    if image_bgr is None:
        raise FileNotFoundError(f"Could not read training plot: {plot_path}")

    ax = fig.add_subplot(rows, columns, position)
    # OpenCV loads images as BGR while matplotlib expects RGB; convert so the
    # plot colours are not swapped.
    ax.imshow(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
    ax.axis('off')
    ax.set_title(title)

fig.tight_layout()
plt.show()


# **Inference**

In [None]:
# Run inference on one test image with the best checkpoint of the training run.
# Weights are selected when the YOLO object is constructed; passing `model=...`
# to `predict` does not swap the weights of an already-built model, so load
# best.pt explicitly first.
model = YOLO('/kaggle/working/runs/detect/train/weights/best.pt')
results = model.predict(
    source='/kaggle/input/dlsprint2/badlad/images/test/0004ffad-d055-4b02-a9a3-4b2aef301594.png'
)

In [None]:
# Display the object returned by `model.predict(...)` in the previous cell.
results

In [None]:
import torch
import cv2
import numpy as np
import pathlib
import matplotlib.pyplot as plt

# Read one test image with OpenCV, load the best checkpoint of the training
# run, run the model on the image and render the first result.
test_image_path = "/kaggle/input/dlsprint2/badlad/images/test/0004ffad-d055-4b02-a9a3-4b2aef301594.png"
best_weights_path = "/kaggle/working/runs/detect/train/weights/best.pt"

img = cv2.imread(test_image_path)
model = YOLO(best_weights_path)
results = model(img)
res_plotted = results[0].plot()

In [None]:
# `Results.plot()` returns the annotated image in BGR channel order, while
# matplotlib expects RGB; reverse the channel axis so colours are not swapped.
plt.imshow(res_plotted[..., ::-1])

In [None]:
# Copy the attributes of each result into notebook-level names. The names are
# overwritten on every iteration, so after the loop they (and `result`) refer
# to the last element of `results`.
for result in results:
    boxes = result.boxes  # Boxes object for bbox outputs
    masks = result.masks  # Masks object for segmentation masks outputs
    keypoints = result.keypoints  # Keypoints object for pose outputs
    probs = result.probs  # Class probabilities for classification outputs
    # NOTE(review): `keys` is read without being called; whether it is an
    # attribute, property or method depends on the installed ultralytics
    # version — confirm.
    key= result.keys
# Display the full results object as the cell output.
results

In [None]:
# Print the values captured from the last result, one after another.
for inspected in (boxes, masks, key, probs):
    print(inspected)

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Render the first result again and display it.
res_plotted = results[0].plot()
# `Results.plot()` returns a BGR array while matplotlib expects RGB; reverse
# the channel axis so colours are not swapped.
plt.imshow(res_plotted[..., ::-1])

In [None]:
# Pick two detections from the first result and convert their corner
# coordinates (xyxy) to integer NumPy arrays for mask construction.
boxes = results[0].boxes

# Indices 2 and 1 are used below, so at least three detections are required.
if len(boxes) < 3:
    raise IndexError(f"Expected at least 3 detected boxes, got {len(boxes)}")

box1 = boxes[2]  # returns one box
box2 = boxes[1]  # returns one box

# Class id of the first selected box as an int16 array.
cats = box1.cls.short().to("cpu").numpy()

# Drop the singleton dimension of the single-box coordinate tensors.
z1 = torch.squeeze(box1.xyxy)
z2 = torch.squeeze(box2.xyxy)

# Integer coordinates on the CPU. The original cell referenced the undefined
# names `z` and `c` here (NameError); `z1` and `c1` were intended.
c1 = z1.short().to("cpu").numpy()
c2 = z2.short().to("cpu").numpy()

# Show both coordinate arrays as the cell output.
c1, c2

In [None]:
# `result` is the loop variable left over from the `for result in results`
# cell, i.e. the last result. This value is later passed to `bbox_to_mask` as
# the mask shape.
result.orig_shape

In [None]:
import numpy as np

def bbox_to_mask(image_shape, bounding_boxs):
    """Rasterise one axis-aligned box into a binary mask.

    Args:
        image_shape: Shape of the mask to create; the first two entries are
            treated as (height, width).
        bounding_boxs: Four values ``(x_min, y_min, x_max, y_max)`` in pixel
            coordinates. Any sequence or array of four numbers is accepted;
            fractional values are truncated to integers.

    Returns:
        ``np.uint8`` array of shape ``image_shape`` that is 1 inside the box
        (max edges exclusive) and 0 elsewhere. The box is clipped to the image,
        so a box partly or fully outside yields a partial or empty mask.
    """
    binary_mask = np.zeros(image_shape, dtype=np.uint8)
    height, width = binary_mask.shape[:2]

    x_min, y_min, x_max, y_max = (int(value) for value in bounding_boxs)

    # Clip to the image bounds. Without this a negative coordinate would be
    # interpreted by NumPy slicing as an offset from the end of the axis and
    # silently produce a wrong (usually empty) region.
    x_min, x_max = (min(max(value, 0), width) for value in (x_min, x_max))
    y_min, y_max = (min(max(value, 0), height) for value in (y_min, y_max))

    # Update the corresponding region in the binary mask
    binary_mask[y_min:y_max, x_min:x_max] = 1

    return binary_mask

In [None]:
# Build one mask per selected box on the shape of the original image, then
# combine them: `mask` is True wherever either box covers a pixel.
mask_shape = result.orig_shape
mask1 = bbox_to_mask(mask_shape, c1)
mask2 = bbox_to_mask(mask_shape, c2)
mask = np.logical_or(mask1, mask2)

In [None]:
# Show the combined mask (the element-wise OR of the two box masks) as an image.
plt.imshow(mask)