In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
import sys
import os
from os import listdir
from os.path import join
# Detect the runtime so the dataset can be located in the right place.
IN_COLAB = 'google.colab' in sys.modules
# Treat any session that defines KAGGLE_KERNEL_RUN_TYPE as a Kaggle kernel, so
# non-interactive (committed) runs are recognised as well.
# NOTE(review): Kaggle is understood to set this to values such as
# 'Interactive' or 'Batch' — confirm against the Kaggle environment.
IN_KAGGLE = 'KAGGLE_KERNEL_RUN_TYPE' in os.environ
DATASET_NAME = 'codebrim-balanced'

# Root directory of the YOLO-layout copy of the dataset. It stays '' on Kaggle,
# which makes the images/ and labels/ trees relative to the working directory.
yolo_dataset_path = ''
if IN_COLAB:
  print('Using Google Colab')
  input_dataset_path = kagglehub.dataset_download('arct22/codebrim-balanced')
  yolo_dataset_path = '/content/'
  print(input_dataset_path)
  print('Data source import complete.')
  print(listdir(input_dataset_path))
elif IN_KAGGLE:
  print('Using Kaggle Kernel')
  input_dataset_path = '/kaggle/input/codebrim-balanced'
  print('Data source import complete.')
  print(os.listdir(input_dataset_path))
else:
  print(f'Using Local Machine. Operating System: {sys.platform}')
  # Build the paths from components so they resolve on every operating system,
  # not only where '\' is the path separator.
  input_dataset_path = join(os.getcwd(), 'kaggle', 'input', DATASET_NAME)
  if not os.path.isdir(input_dataset_path):
    # Fail here with a clear message instead of passing a placeholder value on
    # to the path helpers below.
    raise FileNotFoundError(f'Dataset directory not found: {input_dataset_path}')
  yolo_dataset_path = join(os.getcwd(), 'datasets', DATASET_NAME)
  os.makedirs(yolo_dataset_path, exist_ok=True)
  print('Data source import complete.')
  print(f'input dataset path: {input_dataset_path} \ncontents: ',listdir(input_dataset_path))
  print(f'yolo dataset path: {yolo_dataset_path}')

# Source images, one 'defects' folder per split of the downloaded dataset.
input_train_images = join(input_dataset_path, 'train', 'defects')
input_test_images = join(input_dataset_path, 'test', 'defects')
input_val_images = join(input_dataset_path, 'val', 'defects')

# Destination folders for the preprocessed images, one per split.
yolo_train_images = join(yolo_dataset_path, 'images', 'train')
yolo_test_images = join(yolo_dataset_path, 'images', 'test')
yolo_val_images = join(yolo_dataset_path, 'images', 'val')

In [None]:
if sys.platform == 'linux' or sys.platform == 'linux2':
  !pip install -U albumentations
  !pip install xmltodict
  !pip install --extra-index-url https://pypi.nvidia.com --upgrade nvidia-dali-cuda120
  !pip install ultralytics
from ultralytics import settings
print(settings)
if IN_COLAB:

  settings.update({
      'runs_dir': '/content/runs',
      'weights_dir': '/content/weights',
      'datasets_dir': yolo_dataset_path
      })


import os
import shutil
from ultralytics import YOLO
cwd = os.getcwd()
print(cwd)

In [None]:
# @title
import cv2
import albumentations as A
from tqdm import tqdm
def pad_and_resize(target_width: int, target_height: int, input_path: str, output_path: str) -> None:
  """Pad each .jpg/.png image in a directory to a square, resize it and save it.

  Every image is padded so that both sides equal its longer side, then resized
  to the requested size and written to ``output_path`` under the same file
  name. Files with other extensions are ignored, and files that OpenCV cannot
  decode are reported and skipped.

  Args:
      target_width: Width of the saved images in pixels.
      target_height: Height of the saved images in pixels.
      input_path: Directory containing the source images.
      output_path: Directory the processed images are written to; created if
          it does not exist.
  """
  os.makedirs(output_path, exist_ok=True)

  saved_count = 0
  for img_name in tqdm(os.listdir(input_path)):
      if not img_name.endswith(('.jpg', '.png')):
          continue

      img = cv2.imread(os.path.join(input_path, img_name))
      if img is None:
          # cv2.imread signals an unreadable file by returning None.
          print(f'Skipping unreadable image: {img_name}')
          continue

      height, width = img.shape[:2]
      # Pad up to the longer side; padding to the shorter side is a no-op and
      # would leave the resize step to distort the aspect ratio.
      side = max(height, width)
      pipeline = A.Compose([
          A.PadIfNeeded(min_height=side, min_width=side),
          A.Resize(height=target_height, width=target_width),
      ])

      transformed = pipeline(image=img)
      cv2.imwrite(os.path.join(output_path, img_name), transformed['image'])
      saved_count += 1

  # Report what this call wrote, not whatever the directory already held.
  print(f'{saved_count} images padded, resized to w:{target_width}, h:{target_height} and saved in dir {output_path}')

In [None]:
# Preprocess every split: the cells below list the train, test and val image
# directories, so all three must exist after a fresh Restart & Run All.
# Re-running overwrites files of the same name in the output directories.
for input_images, yolo_images in [
    (input_train_images, yolo_train_images),
    (input_test_images, yolo_test_images),
    (input_val_images, yolo_val_images),
]:
    pad_and_resize(640, 640, input_images, yolo_images)


In [None]:
import xmltodict

# Read the annotation file as bytes so the XML parser applies the encoding
# declared by the document instead of the platform's default text encoding.
with open(os.path.join(input_dataset_path, 'metadata', 'defects.xml'), 'rb') as file:
    xml = file.read()

# Index directly so an unexpected document layout raises a KeyError naming the
# missing element.
defects_json = xmltodict.parse(xml)['Annotation']['Defect']

import pandas as pd

# One row per <Defect> entry of the annotation file.
defects_df = pd.DataFrame(defects_json)
defects_df

In [None]:
defect_labels_arr = pd.DataFrame(defects_json).to_numpy()
# File names present in each preprocessed image directory.
defect_imgs = {
    'train': os.listdir(yolo_train_images),
    'test': os.listdir(yolo_test_images),
    'val': os.listdir(yolo_val_images)
}

# File name -> split lookup, so each row is matched in constant time. Splits
# are inserted in reverse priority so 'train' wins over 'test', and 'test'
# over 'val', if a name appears in more than one directory.
split_of_image = {}
for split in ('val', 'test', 'train'):
    for img_name in defect_imgs[split]:
        split_of_image[img_name] = split

defect_labels_dict = {
    'train': [],
    'test': [],
    'val': []
    }
unmatched_labels = []

# Assumes the first column of each row holds the image file name — TODO confirm
# against the XML schema.
for defect in defect_labels_arr:
    split = split_of_image.get(defect[0])
    if split is None:
        # No image with this name in any split: do not file the row under
        # 'val' by default.
        unmatched_labels.append(defect[0])
        continue
    defect_labels_dict[split].append(defect.tolist())

if unmatched_labels:
    print(f'{len(unmatched_labels)} annotation rows have no image in any split and were skipped')

In [None]:
# Print the file names listed from the YOLO validation image directory.
print(defect_imgs['val'])

In [None]:
# Display the annotation rows that were assigned to the training split.
pd.DataFrame(defect_labels_dict['train'])

In [None]:
def get_label_in_yolo_format(mhot_labels):
    """Turn one multi-hot annotation row into YOLO label lines.

    Only fields made up entirely of digits are treated as class flags; they
    are numbered 0, 1, 2, ... in the order they appear, and every other field
    is ignored. Each flag equal to 1 produces one line of the form
    ``"<class> 0.5 0.5 1 1"``: a box centred in the image that spans its full
    width and height.

    Args:
        mhot_labels: Iterable of string fields from one annotation row.

    Returns:
        A list of label lines, one per class whose flag is 1 (empty when none).
    """
    x_center, y_center = 0.5, 0.5
    box_width, box_height = 1, 1

    # Keep only the digit-only fields; a field's position in this list is its
    # class index.
    class_flags = [int(field) for field in mhot_labels if field.isdigit()]

    return [
        f"{class_id} {x_center} {y_center} {box_width} {box_height}"
        for class_id, flag in enumerate(class_flags)
        if flag == 1
    ]

# Root directory for the YOLO label files, a sibling of the 'images' directory.
yolo_labels_dir = join(yolo_dataset_path, 'labels')
# Convert the first training row as a quick example of the label format.
example_label_yolo = get_label_in_yolo_format(defect_labels_dict['train'][0])

In [None]:
# Record of every label file written: [file path, list of label lines].
d = []

for label_dir, defects in defect_labels_dict.items():
    split_labels_dir = os.path.join(yolo_labels_dir, label_dir)
    os.makedirs(split_labels_dir, exist_ok=True)
    for row in defects:
        # Strip only the final extension so names containing extra dots keep
        # their full stem and still match the image file name.
        stem = os.path.splitext(row[0])[0]
        filepath = os.path.join(split_labels_dir, stem + '.txt')
        yolo_label = get_label_in_yolo_format(row)

        # An empty label list still creates an (empty) label file.
        with open(filepath, 'w') as file:
            file.writelines(label + '\n' for label in yolo_label)
        d.append([filepath, yolo_label])


In [None]:
# Tabulate each written label file path alongside the label lines written to it.
pd.DataFrame(d)

In [None]:
from ultralytics import YOLO

# Load the 'yolo11n.pt' weights as the starting model.
model = YOLO('yolo11n.pt')

# Train using the dataset description file expected in the current working
# directory.
results = model.train(data=os.path.join(os.getcwd(), 'yolo_balanced.yaml'), epochs=1, imgsz=640)