In [None]:
# Fetch the RoboChef repository; later cells read its annotation CSV and image folders from /content/RoboChef.
!git clone https://github.com/KhushJShah/RoboChef

Cloning into 'RoboChef'...
remote: Enumerating objects: 1424, done.[K
remote: Counting objects: 100% (36/36), done.[K
remote: Compressing objects: 100% (30/30), done.[K
remote: Total 1424 (delta 6), reused 36 (delta 6), pack-reused 1388[K
Receiving objects: 100% (1424/1424), 225.64 MiB | 41.76 MiB/s, done.
Resolving deltas: 100% (11/11), done.
Updating files: 100% (1395/1395), done.


In [None]:
import pandas as pd
import os
import shutil
import json
from sklearn.model_selection import train_test_split

# Mount Google Drive at /content/drive; later cells write the converted dataset
# and data.yaml under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import pandas as pd
import json
import shutil
from sklearn.model_selection import train_test_split

def convert_shape_to_bbox(shape):
    """Convert an annotated region shape into an axis-aligned bounding box.

    Args:
        shape: Dict parsed from the CSV's ``region_shape_attributes`` column
            (presumably a VGG Image Annotator export -- confirm). Its
            ``name`` key selects the geometry:

            * ``polygon`` / ``polyline``: ``all_points_x``, ``all_points_y``
            * ``rect``: ``x``, ``y``, ``width``, ``height``
            * ``ellipse``: ``cx``, ``cy``, ``rx``, ``ry``
            * ``circle``: ``cx``, ``cy``, ``r``

    Returns:
        ``(x_min, y_min, x_max, y_max)`` in the same units as the input, or
        ``None`` when the shape is empty, has no ``name``, has an
        unsupported ``name``, or is a polygon/polyline without points.

    Raises:
        KeyError: If a ``rect``, ``ellipse`` or ``circle`` is missing one of
            its required keys.
    """
    # An empty dict (a row with no drawn region) has no 'name' key; treat it
    # like any other unhandled shape instead of raising KeyError.
    if not shape:
        return None
    shape_name = shape.get('name')

    if shape_name in ('polygon', 'polyline'):
        xs = shape.get('all_points_x', [])
        ys = shape.get('all_points_y', [])
        if not xs or not ys:
            return None
        return min(xs), min(ys), max(xs), max(ys)

    if shape_name == 'rect':
        left, top = shape['x'], shape['y']
        return left, top, left + shape['width'], top + shape['height']

    if shape_name == 'ellipse':
        cx, cy = shape['cx'], shape['cy']
        rx, ry = shape['rx'], shape['ry']
        return cx - rx, cy - ry, cx + rx, cy + ry

    if shape_name == 'circle':
        cx, cy, r = shape['cx'], shape['cy'], shape['r']
        return cx - r, cy - r, cx + r, cy + r

    # Any other geometry is not convertible to a box here.
    return None

def _read_image_size(image_path):
    """Return ``(width, height)`` in pixels for ``image_path``, or None if it cannot be decoded."""
    # Imported lazily so this cell's import list stays as it is; OpenCV is
    # already a dependency of the notebook's augmentation cell.
    import cv2

    image = cv2.imread(image_path)
    if image is None:
        return None
    height, width = image.shape[:2]
    return width, height


def convert_csv_to_yolo(csv_path, output_dir, classes, image_dirs):
    """Convert a region-annotation CSV into YOLO label files and collect the images.

    For every distinct ``filename`` in the CSV the image is looked up in
    ``image_dirs`` (first match wins), its real pixel size is read, each
    region is converted to a normalised ``class x_center y_center width
    height`` line, the label file is written to ``<output_dir>/labels`` and
    the image is copied to ``<output_dir>/images``.

    The CSV's ``file_size`` column is deliberately NOT used for
    normalisation: in VIA-style exports it holds the file's size in bytes,
    not a pixel dimension, so dividing by it yields meaningless coordinates.

    Args:
        csv_path: Path to the CSV; must have ``filename``,
            ``region_shape_attributes`` and ``region_attributes`` columns
            (the latter two JSON-encoded).
        output_dir: Dataset root; ``images`` and ``labels`` sub-directories
            are created if missing.
        classes: Ordered class names; a region's class id is the index of
            its ``Spice`` attribute (default ``'None'``) in this list.
        image_dirs: Directories searched, in order, for each image file.

    Returns:
        None. Images that are missing or undecodable are skipped (no label
        file is written for them); rows that fail to parse are reported and
        skipped.
    """
    df = pd.read_csv(csv_path)

    images_out_dir = os.path.join(output_dir, 'images')
    labels_out_dir = os.path.join(output_dir, 'labels')
    os.makedirs(images_out_dir, exist_ok=True)
    os.makedirs(labels_out_dir, exist_ok=True)

    # groupby(sort=False) visits filenames in first-appearance order and
    # avoids re-filtering the whole frame once per image.
    for filename, image_data in df.groupby('filename', sort=False):
        candidate_paths = (os.path.join(image_dir, filename) for image_dir in image_dirs)
        img_src_path = next((path for path in candidate_paths if os.path.exists(path)), None)
        if img_src_path is None:
            print(f"Skipping {filename}: image not found in any image directory")
            continue

        image_size = _read_image_size(img_src_path)
        if image_size is None:
            print(f"Skipping {filename}: could not read image at {img_src_path}")
            continue
        img_width, img_height = image_size

        img_annotations = []
        for _, row in image_data.iterrows():
            try:
                region_shape = json.loads(row['region_shape_attributes'])
                if not region_shape:
                    # Row for an image with no drawn region: contributes no
                    # box, leaving an empty label file (background image).
                    continue
                region_attr = json.loads(row['region_attributes'])
                spice = region_attr.get('Spice', 'None')

                bbox = convert_shape_to_bbox(region_shape)
                if bbox is None:
                    print(f"Skipping annotation due to unhandled shape: {region_shape}")
                    continue

                x_min, y_min, x_max, y_max = bbox

                # Convert to YOLO format, normalising by the true image size.
                x_center = (x_min + x_max) / 2 / img_width
                y_center = (y_min + y_max) / 2 / img_height
                bbox_width = (x_max - x_min) / img_width
                bbox_height = (y_max - y_min) / img_height

                # Raises ValueError for a class name not in `classes`.
                class_id = classes.index(spice)
                yolo_ann = f"{class_id} {x_center} {y_center} {bbox_width} {bbox_height}\n"
                img_annotations.append(yolo_ann)

            except (AttributeError, KeyError, TypeError, ValueError) as e:
                # Best effort: malformed JSON, missing shape keys or an
                # unknown class only cost this one row.
                print(f"Error processing row: {e}")
                continue

        # Save annotations in YOLO format
        img_base_name = os.path.splitext(filename)[0]
        with open(os.path.join(labels_out_dir, f"{img_base_name}.txt"), 'w') as yolo_file:
            yolo_file.writelines(img_annotations)

        # Copy (not move) the image so the cloned source tree stays intact.
        shutil.copy(img_src_path, os.path.join(images_out_dir, filename))

def _clear_from_splits(base_dir, splits, name):
    """Delete ``name`` from every ``<base_dir>/<split>`` folder where it exists."""
    for split in splits:
        stale_path = os.path.join(base_dir, split, name)
        if os.path.exists(stale_path):
            os.remove(stale_path)


def split_dataset(output_dir, train_size=0.8):
    """Move images and their label files into ``train``/``val`` sub-folders.

    Files directly inside ``<output_dir>/images`` are split with
    ``train_test_split`` (``random_state=42``) and moved to
    ``images/train`` or ``images/val``; a same-stem ``.txt`` file in
    ``<output_dir>/labels`` follows its image into ``labels/train`` or
    ``labels/val``.

    Args:
        output_dir: Dataset root containing ``images`` and ``labels``.
        train_size: Fraction (or count) of images for the training split,
            passed straight to ``train_test_split``.

    Returns:
        None. If there are no images to split, the split folders are still
        created and a message is printed.
    """
    images_dir = os.path.join(output_dir, 'images')
    labels_dir = os.path.join(output_dir, 'labels')

    splits = ('train', 'val')
    for split in splits:
        os.makedirs(os.path.join(images_dir, split), exist_ok=True)
        os.makedirs(os.path.join(labels_dir, split), exist_ok=True)

    # os.listdir order is arbitrary; sort so the seeded split is the same on
    # every run and a re-run cannot shuffle files between train and val.
    image_files = sorted(
        name for name in os.listdir(images_dir)
        if os.path.isfile(os.path.join(images_dir, name))
    )
    if not image_files:
        # train_test_split raises on an empty list; nothing to do instead.
        print(f"No images to split in {images_dir}")
        return

    train_files, val_files = train_test_split(image_files, train_size=train_size, random_state=42)

    for split, files in (('train', train_files), ('val', val_files)):
        for file in files:
            label_name = os.path.splitext(file)[0] + '.txt'
            src_image_path = os.path.join(images_dir, file)
            src_label_path = os.path.join(labels_dir, label_name)

            # Remove copies left by an earlier run from BOTH splits so an
            # image can never end up in train and val at the same time.
            _clear_from_splits(images_dir, splits, file)
            shutil.move(src_image_path, os.path.join(images_dir, split, file))

            # Labels are optional; only replace existing ones when a fresh
            # label is actually available.
            if os.path.exists(src_label_path):
                _clear_from_splits(labels_dir, splits, label_name)
                shutil.move(src_label_path, os.path.join(labels_dir, split, label_name))

# Run the conversion: annotation CSV from the cloned repo -> YOLO dataset on Drive.
csv_path = '/content/RoboChef/Annotated/Annotation_Spices_csv.csv'
output_dir = '/content/drive/MyDrive/Colab Notebooks/spice_detection_dataset'
# List order defines the YOLO class ids (index 0, 1, 2); 'None' is the
# fallback used for regions without a 'Spice' attribute.
classes = ['Cinnamon', 'Clove', 'None']
# Searched in order for each annotated filename; the first directory that
# contains the file wins.
# NOTE(review): 'Cinnamom stick' is presumably the folder's actual (misspelled)
# name in the repository -- verify before "correcting" it.
image_dirs = [
    '/content/RoboChef/dataset/Both',
    '/content/RoboChef/dataset/Cinnamom stick',
    '/content/RoboChef/dataset/Cloves'
]

# Write labels and copy images, then move them into train/val sub-folders.
convert_csv_to_yolo(csv_path, output_dir, classes, image_dirs)
split_dataset(output_dir)


In [None]:
import os
import cv2
import random
import numpy as np
from tqdm import tqdm

# (transform name, image function) pairs. The name is what lets the label
# file be remapped to match the geometric change applied to the image.
_AUGMENTATIONS = (
    ('vflip', lambda x: cv2.flip(x, 0)),  # Vertical flip
    ('hflip', lambda x: cv2.flip(x, 1)),  # Horizontal flip
    ('rot90_cw', lambda x: cv2.rotate(x, cv2.ROTATE_90_CLOCKWISE)),  # Rotate 90 degrees clockwise
    ('rot90_ccw', lambda x: cv2.rotate(x, cv2.ROTATE_90_COUNTERCLOCKWISE)),  # Rotate 90 degrees counter-clockwise
    ('rot180', lambda x: cv2.rotate(x, cv2.ROTATE_180)),  # Rotate 180 degrees
    ('color', lambda x: cv2.convertScaleAbs(x, alpha=random.uniform(0.8, 1.2), beta=random.uniform(-10, 10))),  # Random brightness/contrast
)


def transform_yolo_box(transform, x_center, y_center, width, height):
    """Map a normalised YOLO box through one of the supported augmentations.

    Args:
        transform: One of ``'vflip'``, ``'hflip'``, ``'rot90_cw'``,
            ``'rot90_ccw'``, ``'rot180'`` or ``'color'``.
        x_center, y_center, width, height: Box in YOLO format, each
            normalised to the 0-1 range of the source image.

    Returns:
        ``(x_center, y_center, width, height)`` normalised to the augmented
        image. 90-degree rotations swap width and height because the image
        axes swap.

    Raises:
        ValueError: If ``transform`` is not a known name.
    """
    if transform == 'vflip':
        return x_center, 1 - y_center, width, height
    if transform == 'hflip':
        return 1 - x_center, y_center, width, height
    if transform == 'rot90_cw':
        # The old top edge becomes the new right edge.
        return 1 - y_center, x_center, height, width
    if transform == 'rot90_ccw':
        # The old top edge becomes the new left edge.
        return y_center, 1 - x_center, height, width
    if transform == 'rot180':
        return 1 - x_center, 1 - y_center, width, height
    if transform == 'color':
        # Pixel-value change only; geometry is untouched.
        return x_center, y_center, width, height
    raise ValueError(f"Unknown transform: {transform!r}")


def _apply_random_augmentation(image):
    """Apply one randomly chosen augmentation; return ``(transform name, image)``."""
    transform, image_fn = random.choice(_AUGMENTATIONS)
    return transform, image_fn(image)


def _transform_label_line(transform, line):
    """Return a YOLO label line remapped for ``transform``.

    Lines that do not have exactly five fields (e.g. blank lines) are
    returned unchanged.
    """
    fields = line.split()
    if transform == 'color' or len(fields) != 5:
        return line
    class_id = fields[0]
    x_center, y_center, width, height = transform_yolo_box(transform, *map(float, fields[1:]))
    return f"{class_id} {x_center} {y_center} {width} {height}\n"


def augment_image(image):
    """Return ``image`` after one randomly chosen augmentation.

    The augmentation is a flip, a 90/180-degree rotation, or a random
    brightness/contrast change. Only the image is returned; callers that
    also need to remap bounding boxes must know which transform was applied
    (see ``augment_dataset``).
    """
    return _apply_random_augmentation(image)[1]


def augment_dataset(images_dir, labels_dir, output_images_dir, output_labels_dir, augment_count=1):
    """Write ``augment_count`` augmented copies of every image with matching labels.

    For each file in ``images_dir`` a random augmentation is applied and the
    result saved as ``<stem>_aug_<i>.jpg`` in ``output_images_dir``. The
    same-stem ``.txt`` label from ``labels_dir`` is written to
    ``output_labels_dir`` as ``<stem>_aug_<i>.txt`` with its boxes remapped
    for flips and rotations, so labels stay aligned with the augmented
    pixels (copying the label verbatim is only correct for colour changes).

    Args:
        images_dir: Directory of source images.
        labels_dir: Directory of YOLO label files named after the images.
        output_images_dir: Destination for augmented images (created if missing).
        output_labels_dir: Destination for augmented labels (created if missing).
        augment_count: Number of augmented copies per image.

    Returns:
        None. Files that OpenCV cannot read are skipped; images without a
        label file get augmented images but no label file.
    """
    os.makedirs(output_images_dir, exist_ok=True)
    os.makedirs(output_labels_dir, exist_ok=True)

    for image_name in tqdm(os.listdir(images_dir)):
        image_stem = os.path.splitext(image_name)[0]
        image_path = os.path.join(images_dir, image_name)
        label_path = os.path.join(labels_dir, image_stem + '.txt')

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for unreadable/non-image files.
            print(f"Skipping {image_name}: not a readable image")
            continue

        label_lines = None
        if os.path.exists(label_path):
            with open(label_path) as label_file:
                label_lines = label_file.readlines()

        for i in range(augment_count):
            transform, augmented_image = _apply_random_augmentation(image)

            # Save augmented image
            aug_image_name = f"{image_stem}_aug_{i}.jpg"
            cv2.imwrite(os.path.join(output_images_dir, aug_image_name), augmented_image)

            # Write the label remapped for the transform that was applied.
            if label_lines is not None:
                output_label_path = os.path.join(output_labels_dir, f"{image_stem}_aug_{i}.txt")
                with open(output_label_path, 'w') as out_file:
                    out_file.writelines(_transform_label_line(transform, line) for line in label_lines)

# Paths: augment the training split only; results go to separate
# augmented_images/ and augmented_labels/ folders next to the originals.
# NOTE(review): data.yaml's `train:` entry points at images/train, not at
# augmented_images/train, so the augmented files are presumably not picked up
# by training unless they are merged in -- confirm.
images_dir = '/content/drive/MyDrive/Colab Notebooks/spice_detection_dataset/images/train'
labels_dir = '/content/drive/MyDrive/Colab Notebooks/spice_detection_dataset/labels/train'
output_images_dir = '/content/drive/MyDrive/Colab Notebooks/spice_detection_dataset/augmented_images/train'
output_labels_dir = '/content/drive/MyDrive/Colab Notebooks/spice_detection_dataset/augmented_labels/train'

# Apply augmentations (two augmented copies per training image)
augment_dataset(images_dir, labels_dir, output_images_dir, output_labels_dir, augment_count=2)


100%|██████████| 177/177 [00:16<00:00, 10.57it/s]


In [None]:
!git clone https://github.com/ultralytics/yolov5  # Clone the YOLOv5 repository
# %cd changes the notebook's working directory; the later `!python train.py`
# call and the relative models/yolov5s.yaml path rely on being inside yolov5/.
%cd yolov5
!pip install -r requirements.txt  # Install the required dependencies



Cloning into 'yolov5'...
remote: Enumerating objects: 16634, done.[K
remote: Counting objects: 100% (175/175), done.[K
remote: Compressing objects: 100% (128/128), done.[K
remote: Total 16634 (delta 70), reused 103 (delta 47), pack-reused 16459[K
Receiving objects: 100% (16634/16634), 15.35 MiB | 26.34 MiB/s, done.
Resolving deltas: 100% (11373/11373), done.
/content/yolov5
Collecting gitpython>=3.1.30 (from -r requirements.txt (line 5))
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting pillow>=10.3.0 (from -r requirements.txt (line 9))
  Downloading pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
Collecting requests>=2.32.0 (from -r requirements.txt (line 12))
  Downloading requests-2.32.3-py3-none-any.whl (64 kB)

In [None]:
# Define the content of the data.yaml file
# `names` must list the classes in the same order as the `classes` list used
# when the labels were generated, and `nc` must equal its length.
data_yaml_content = """
train: /content/drive/MyDrive/Colab Notebooks/spice_detection_dataset/images/train
val: /content/drive/MyDrive/Colab Notebooks/spice_detection_dataset/images/val

nc: 3  # Number of classes
names: ['Cinnamon', 'Clove', 'None']  # Class names
"""

# Specify the path where the data.yaml file will be saved
yaml_file_path = '/content/drive/MyDrive/Colab Notebooks/spice_detection_dataset/data.yaml'

# Write the content to the data.yaml file (overwrites any existing file)
with open(yaml_file_path, 'w') as file:
    file.write(data_yaml_content)

print(f"data.yaml file created at {yaml_file_path}")


data.yaml file created at /content/drive/MyDrive/Colab Notebooks/spice_detection_dataset/data.yaml


In [None]:
# Train YOLOv5s starting from the yolov5s.pt weights: 640 px images, batch size 16,
# 200 epochs, on the dataset described by data.yaml. The backslash escapes the
# space in "Colab Notebooks" for the shell. Must run from inside the yolov5/ clone.
!python train.py --img 640 --batch 16 --epochs 200 --data /content/drive/MyDrive/Colab\ Notebooks/spice_detection_dataset/data.yaml --cfg models/yolov5s.yaml --weights yolov5s.pt


2024-06-18 00:01:46.194378: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-18 00:01:46.194433: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-18 00:01:46.308205: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=models/yolov5s.yaml, data=/content/drive/MyDrive/Colab Notebooks/spice_detection_dataset/data.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=200, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, evolve_population=data/hyps, resume_evolve=Non