In [1]:
# Fetch the RoboChef repository; later cells read the annotation CSV and the
# dataset images from the resulting 'RoboChef' checkout.
!git clone https://github.com/KhushJShah/RoboChef.git

Cloning into 'RoboChef'...
remote: Enumerating objects: 1434, done.[K
remote: Counting objects: 100% (46/46), done.[K
remote: Compressing objects: 100% (38/38), done.[K
remote: Total 1434 (delta 11), reused 43 (delta 8), pack-reused 1388[K
Receiving objects: 100% (1434/1434), 225.80 MiB | 11.87 MiB/s, done.
Resolving deltas: 100% (16/16), done.
Updating files: 100% (1399/1399), done.


In [13]:
import json
import os
import shutil

import cv2
import pandas as pd
from sklearn.model_selection import train_test_split


In [14]:
# Annotation table from the cloned repository.
csv_path = '/content/RoboChef/Annotated/Annotations_2_csv.csv'
annotations_df = pd.read_csv(csv_path)

# Source image folders; a filename is looked up in these folders in order.
image_dirs = {
    'Both': '/content/RoboChef/dataset/Both',
    'Cinnamon': '/content/RoboChef/dataset/Cinnamom stick',
    'Clove': '/content/RoboChef/dataset/Cloves'
}

# Output layout: <base_dir>/{images,labels}/{train,val}
base_dir = '/content/drive/MyDrive/Colab Notebooks/spice_detection_dataset_yolo1'
img_dir = os.path.join(base_dir, 'images')
label_dir = os.path.join(base_dir, 'labels')
train_img_dir, val_img_dir = (os.path.join(img_dir, split) for split in ('train', 'val'))
train_label_dir, val_label_dir = (os.path.join(label_dir, split) for split in ('train', 'val'))

# Create every output folder up front; re-running the cell is harmless.
for output_dir in (train_img_dir, val_img_dir, train_label_dir, val_label_dir):
    os.makedirs(output_dir, exist_ok=True)

In [15]:
def parse_region_attributes(region_shape_attributes, region_attributes):
    """Turn one annotation row's JSON fields into a class id and a bounding box.

    Args:
        region_shape_attributes: JSON string describing the region geometry.
            Supported shapes are ``rect`` (keys ``x``, ``y``, ``width``,
            ``height``) and ``polygon`` (keys ``all_points_x``,
            ``all_points_y``); a polygon is reduced to its axis-aligned
            bounding box.
        region_attributes: JSON string of region labels. The ``Spice`` key
            selects the class; a missing or unknown value maps to class 2
            ('None').

    Returns:
        Tuple ``(class_id, x_min, y_min, x_max, y_max)`` in the same
        coordinate units as the annotation.

    Raises:
        ValueError: if the region has no shape ``name`` (for example an empty
            ``{}`` shape) or names a shape type that is not supported.
    """
    shape_attributes = json.loads(region_shape_attributes)
    attributes = json.loads(region_attributes)
    spice_type = attributes.get('Spice', 'None')

    # Assign class IDs
    class_id = {
        'Cinnamon': 0,
        'Clove': 1,
        'None': 2
    }.get(spice_type, 2)  # Default to 'None' if not found

    # Use .get so a region without a shape fails with a clear ValueError
    # instead of an opaque KeyError: 'name'.
    shape_name = shape_attributes.get('name')
    if shape_name is None:
        raise ValueError("Region has no shape 'name' (empty region_shape_attributes?)")

    if shape_name == 'rect':
        x_min = shape_attributes['x']
        y_min = shape_attributes['y']
        x_max = x_min + shape_attributes['width']
        y_max = y_min + shape_attributes['height']
    elif shape_name == 'polygon':
        all_points_x = shape_attributes['all_points_x']
        all_points_y = shape_attributes['all_points_y']
        x_min, x_max = min(all_points_x), max(all_points_x)
        y_min, y_max = min(all_points_y), max(all_points_y)
    else:
        raise ValueError("Unsupported shape type: " + shape_name)

    return class_id, x_min, y_min, x_max, y_max

# Collect one record per annotated region: (source path, filename, class, box).
data = []

# Resolve each annotation row to the first source folder that contains the file.
# The loop variable is deliberately NOT called `img_dir`, so the output
# `img_dir` defined with the other dataset paths is not overwritten.
for _, row in annotations_df.iterrows():
    image_name = row['filename']
    for source_dir in image_dirs.values():
        img_path = os.path.join(source_dir, image_name)
        if os.path.exists(img_path):
            class_id, x_min, y_min, x_max, y_max = parse_region_attributes(
                row['region_shape_attributes'], row['region_attributes']
            )
            data.append((img_path, image_name, class_id, x_min, y_min, x_max, y_max))
            break

# Convert list to DataFrame for easier manipulation
data_df = pd.DataFrame(data, columns=['img_path', 'filename', 'class_id', 'x_min', 'y_min', 'x_max', 'y_max'])

# Split by image, not by region row: an image with several annotated regions
# must land entirely in train or entirely in val, otherwise the same picture
# leaks into both sets and each copy carries only part of its labels.
unique_filenames = data_df['filename'].drop_duplicates()
train_names, val_names = train_test_split(unique_filenames, test_size=0.2, random_state=42)
train_df = data_df[data_df['filename'].isin(train_names)]
val_df = data_df[data_df['filename'].isin(val_names)]

# Define function to save images and labels in YOLO format
def save_yolo_format(df, img_dir, label_dir):
    """Write images and YOLO-format label files for every image listed in ``df``.

    Args:
        df: DataFrame with one row per annotated region and the columns
            ``img_path``, ``filename``, ``class_id``, ``x_min``, ``y_min``,
            ``x_max``, ``y_max``. Several rows may share a ``filename``.
        img_dir: Existing directory that receives the image files.
        label_dir: Existing directory that receives one ``<stem>.txt`` label
            file per image.

    Each label file holds one line per region,
    ``class_id x_center y_center width height``, with the box values divided
    by the image width/height. Images that cannot be read are reported with a
    message and skipped.
    """
    # Group by image so all regions of one picture go into a single label file.
    # Writing per row with mode 'w' would keep only the last region.
    for image_name, regions in df.groupby('filename', sort=False):
        img_path = regions['img_path'].iloc[0]

        img = cv2.imread(img_path)
        if img is None:
            print(f"Error: Unable to load image {img_path}")
            continue

        height, width = img.shape[:2]

        annotation_lines = []
        for region in regions.itertuples(index=False):
            # Normalize coordinates
            x_center = (region.x_min + region.x_max) / 2.0 / width
            y_center = (region.y_min + region.y_max) / 2.0 / height
            bbox_width = (region.x_max - region.x_min) / width
            bbox_height = (region.y_max - region.y_min) / height
            annotation_lines.append(
                f"{region.class_id} {x_center} {y_center} {bbox_width} {bbox_height}"
            )

        # Save the annotation to a file
        base_name = os.path.splitext(image_name)[0]
        label_path = os.path.join(label_dir, f"{base_name}.txt")
        img_save_path = os.path.join(img_dir, image_name)

        with open(label_path, 'w') as f:
            f.write("\n".join(annotation_lines))

        # The image is written once per file, not once per region.
        cv2.imwrite(img_save_path, img)

# Save training and validation datasets
save_yolo_format(train_df, train_img_dir, train_label_dir)
save_yolo_format(val_df, val_img_dir, val_label_dir)

In [16]:
# Show which label files were written for each split.
train_label_files = os.listdir(train_label_dir)
val_label_files = os.listdir(val_label_dir)
print("Train labels:", train_label_files)
print("Validation labels:", val_label_files)

Train labels: ['Cloves77.txt', 'Cloves58.txt', 'CinnamomStick64.txt', 'Cloves22.txt', 'CinnamomStick52.txt', 'CinnamomStick25.txt', 'CinnamomStick54.txt', 'Cloves25.txt', 'Cloves356.txt', 'CinnamomStick120.txt', 'Cloves12.txt', 'CinnamomStick83.txt', 'Cloves34.txt', 'Cloves75.txt', 'CinnamomStick37.txt', 'CinnamomStick72.txt', 'CinnamomStick38.txt', 'CinnamomStick79.txt', 'Cloves56.txt', 'Cloves20.txt', 'Cloves66.txt', 'CinnamomStick47.txt', 'CC11.txt', 'Cloves3.txt', 'Cloves76.txt', 'Cloves33.txt', 'CC3.txt', 'CinnamomStick26.txt', 'CinnamomStick93.txt', 'CinnamomStick116.txt', 'CinnamomStick82.txt', 'CinnamomStick35.txt', 'CinnamomStick100.txt', 'Cloves57.txt', 'Cloves17.txt', 'CinnamomStick1.txt', 'CC6.txt', 'CinnamomStick5.txt', 'CC10.txt', 'CinnamomStick12.txt', 'Cloves353.txt', 'CinnamomStick94.txt', 'CinnamomStick61.txt', 'Cloves19.txt', 'CinnamomStick21.txt', 'CinnamomStick63.txt', 'CinnamomStick108.txt', 'Cloves45.txt', 'CinnamomStick44.txt', 'CinnamomStick11.txt', 'CinnamomSt

In [17]:
# Report how many image files ended up in each split directory.
train_image_count = len(os.listdir(train_img_dir))
val_image_count = len(os.listdir(val_img_dir))
print(f"Total training images: {train_image_count}")
print(f"Total validation images: {val_image_count}")

Total training images: 172
Total validation images: 44


In [18]:
# Dataset description consumed by the YOLO trainer. The split paths are taken
# from train_img_dir / val_img_dir rather than re-typed, so data.yaml always
# points at the directories the images were actually written to.
# The class order must match the class ids assigned in parse_region_attributes
# (0 = Cinnamon, 1 = Clove, 2 = None).
yaml_content = f"""
train: {train_img_dir}
val: {val_img_dir}

nc: 3
names: ['Cinnamon', 'Clove', 'None']
"""

yaml_path = os.path.join(base_dir, 'data.yaml')
with open(yaml_path, 'w') as f:
    f.write(yaml_content)

print(f"data.yaml created at: {yaml_path}")


data.yaml created at: /content/drive/MyDrive/Colab Notebooks/spice_detection_dataset_yolo1/data.yaml
