The oidv6-train-annotations-bbox.csv file contains the ground-truth bounding box annotations for the images in the Open Images training set, including the classes "Coffee" and "French fries". It holds all the bounding box information for the objects in those images, which is essential for training an object detection model such as YOLO.

However, to use this data effectively, you'll need to:

1. Extract the relevant annotations for your specific images and classes.
2. Convert the annotations into the format required by YOLO.
3. Organize your dataset properly for training.

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive; every project
# path used in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Shell command: show what is in the project folder on Drive before processing.
!ls /content/drive/MyDrive/DL_project/ # list dir

class-descriptions-boxable.csv	data_preparation.ipynb	 oidv6-train-annotations-bbox.csv
Cofee_class_data		French_fries_class_data  yolo_traininng.ipynb


In [None]:
# Shell command: list the contents of the French fries class data folder.
!ls /content/drive/MyDrive/DL_project/French_fries_class_data

french_fries_image_ids.txt	   french_fries_image_list.txt
french_fries_image_ids_unique.txt  french_fries_images


In [None]:
# Root folder of the project on the mounted Drive. Input and output paths in
# the later cells are joined onto this value with os.path.join.
base_path = "/content/drive/MyDrive/DL_project/"

In [None]:
# Display the configured base path to confirm its value.
base_path

'/content/drive/MyDrive/DL_project/'

## Extract Annotations for Your Classes - Create a new CSV file with annotations only for chosen classes.

In [None]:
import csv
import os

classes_of_interest = ['/m/02vqfm', '/m/02y6n']  # Coffee and French fries
# Paths to the input and output files
input_file = os.path.join(base_path, 'oidv6-train-annotations-bbox.csv')
output_file = os.path.join(base_path, 'selected_annotations.csv')

# Stream the full annotations file row by row and keep only the rows whose
# LabelName is one of the chosen classes; the header is copied unchanged.
# Both files are opened with newline='' as the csv module requires, so that
# quoted fields are parsed correctly and no extra line endings are written.
with open(input_file, 'r', newline='') as infile, open(output_file, 'w', newline='') as outfile:
    reader = csv.DictReader(infile)
    fieldnames = reader.fieldnames
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()
    for row in reader:
        if row['LabelName'] in classes_of_interest:
            writer.writerow(row)

## Filter Annotations for Your Downloaded Images - Since only certain images were downloaded, we need to ensure that the annotations correspond to those images.

## Extract the image IDs:

In [None]:
# Folders that hold the downloaded images of each class
coffee_images_path = os.path.join(base_path, 'Coffee_class_data', 'coffee_images')
french_fries_images_path = os.path.join(base_path, 'French_fries_class_data', 'french_fries_images')

# An image ID is the name of a .jpg file with its extension stripped
coffee_images = [
    os.path.splitext(file_name)[0]
    for file_name in os.listdir(coffee_images_path)
    if file_name.endswith('.jpg')
]
french_fries_images = [
    os.path.splitext(file_name)[0]
    for file_name in os.listdir(french_fries_images_path)
    if file_name.endswith('.jpg')
]

# Merge the IDs of both classes into one set (duplicates collapse)
all_image_ids = set(coffee_images).union(french_fries_images)
len(all_image_ids)

2975

## Now, filter the annotations to include only these images - final_annotations.csv contains annotations for your images and classes.

In [None]:
# Read the class-filtered annotations and keep only the rows whose ImageID
# belongs to one of the downloaded images; the result goes to
# final_annotations.csv. Both files are opened with newline='' as the csv
# module requires for correct parsing and writing of line endings.
with open(output_file, 'r', newline='') as infile, open(os.path.join(base_path, 'final_annotations.csv'), 'w', newline='') as outfile:
    reader = csv.DictReader(infile)
    fieldnames = reader.fieldnames
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()
    for row in reader:
        if row['ImageID'] in all_image_ids:
            writer.writerow(row)

## Convert Annotations to YOLO Format - Understand the YOLO annotation format: for each image `image_id.jpg`, you need a text file `image_id.txt` containing one line per bounding box, in the format:
`class_id x_center y_center width height`

All coordinates are normalized between 0 and 1.

class_id is an integer starting from 0.


## Create a Mapping from Class IDs to YOLO Class Indices - Assign integer labels to your classes:

In [None]:
# Open Images label IDs listed in YOLO class-index order: the position of a
# label in this tuple is the integer class index written to the label files.
class_mapping = {
    label_name: class_index
    for class_index, label_name in enumerate((
        '/m/02vqfm',  # Coffee
        '/m/02y6n',   # French fries
    ))
}

{'/m/02vqfm': 0, '/m/02y6n': 1}

## Generate YOLO Annotation Files - Process the final_annotations.csv to create YOLO annotation files.

In [None]:
# Create a directory to store annotation files
annotations_dir = os.path.join(base_path, 'annotations')
os.makedirs(annotations_dir, exist_ok=True)

# Collect the label lines per image first, then write every label file exactly
# once in 'w' mode. Appending row by row made this cell non-idempotent:
# re-running it duplicated every bounding box in the existing label files, and
# it reopened a file on Drive for each CSV row.
yolo_lines_by_image = {}

# newline='' is what the csv module requires for files passed to a reader.
with open(os.path.join(base_path, 'final_annotations.csv'), 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        image_id = row['ImageID']
        class_id = row['LabelName']
        # Convert class ID to YOLO class index; skip labels without a mapping
        if class_id not in class_mapping:
            continue
        yolo_class_id = class_mapping[class_id]

        # Get bounding box coordinates
        try:
            xmin = float(row['XMin'])
            xmax = float(row['XMax'])
            ymin = float(row['YMin'])
            ymax = float(row['YMax'])
        except ValueError:
            print(f"Invalid bounding box values for image {image_id}")
            continue

        # Validate coordinates: a box needs a positive width and height
        if xmin >= xmax or ymin >= ymax:
            print(f"Invalid bounding box for image {image_id}")
            continue

        # Calculate YOLO format coordinates (box centre plus box size)
        x_center = (xmin + xmax) / 2.0
        y_center = (ymin + ymax) / 2.0
        width = xmax - xmin
        height = ymax - ymin

        # Keep the lines of one image together, in CSV row order
        yolo_lines_by_image.setdefault(image_id, []).append(
            f"{yolo_class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
        )

# Write one label file per image, replacing any file left by an earlier run
for image_id, yolo_lines in yolo_lines_by_image.items():
    annotation_file = os.path.join(annotations_dir, f'{image_id}.txt')
    with open(annotation_file, 'w') as f:
        f.writelines(yolo_lines)

## Organize Your Dataset for YOLO Training - YOLO models typically expect the dataset to be organized in a specific structure.

Prepare Directories - Organize data into training and validation sets.

In [None]:
import shutil
import random

# Destination folders of the YOLO dataset. They all live under base_path; no
# 'data/' tree is created relative to the current working directory, because
# nothing in this notebook reads from or writes to such a folder.
train_image_dest = os.path.join(base_path, 'data/images/train')
train_label_dest = os.path.join(base_path, 'data/labels/train')
valid_image_dest = os.path.join(base_path, 'data/images/valid')
valid_label_dest = os.path.join(base_path, 'data/labels/valid')

# Ensure destination directories exist
os.makedirs(train_image_dest, exist_ok=True)
os.makedirs(train_label_dest, exist_ok=True)
os.makedirs(valid_image_dest, exist_ok=True)
os.makedirs(valid_label_dest, exist_ok=True)

# Get all image IDs in a reproducible random order.
# The IDs come from a set, whose iteration order for strings can differ between
# Python sessions, so they are sorted first and then shuffled with a fixed
# seed. Re-running this cell therefore yields the same split; with an unseeded
# shuffle a second run could copy an image that is already in the train folder
# into the valid folder as well (and vice versa).
all_image_ids = sorted(all_image_ids)
random.Random(42).shuffle(all_image_ids)

# Split into train and validation sets (e.g., 80% train, 20% valid)
split_index = int(len(all_image_ids) * 0.8)
train_image_ids = all_image_ids[:split_index]
valid_image_ids = all_image_ids[split_index:]

# Define source folders
source_folders = [coffee_images_path, french_fries_images_path]

def copy_files(image_ids, image_src_folders, dest_image_folder, dest_label_folder,
               annotation_src_folder=None):
    """Copy images and their YOLO label files into one dataset split.

    For every ID, the first ``<image_id>.jpg`` found in ``image_src_folders``
    is copied to ``dest_image_folder``. The matching ``<image_id>.txt`` label
    file is copied to ``dest_label_folder``; when no label file exists, an
    empty one is created instead. IDs whose image is not found in any source
    folder are reported with ``print`` and skipped (no label file is written).

    Args:
        image_ids: Iterable of image IDs (file names without extension).
        image_src_folders: Folders searched, in order, for the image file.
        dest_image_folder: Existing folder that receives the images.
        dest_label_folder: Existing folder that receives the label files.
        annotation_src_folder: Folder holding the generated label files.
            Defaults to the module-level ``annotations_dir``.
    """
    if annotation_src_folder is None:
        # Backward-compatible default: use the notebook-level annotations folder
        annotation_src_folder = annotations_dir

    for image_id in image_ids:
        # Copy image from the first source folder that contains it
        for folder in image_src_folders:
            image_path = os.path.join(folder, f'{image_id}.jpg')
            if os.path.exists(image_path):
                shutil.copy(image_path, dest_image_folder)
                break
        else:
            # Loop finished without a break: the image is in none of the folders
            print(f"Image {image_id}.jpg not found in any source folder.")
            continue

        # Copy annotation
        annotation_src = os.path.join(annotation_src_folder, f'{image_id}.txt')
        annotation_dest = os.path.join(dest_label_folder, f'{image_id}.txt')
        if os.path.exists(annotation_src):
            shutil.copy(annotation_src, annotation_dest)
        else:
            # If no annotation exists, create an empty file (YOLO expects this).
            # Mode 'a' creates a missing file without truncating an existing one.
            open(annotation_dest, 'a').close()

# Populate both splits: first the training files, then the validation files
for split_image_ids, split_image_dest, split_label_dest in (
    (train_image_ids, train_image_dest, train_label_dest),
    (valid_image_ids, valid_image_dest, valid_label_dest),
):
    copy_files(split_image_ids, source_folders, split_image_dest, split_label_dest)

## Create a Data Configuration File - YOLO frameworks like YOLOv5 or YOLOv7 require a data configuration file specifying the dataset paths and classes.

Create a file named data.yaml:

In [None]:
# Dataset locations are derived from base_path, like every other path in this
# notebook, so moving the project only requires changing base_path. They point
# at the image folders of the train and validation splits.
train_images_dir = os.path.join(base_path, 'data/images/train')
valid_images_dir = os.path.join(base_path, 'data/images/valid')

# Define the content of the data.yaml file
data_yaml_content = f"""
train: {train_images_dir}
val: {valid_images_dir}

nc: 2  # number of classes
names: ['Coffee', 'French fries']  # class names
"""

# Path where you want to save the data.yaml file
yaml_file_path = os.path.join(base_path, 'data.yaml')

# Write the content to the data.yaml file
with open(yaml_file_path, 'w') as yaml_file:
    yaml_file.write(data_yaml_content)

In [None]:
# Shell command: list the project folder again to check the generated files and folders.
!ls /content/drive/MyDrive/DL_project/

annotations			data_preparation.ipynb	 oidv6-train-annotations-bbox.csv
class-descriptions-boxable.csv	data.yaml		 selected_annotations.csv
Coffee_class_data		final_annotations.csv	 yolo_traininng.ipynb
data				French_fries_class_data
