<a href="https://colab.research.google.com/github/NishchayKarn/Academic_Mischief/blob/main/COCO_Dataset_Using_YOLO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested from the download stream per read.
CHUNK_SIZE = 40960
# Comma-separated "<directory>:<url-quoted download URL>" entries; the download loop
# below splits on ',' then ':' and unquotes the URL part.
# NOTE(review): the URL embeds X-Goog-Date=20241010T191041Z and X-Goog-Expires=259200,
# so this signed link has presumably expired — regenerate it from Kaggle if the download fails.
DATA_SOURCE_MAPPING = 'coco-2017-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F857191%2F1462296%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20241010%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20241010T191041Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D890888898be543c7645cb3856c7696f714f3b696150c49c896059a04c20a2180d694b93d73dd085d1c592ccbdace98c3c996c44adcc8354d5c19745ea4486726c19edc437c4a9408622d931207362f11c50a262c04b87c8783cf7b591287d9dc9a2c1832fa794a589cf9385f7ab285fd875741f92494497680ff6c47a6705291ad0a11edadad2b80aedf057dfd58593f4d85fe818bca05700c91df2238a419b1cced014f349ad5d8d42702b5322be6d55ba52a4ed668e7268ff40d1a3ef6b3f4c52d6b5571ca3fee5df35914b3caf24e256cbe83babd13c3ef645eb95fb1b0c42cef2bfd77b9a1ee2ab833538474f7b1c190b67419b1a3c1232c21419ff61eb2'

# Root directory under which each data source gets its own sub-directory.
KAGGLE_INPUT_PATH='/kaggle/input'
# Working directory created next to the input directory.
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): not referenced anywhere in the visible code.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every "<directory>:<url-quoted URL>" entry of DATA_SOURCE_MAPPING and
# unpack it into KAGGLE_INPUT_PATH/<directory>.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so an unescaped colon in the URL part cannot
    # break the two-value unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            # Content-Length is optional; without it only the byte counter is shown.
            total_bytes = int(total_length) if total_length else 0
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            while True:
                data = fileres.read(CHUNK_SIZE)
                if not data:
                    break
                dl += len(data)
                tfile.write(data)
                if total_bytes > 0:
                    # Clamp so a body longer than the announced length cannot
                    # produce a negative padding width.
                    done = min(50, int(50 * dl / total_bytes))
                    sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                else:
                    sys.stdout.write(f"\r{dl} bytes downloaded")
                sys.stdout.flush()
            # Make sure every buffered byte is visible to the archive readers and
            # that they start reading at the beginning of the file.
            tfile.flush()
            tfile.seek(0)
            # NOTE(review): extractall trusts the member paths inside the archive;
            # only use this with archives from a trusted source.
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name: reusing `tarfile` here would
                # replace the module and break the next loop iteration.
                with tarfile.open(fileobj=tfile) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


# COCO Dataset to YOLO Format Conversion and Training with Ultralytics YOLOv8

This notebook converts the COCO dataset into YOLO format and trains a YOLOv8 model using Ultralytics. It supports different annotation types: bounding boxes, segmentation masks, and keypoints.

**Key Features:**

* **Annotation Type Selection:** Choose between 'bbox', 'segmentation', and 'keypoints'.
* **Data Reduction:** Optionally keep only a random fraction of the training and validation images (controlled by a per-split reduce factor) for faster experimentation.
* **Parallel Processing:** Utilizes multiple CPU cores for efficient data conversion.
* **Automatic Configuration:** Generates a `config.yaml` file for YOLOv8 training.
* **Training Visualization:** Displays training results with confusion matrix and metrics plots.
* **Prediction Example:** Demonstrates prediction on sample images.

In [None]:
import os
import json
import shutil
import random
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
import numpy as np

## Configuration

In [None]:
# Set the home directory
# (the current working directory at the time this cell runs; the converted data
# and the run folders used later are addressed relative to it)
HOME = Path.cwd()

# Set paths
# COCO_PATH is read from: annotation JSON files under 'annotations' and images under '<split>2017'.
COCO_PATH = Path('/kaggle/input/coco-2017-dataset/coco2017')
# OUTPUT_PATH is written to: '<split>/images', '<split>/labels' and config.yaml.
OUTPUT_PATH = HOME / 'data'

# Choose your desired annotation type: 'bbox', 'keypoints', or 'segmentation'
# This also selects the annotation file that is loaded and the model that is trained.
chosen_annotation_type = 'segmentation'

# Data reduction factors (set to 1.0 to use the full dataset)
# Each image is kept independently with this probability (see process_image), so the
# subset size is only approximately factor * dataset size.
# NOTE(review): no random seed is set in the visible code, so the subset differs between runs.
train_reduce_factor = 0.1
val_reduce_factor = 0.2

## Helper Functions

In [None]:
# Function to convert COCO bbox to YOLO format
def coco_to_yolo_bbox(bbox, img_width, img_height):
    """Turn an ``[x, y, width, height]`` box into a normalized, center-based box.

    Args:
        bbox: Sequence whose first four items are read as x, y, width, height,
            where x/y is the corner the box extends from (half the size is
            added to reach the center).
        img_width: Value the horizontal quantities are divided by.
        img_height: Value the vertical quantities are divided by.

    Returns:
        ``[x_center, y_center, width, height]`` as fractions of the image size.
    """
    left, top, box_w, box_h = bbox[0], bbox[1], bbox[2], bbox[3]
    center_x = (left + box_w / 2) / img_width
    center_y = (top + box_h / 2) / img_height
    return [center_x, center_y, box_w / img_width, box_h / img_height]

# Function to convert COCO segmentation to YOLO format
def coco_to_yolo_seg(segmentation, img_width, img_height):
    """Normalize polygon coordinates by the image size.

    The input is flattened first; values at even positions are divided by
    ``img_width`` and values at odd positions by ``img_height``.

    Args:
        segmentation: Polygon coordinates, flat or nested (anything
            ``np.array`` can flatten).
        img_width: Divisor for even-position values.
        img_height: Divisor for odd-position values.

    Returns:
        Flat list of normalized coordinates, in the input order.
    """
    flat_coords = np.array(segmentation).flatten().tolist()
    normalized = []
    for position, value in enumerate(flat_coords):
        divisor = img_height if position % 2 else img_width
        normalized.append(value / divisor)
    return normalized

# Function to convert COCO keypoints to YOLO format
def coco_to_yolo_keypoints(keypoints, img_width, img_height, num_keypoints=17):
    """Convert flat ``[x, y, v, ...]`` keypoints to normalized YOLO pose values.

    Every keypoint keeps its own slot in the output. A keypoint with ``v <= 0``
    is written as ``0 0 0`` in place instead of being dropped, because dropping
    it would shift all later keypoints into the wrong slot of a fixed-size
    keypoint layout.

    Args:
        keypoints: Flat sequence of ``x, y, v`` triplets. A trailing partial
            triplet is ignored.
        img_width: Divisor for x values.
        img_height: Divisor for y values.
        num_keypoints: Number of keypoint slots to emit. Extra input triplets
            are truncated and missing ones are zero-filled. Defaults to 17.

    Returns:
        Flat list of exactly ``num_keypoints * 3`` values.
    """
    # zip stops at the shortest slice, so only complete triplets are produced.
    triplets = list(zip(keypoints[0::3], keypoints[1::3], keypoints[2::3]))[:num_keypoints]

    yolo_keypoints = []
    for x, y, v in triplets:
        if v > 0:
            yolo_keypoints.extend([x / img_width, y / img_height, v])
        else:
            # Keep the slot so the following keypoints stay at their own index.
            yolo_keypoints.extend([0, 0, 0])

    # Zero-fill slots for which the annotation supplied no triplet at all.
    yolo_keypoints.extend([0, 0, 0] * (num_keypoints - len(triplets)))
    return yolo_keypoints

# Function to process a single image
def process_image(img, annotations, cat_id_to_name, split, reduce_factor, valid_class_ids, annotation_type):
    """Randomly sample one image, copy it, and write its YOLO label file.

    The image is skipped when ``random.random() > reduce_factor``. Otherwise
    it is copied from ``COCO_PATH/<split>2017`` to ``OUTPUT_PATH/<split>/images``
    and one label line per usable annotation is written to
    ``OUTPUT_PATH/<split>/labels/<stem>.txt``.

    Args:
        img: Image record; ``id``, ``file_name``, ``width`` and ``height`` are read.
        annotations: Annotation records to consider; those whose ``image_id``
            differs from the image id are ignored.
        cat_id_to_name: Unused here; kept so existing callers keep working.
        split: Split name used to build source and destination paths.
        reduce_factor: Probability of keeping the image.
        valid_class_ids: Category ids to export; the position of an id in this
            sequence is the class index written to the label file.
        annotation_type: ``'bbox'``, ``'segmentation'`` or ``'keypoints'``.

    Returns:
        None.
    """
    if random.random() > reduce_factor:
        return

    img_id = img['id']
    img_name = img['file_name']
    img_width, img_height = img['width'], img['height']

    # Copy image
    src_img_path = COCO_PATH / f'{split}2017' / img_name
    dst_img_path = OUTPUT_PATH / split / 'images' / img_name
    shutil.copy(src_img_path, dst_img_path)

    # One dict lookup per annotation instead of a linear list.index() scan.
    class_index_by_cat_id = {cat_id: idx for idx, cat_id in enumerate(valid_class_ids)}

    # Process annotations for this image
    label_path = OUTPUT_PATH / split / 'labels' / (Path(img_name).stem + '.txt')
    with open(label_path, 'w') as label_file:
        for ann in annotations:
            if ann['image_id'] != img_id:
                continue
            cat_id = ann['category_id']
            if cat_id not in class_index_by_cat_id:
                continue

            if annotation_type == 'bbox':
                class_index = class_index_by_cat_id[cat_id]
                values = coco_to_yolo_bbox(ann['bbox'], img_width, img_height)
            elif annotation_type == 'segmentation':
                polygons = ann['segmentation']
                # COCO stores some masks (crowd regions) as an RLE dict rather than
                # a list of polygons; indexing that dict with [0] raised KeyError and
                # aborted the rest of this label file. Such masks are skipped.
                if not isinstance(polygons, list) or not polygons:
                    continue
                polygon = polygons[0]  # Only the first polygon is exported
                # Fewer than three points cannot describe an area.
                if len(polygon) < 6:
                    continue
                class_index = class_index_by_cat_id[cat_id]
                values = coco_to_yolo_seg(polygon, img_width, img_height)
            elif annotation_type == 'keypoints' and cat_id == 1:  # Only for person
                class_index = 0
                yolo_bbox = coco_to_yolo_bbox(ann['bbox'], img_width, img_height)
                yolo_keypoints = coco_to_yolo_keypoints(ann['keypoints'], img_width, img_height)
                values = yolo_bbox + yolo_keypoints
            else:
                continue

            label_file.write(f"{class_index} {' '.join(map(str, values))}\n")

# Function to process dataset
def process_dataset(split, reduce_factor=1.0, annotation_type='bbox'):
    """Convert one COCO split to YOLO images and labels using a thread pool.

    Loads the annotation JSON for ``split`` from ``COCO_PATH/annotations``
    (``person_keypoints_*`` for ``'keypoints'``, ``instances_*`` otherwise),
    groups annotations by image id, and submits one ``process_image`` task
    per image.

    Side effects:
        Rebinds the module-level ``cat_id_to_name`` (category id -> name) to
        the categories of the file just loaded.

    Args:
        split: Split name, used in annotation file and directory names.
        reduce_factor: Per-image keep probability forwarded to ``process_image``.
        annotation_type: ``'bbox'``, ``'segmentation'`` or ``'keypoints'``.

    Returns:
        None. Images whose conversion raised are counted and reported with a
        printed warning instead of being dropped silently.
    """
    global cat_id_to_name  # Declare cat_id_to_name as global

    print(f"Processing {split} dataset with annotation type: {annotation_type}...")

    # Load annotations
    if annotation_type == 'keypoints':
        annotation_file = f'person_keypoints_{split}2017.json'
    else:
        annotation_file = f'instances_{split}2017.json'

    with open(COCO_PATH / 'annotations' / annotation_file) as f:
        coco_data = json.load(f)

    # Create category id to name mapping
    cat_id_to_name = {cat['id']: cat['name'] for cat in coco_data['categories']}

    # First 80 category ids in ascending order; process_image uses the position
    # of an id in this list as the class index it writes.
    valid_class_ids = sorted(cat_id_to_name)[:80]

    # Create a dictionary of annotations by image_id for faster lookup
    annotations_by_image = {}
    for ann in coco_data['annotations']:
        annotations_by_image.setdefault(ann['image_id'], []).append(ann)

    # Process images and annotations in parallel
    failures = []
    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
        # Map each future to a label for its image so failures can be attributed.
        futures = {
            executor.submit(process_image, img, annotations_by_image.get(img['id'], []),
                            cat_id_to_name, split, reduce_factor, valid_class_ids,
                            annotation_type): img.get('file_name', img['id'])
            for img in coco_data['images']
        }

        for future in tqdm(as_completed(futures), total=len(futures)):
            # An exception raised inside a worker is stored on its future and is
            # lost unless it is asked for explicitly.
            error = future.exception()
            if error is not None:
                failures.append((futures[future], error))

    if failures:
        first_image, first_error = failures[0]
        print(f"Warning: {len(failures)} image(s) failed during {split} conversion; "
              f"first failure: {first_image}: {first_error!r}")

## Data Preparation

In [None]:
# Create output directories
for split in ('train', 'val'):
    split_root = OUTPUT_PATH / split
    for subdir in ('images', 'labels'):
        (split_root / subdir).mkdir(parents=True, exist_ok=True)

# Process datasets (train first, then val), each with its own reduce factor
reduce_factor_by_split = {'train': train_reduce_factor, 'val': val_reduce_factor}
for split, split_factor in reduce_factor_by_split.items():
    process_dataset(split, reduce_factor=split_factor, annotation_type=chosen_annotation_type)

## Create `config.yaml`

In [None]:
# Create config.yaml based on the chosen annotation type
if chosen_annotation_type == 'keypoints':
    class_names = ['person']
else:
    # cat_id_to_name is set by process_dataset; names are listed in ascending
    # category-id order, matching the class indices written to the label files.
    class_names = [cat_id_to_name[cat_id] for cat_id in sorted(cat_id_to_name)[:80]]

# Keys shared by every annotation type. 'nc' is derived from the names so the
# two can never disagree.
config = {
    'path': str(OUTPUT_PATH),
    'train': 'train/images',
    'val': 'val/images',
    'nc': len(class_names),
    'names': class_names,
}

if chosen_annotation_type == 'keypoints':
    # 17 keypoints with 3 values each, plus the index permutation used for flips.
    config['kpt_shape'] = [17, 3]
    config['flip_idx'] = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]

# The file is written as JSON (NOTE(review): relies on the YAML loader accepting JSON syntax).
with open(OUTPUT_PATH / "config.yaml", 'w') as f:
    json.dump(config, f, indent=2)

print("Dataset preprocessing completed.")

## Install Ultralytics YOLOv8

In [None]:
!pip install ultralytics
!pip uninstall -y wandb  # Optional: Uninstall wandb if not needed

## Model Training

In [None]:
from ultralytics import YOLO

# Pretrained checkpoint and run sub-folder name for each supported annotation type.
MODEL_BY_ANNOTATION_TYPE = {
    'bbox': ("yolov8n.pt", 'detect'),
    'segmentation': ("yolov8n-seg.pt", 'segment'),
    'keypoints': ("yolov8n-pose.pt", 'pose'),
}

# Fail here with a clear message instead of hitting a NameError on `model` or
# `train_folder` further down.
if chosen_annotation_type not in MODEL_BY_ANNOTATION_TYPE:
    raise ValueError(
        f"Unsupported annotation type {chosen_annotation_type!r}; "
        f"expected one of {sorted(MODEL_BY_ANNOTATION_TYPE)}"
    )

# Choose the appropriate model based on the annotation type
weights_name, train_folder = MODEL_BY_ANNOTATION_TYPE[chosen_annotation_type]
model = YOLO(weights_name)

# Train the model, reading the config from the directory it was written to
results = model.train(data=str(OUTPUT_PATH / "config.yaml"), epochs=1, imgsz=640)

## Visualize Training Results

In [None]:
from IPython.display import Image

# Display training results automatically
print(f"Displaying training results for {chosen_annotation_type}...")

!ls {HOME}/runs/{train_folder}/train

Image(filename=f'{HOME}/runs/{train_folder}/train/confusion_matrix.png', width=600)

In [None]:
# Metrics plot from the training run folder
results_plot_path = f'{HOME}/runs/{train_folder}/train/results.png'
Image(filename=results_plot_path, width=600)

## Model Prediction

In [None]:
# Load a model
model = YOLO(f'{HOME}/runs/{train_folder}/train/weights/best.pt')  # load a custom model

# Predict with the model
sample_image = Path(f'{COCO_PATH}/test2017/000000000016.jpg')
results = model(str(sample_image), save=True, imgsz=320, conf=0.5)  # predict on an image

# Display the prediction image
# Ask the predictor where it saved the output instead of assuming 'predict':
# Ultralytics picks a new folder (predict2, predict3, ...) when one already
# exists, so a hard-coded path would show a stale image after a re-run.
Image(filename=str(Path(model.predictor.save_dir) / sample_image.name), height=600)

In [None]:
# Predict with the model
sample_image = Path(f'{COCO_PATH}/test2017/000000000001.jpg')
results = model(str(sample_image), save=True, imgsz=320, conf=0.5)  # predict on an image

# Display the prediction image
# Read the output folder from the predictor rather than hard-coding 'predict',
# which is only correct when no earlier prediction folder exists.
Image(filename=str(Path(model.predictor.save_dir) / sample_image.name), height=600)