# Fashion Object Detection

In [None]:
import warnings

# Suppress every warning category for the remainder of the session.
warnings.simplefilter("ignore")

### Import required libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import json
from PIL import Image
import io
import random

In [None]:
# Location of the training-split annotation file, relative to the working directory.
ANNOTATIONS_DIR = "annotations"
json_train_path = f"{ANNOTATIONS_DIR}/instances_train2024.json"

In [None]:
# Read the annotation file and parse it into plain Python objects.
with open(json_train_path, "r") as annotation_file:
    data = json.loads(annotation_file.read())

In [None]:
# The "info" section is not used below; discard it when present.
if "info" in data:
    del data["info"]

# Tabulate the "categories" and "annotations" sections as DataFrames.
categories, annotations = (
    pd.DataFrame(data[section]) for section in ("categories", "annotations")
)

In [None]:
# Bare name as the cell's last expression: the notebook renders the categories table.
categories 

In [None]:
# Bare name as the cell's last expression: the notebook renders the annotations table.
annotations

In [None]:
# Upper bound on how many distinct images are drawn into the working sample.
SAMPLE_SIZE = 500

# Every distinct image ID that has at least one annotation
all_image_ids = annotations['image_id'].unique()

# Draw a random sample of image IDs. The size is clamped because random.sample
# raises ValueError when asked for more items than the population holds.
# NOTE(review): the draw is unseeded, so each run selects different images.
sample_image_ids = random.sample(
    list(all_image_ids), min(SAMPLE_SIZE, len(all_image_ids))
)

# All annotations that belong to the sampled images
sampled_annotations = annotations[annotations['image_id'].isin(sample_image_ids)]

# Image IDs present in the sample (same set as sample_image_ids, as an array)
sampled_image_ids = sampled_annotations['image_id'].unique()

# Filtering `annotations` by sampled_image_ids again would select exactly the
# same rows, so take an independent copy instead. The copy also guarantees the
# column added below is written to this frame rather than to a temporary slice.
final_sampled_annotations = sampled_annotations.copy()

# Map each category ID seen in the sample to a consecutive index, numbered in
# order of first appearance.
unique_categories = final_sampled_annotations['category_id'].unique()
category_mapping = {cat: idx for idx, cat in enumerate(unique_categories)}
final_sampled_annotations['remapped_category_id'] = final_sampled_annotations['category_id'].map(category_mapping)

print(f"Number of sampled annotations: {len(final_sampled_annotations)}")

In [None]:
# Serialize the sampled annotations as a JSON array holding one object per row.
json_data = final_sampled_annotations.to_json(orient='records')

# Store the serialized text on disk so later cells can reload it.
with open('json_annotations.json', mode='w') as annotations_out:
    annotations_out.write(json_data)

In [None]:
# Reload the annotations written to json_annotations.json; the bare name on the
# last line makes the notebook render the resulting table.
sampled_annotations = pd.read_json('json_annotations.json')
sampled_annotations

In [None]:
# Tally how many annotation rows carry each category ID.
category_counts = sampled_annotations['category_id'].value_counts()

# Draw the tallies as a bar chart on a fresh 10x6 figure.
plt.figure(figsize=(10, 6))
category_counts.plot.bar()

# Title and axis labels.
plt.title('Number of Items per Category')
plt.xlabel('Category ID')
plt.ylabel('Count')

# Slant the tick labels so neighbouring IDs do not overlap.
plt.xticks(rotation=45)

# Print each count just above its bar (bars sit at positions 0, 1, 2, ...).
for bar_position, bar_height in enumerate(category_counts.tolist()):
    plt.text(bar_position, bar_height + 1, str(bar_height), ha='center', va='bottom')

# Render the figure.
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Work on an explicit copy so the dtype change below is applied to this frame
# and not to a slice of `annotations`.
final_sampled_annotations = final_sampled_annotations.copy()

# Convert category_id to categorical data type
final_sampled_annotations['category_id'] = final_sampled_annotations['category_id'].astype('category')

# Number of annotation rows per category across the sampled data
category_counts = final_sampled_annotations['category_id'].value_counts()

# Keep categories with at least 50 annotation rows.
# NOTE(review): the threshold counts annotations, not distinct images, although
# the message printed below says "images" -- confirm which one is intended.
valid_categories = category_counts[category_counts >= 50].index

# Restrict the dataset to the categories that passed the threshold
filtered_dataset = final_sampled_annotations[final_sampled_annotations['category_id'].isin(valid_categories)]

unique_remapped_id = filtered_dataset['remapped_category_id'].unique()

# Display total number of unique categories
unique_categories_count = len(filtered_dataset['category_id'].unique())
print(f"Total number of unique categories with 50 or more images: {unique_categories_count}")

# Split by IMAGE, not by annotation row. Splitting rows scatters the
# annotations of a single image over train/val/test, so the same picture would
# be copied into several split folders and leak between training and evaluation.
split_image_ids = filtered_dataset['image_id'].unique()
train_val_ids, test_ids = train_test_split(split_image_ids, test_size=0.1, random_state=42)
train_ids, val_ids = train_test_split(train_val_ids, test_size=0.2, random_state=42)

# Each split holds every remaining annotation of the images assigned to it
train_val_data = filtered_dataset[filtered_dataset['image_id'].isin(train_val_ids)]
train_data = filtered_dataset[filtered_dataset['image_id'].isin(train_ids)]
val_data = filtered_dataset[filtered_dataset['image_id'].isin(val_ids)]
test_data = filtered_dataset[filtered_dataset['image_id'].isin(test_ids)]

# Sizes are reported in annotation rows
print(f"Train set size: {len(train_data)}")
print(f"Validation set size: {len(val_data)}")
print(f"Test set size: {len(test_data)}")

In [None]:
# Count the distinct image IDs contained in each split.
unique_rows_train, unique_rows_val, unique_rows_test = (
    split_frame['image_id'].nunique()
    for split_frame in (train_data, val_data, test_data)
)

# Report the three counts in train / validation / test order.
for split_label, unique_total in (
    ("train", unique_rows_train),
    ("validation", unique_rows_val),
    ("test", unique_rows_test),
):
    print(f"Unique rows in {split_label} set based on image_id: {unique_total}")

In [None]:
import os
import shutil

# Directory holding every original image
original_folder = 'images/train/'

# Root directory of the split dataset rebuilt by this cell
main_dataset_folder = 'main_dataset'

# Start from a clean slate: delete any earlier copy, then recreate the root
if os.path.exists(main_dataset_folder):
    shutil.rmtree(main_dataset_folder)
os.makedirs(main_dataset_folder)

# One subfolder per split, created right away
train_folder, val_folder, test_folder = (
    os.path.join(main_dataset_folder, split_name)
    for split_name in ('train', 'val', 'test')
)
for split_dir in (train_folder, val_folder, test_folder):
    os.makedirs(split_dir, exist_ok=True)

# Copy every image referenced by a split into that split's folder
split_frames = (train_data, val_data, test_data)
split_dirs = (train_folder, val_folder, test_folder)
for df, folder in zip(split_frames, split_dirs):
    for image_id in df['image_id'].unique():
        file_name = f"{image_id}.jpg"  # images are assumed to be stored as <image_id>.jpg
        src_path = os.path.join(original_folder, file_name)
        if not os.path.isfile(src_path):
            print(f"Image not found: {src_path}")
            continue
        shutil.copy(src_path, os.path.join(folder, file_name))

In [None]:
import os
import shutil
from tqdm import tqdm
import pandas as pd
from PIL import Image

# Reload the annotations that an earlier cell wrote to disk
sampled_annotations = pd.read_json('json_annotations.json')

# Locations of the split image folders
train_folder, val_folder, test_folder = (
    'main_dataset/train',
    'main_dataset/val',
    'main_dataset/test',
)

# Destination root for the YOLO-formatted data
yolo_data_path = 'yolo_data'

# Rebuild the destination from scratch on every run
if os.path.exists(yolo_data_path):
    shutil.rmtree(yolo_data_path)
os.makedirs(yolo_data_path, exist_ok=True)

# Every split gets its own images/ and labels/ directory
for folder in ('train', 'val', 'test'):
    for content_kind in ('images', 'labels'):
        os.makedirs(os.path.join(yolo_data_path, folder, content_kind), exist_ok=True)

# Function to convert annotations to YOLO format
def convert_to_yolo_format(annotations, image_width, image_height):
    """Build one YOLO label line per annotation record.

    Args:
        annotations: Iterable of mappings providing ``category_id`` and
            ``bbox``. The first four ``bbox`` entries are read as
            (x, y, width, height), where (x, y) is the corner the box
            extends from.
        image_width: Width used to normalize x and the box width.
        image_height: Height used to normalize y and the box height.

    Returns:
        A list of strings shaped ``"<category_id> <x_center> <y_center>
        <width> <height>"``, with the four box values divided by the image
        dimensions.
    """

    def _label_line(record):
        # Read the class first, then the box, mirroring the record layout.
        class_id = record['category_id']
        box = record['bbox']
        # Shift from the box corner to its center before normalizing.
        center_x = (box[0] + box[2] / 2) / image_width
        center_y = (box[1] + box[3] / 2) / image_height
        rel_width = box[2] / image_width
        rel_height = box[3] / image_height
        return f"{class_id} {center_x} {center_y} {rel_width} {rel_height}"

    return [_label_line(record) for record in annotations]

# Function to process each dataset folder
def process_folder(folder_name, dataset_folder):
    """Copy one split's images into the YOLO tree and write their label files.

    For every regular file in ``dataset_folder`` whose name starts with an
    integer image ID (the part before the first dot), the file is copied to
    ``<yolo_data_path>/<folder_name>/images``. When ``sampled_annotations``
    has rows for that ID, a ``<id>.txt`` label file is written to
    ``<yolo_data_path>/<folder_name>/labels``.

    Args:
        folder_name: Split name; selects the sub-directory under
            ``yolo_data_path`` and appears in the progress-bar caption.
        dataset_folder: Directory containing that split's image files.

    NOTE(review): labels come from every row of ``sampled_annotations`` for
    the image and use the raw ``category_id``; confirm this matches the class
    list written to data.yaml.
    """
    images_dir = os.path.join(yolo_data_path, folder_name, 'images')
    labels_dir = os.path.join(yolo_data_path, folder_name, 'labels')

    for image_id in tqdm(os.listdir(dataset_folder), desc=f"Processing {folder_name}"):
        image_path = os.path.join(dataset_folder, image_id)
        if not os.path.isfile(image_path):
            continue

        # The numeric image ID is the file name up to its first dot.
        image_stem = image_id.split('.')[0]
        try:
            numeric_id = int(image_stem)
        except ValueError:
            # Stray file (e.g. OS metadata) without a numeric ID: skip it
            # instead of aborting the whole run after a partial copy.
            continue

        # Copy the image to the YOLO data folder
        shutil.copy(image_path, os.path.join(images_dir, image_id))

        # Annotation rows recorded for this image
        image_annotations = sampled_annotations[sampled_annotations['image_id'] == numeric_id]
        if len(image_annotations) == 0:
            continue

        # Read the dimensions inside a context manager so the underlying file
        # handle is released immediately rather than once per image at GC time.
        with Image.open(image_path) as image:
            image_width, image_height = image.size

        # Convert annotations to YOLO format
        yolo_annotations = convert_to_yolo_format(image_annotations.to_dict('records'), image_width, image_height)

        # Save the YOLO annotations to a text file named after the image ID
        with open(os.path.join(labels_dir, image_stem + '.txt'), 'w') as f:
            f.write('\n'.join(yolo_annotations))

# Convert the three splits one after another: train, then val, then test
for split_name, split_source in (
    ('train', train_folder),
    ('val', val_folder),
    ('test', test_folder),
):
    process_folder(split_name, split_source)

In [None]:
# Absolute path of the current working directory; later cells build paths from it.
home = os.path.abspath(os.curdir)

In [None]:
import json
import os

# Absolute path to the YOLO-formatted dataset folder
dataset_path = f'{home}/yolo_data'

# Number of classes that survived the category filter
num_classes = unique_categories_count

# Category IDs present in the filtered dataset
class_names = filtered_dataset['category_id'].unique().tolist()

# Class names for the YAML file: the remapped category IDs as strings.
# Interpolating the numpy array directly would emit "[ 0  1  2]", which YAML
# reads as a single scalar instead of a list with one entry per class.
# NOTE(review): label files must use class indices in 0..nc-1 -- confirm the
# IDs written by the label-conversion cell agree with this list.
yaml_class_names = [str(class_id) for class_id in unique_remapped_id]

# json.dumps yields double-quoted strings / flow sequences, which are also
# valid YAML and stay valid when the path contains spaces or ':' characters.
data_yml_content = f"""\
# Data path
path: {json.dumps(dataset_path)}

# Train and validation image folders, relative to `path`
train: train/images
val: val/images

# Classes
nc: {num_classes}
names: {json.dumps(yaml_class_names)}
"""

# Save the data.yaml file
with open('data.yaml', 'w', encoding='utf-8') as f:
    f.write(data_yml_content)

In [None]:
# Shell escape that launches yolov9/train.py with: batch size 16, 25 epochs,
# 640 px images, device 0, the data.yaml written above, gelan-c.pt as starting
# weights and the gelan-c model config. {home} is filled in from the notebook
# variable before the command runs.
!python yolov9/train.py \
--batch 16 --epochs 25 --img 640 --device 0 --min-items 0 --close-mosaic 15 \
--data data.yaml \
--weights {home}/weights/gelan-c.pt \
--cfg yolov9/models/detect/gelan-c.yaml \
--hyp hyp.scratch-high.yaml