In [None]:
# Step 1: Install ultralytics
!pip install ultralytics

# Step 2: Import necessary libraries
import json
import os
import shutil
import random
from google.colab import files
from ultralytics import YOLO  # For inference with confidence threshold

# Step 3: Upload your COCO JSON file and images
print("Upload your COCO JSON file (e.g., coco_json_formet.json):")
uploaded = files.upload()
json_file = list(uploaded.keys())[0]  # Get the uploaded JSON filename

print("Upload your 263 images (zip file recommended):")
uploaded = files.upload()
image_zip = list(uploaded.keys())[0]  # Get the uploaded zip filename

# Step 4: Unzip the images into a proper directory
!unzip -q {image_zip} -d /content/images_uploaded

# Step 5: Set up directory structure
base_dir = '/content/train_data'
image_dir = '/content/train_data/images/train'
os.makedirs(image_dir, exist_ok=True)

# Move images from unzipped folder to train_data/images/train/
source_dir = '/content/images_uploaded/individual_packs'  # Adjust if nested
for img_file in os.listdir(source_dir):
    if img_file.endswith(('.jpg', '.png')):
        shutil.move(f'{source_dir}/{img_file}', f'{image_dir}/{img_file}')

# Step 6: Load and process the COCO JSON file
with open(json_file, 'r') as f:
    data = json.load(f)

# Convert a COCO-style box to normalized, center-based YOLO coordinates
def coco_to_yolo(bbox, img_width, img_height):
    """Return ``[x_center, y_center, width, height]`` scaled by the image size.

    ``bbox`` is unpacked as ``(x_min, y_min, w, h)``. The center is the
    top-left corner plus half the box extent; horizontal values are divided
    by ``img_width`` and vertical values by ``img_height``.
    """
    left, top, box_w, box_h = bbox
    center_x = left + box_w / 2
    center_y = top + box_h / 2
    return [
        center_x / img_width,
        center_y / img_height,
        box_w / img_width,
        box_h / img_height,
    ]

# Split images into train and val (80/20)
# NOTE: the shuffle is not seeded here, so the split differs from run to run.
images = data['images']
random.shuffle(images)
val_size = int(0.2 * len(images))  # rounds down: 52 of 263 images for val
train_images = images[val_size:]    # the remaining 211 images for train
val_images = images[:val_size]

# Create train/val folders
for split in ['train', 'val']:
    os.makedirs(f'{base_dir}/images/{split}', exist_ok=True)
    os.makedirs(f'{base_dir}/labels/{split}', exist_ok=True)

# Group annotations by image once, instead of rescanning the full annotation
# list for every image. Per-image order matches the order in the JSON.
annotations_by_image = {}
for ann in data['annotations']:
    annotations_by_image.setdefault(ann['image_id'], []).append(ann)

# Process images and annotations
for split, img_list in [('train', train_images), ('val', val_images)]:
    for img in img_list:
        img_id = img['id']
        img_name = img['file_name']
        img_width = img['width']
        img_height = img['height']

        # Move image
        src_path = f'{image_dir}/{img_name}'
        dst_path = f'{base_dir}/images/{split}/{img_name}'
        if not os.path.exists(src_path):
            print(f"Warning: Image {img_name} not found in {image_dir}")
        elif src_path != dst_path:
            # Train images are already staged in their final folder, so only
            # files whose destination actually differs need to be moved.
            shutil.move(src_path, dst_path)

        # Write labels
        # Swap only the final extension for .txt. Chained str.replace would
        # also rewrite ".jpg"/".png" occurring mid-name and would leave other
        # extensions (e.g. ".JPG", ".jpeg") untouched.
        label_file = os.path.splitext(img_name)[0] + '.txt'
        with open(f'{base_dir}/labels/{split}/{label_file}', 'w') as f:
            # An image without annotations still gets an (empty) label file.
            for ann in annotations_by_image.get(img_id, []):
                # Assumes COCO category ids are 1-based and contiguous so that
                # id - 1 lines up with the class indices in dataset.yaml
                # -- TODO confirm against the JSON's "categories" list.
                class_id = ann['category_id'] - 1  # Convert to 0-based index
                bbox = coco_to_yolo(ann['bbox'], img_width, img_height)
                f.write(f"{class_id} {bbox[0]:.6f} {bbox[1]:.6f} {bbox[2]:.6f} {bbox[3]:.6f}\n")

# Step 7: Create dataset.yaml
# Class names in index order; the position in this list is the class id
# written under "names:" in the YAML.
class_names = [
    'bensonswitchblack',
    'bensonswitchblue',
    'bensonswitchred',
    'motherbenson',
    'bensonswitchgreen',
    'camelblue',
    'camelgreen',
    'derbyoriginal',
    'goldleafswitch',
    'luckychill',
    'luckycool',
    'luckyoriginal',
    'luckyred',
    'royalsoriginal',
    'royalsnext',
    'marlboroadvance',
    'marlborogold',
    'marlborored',
    'mothergoldleaf',
]

# Header lines first, then one two-space-indented "index: name" line per class.
yaml_lines = [
    'path: /content/train_data',
    'train: images/train',
    'val: images/val',
    'names:',
]
yaml_lines.extend(f'  {idx}: {name}' for idx, name in enumerate(class_names))
yaml_content = '\n'.join(yaml_lines) + '\n'

with open('/content/dataset.yaml', 'w') as f:
    f.write(yaml_content)

# Step 8: Verify the setup
print("Train images:", len(os.listdir('/content/train_data/images/train')))
print("Val images:", len(os.listdir('/content/train_data/images/val')))
print("Train labels:", len(os.listdir('/content/train_data/labels/train')))
print("Val labels:", len(os.listdir('/content/train_data/labels/val')))

# Step 9: Train YOLOv8s model
!yolo train model=yolov8s.pt data=/content/dataset.yaml epochs=50 imgsz=640

# Step 10: Load the trained model and validate with confidence threshold
model = YOLO('/content/runs/detect/train/weights/best.pt')  # Load best model
results = model.val(data='/content/dataset.yaml', conf=0.5)  # Validate with confidence threshold 0.5

# Step 11: Download the models
print("Downloading best.pt and last.pt...")
files.download('/content/runs/detect/train/weights/best.pt')
files.download('/content/runs/detect/train/weights/last.pt')

print("Process complete!")

Collecting ultralytics
  Downloading ultralytics-8.3.82-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nv

Saving coco_json_formet.json to coco_json_formet.json
Upload your 263 images (zip file recommended):


Saving individual_packs.zip to individual_packs.zip
Train images: 211
Val images: 52
Train labels: 211
Val labels: 52
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt'...
100% 21.5M/21.5M [00:00<00:00, 415MB/s]
Ultralytics 8.3.82 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=/content/dataset.yaml, epochs=50, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, 

[34m[1mval: [0mScanning /content/train_data/labels/val.cache... 52 images, 0 backgrounds, 0 corrupt: 100%|██████████| 52/52 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:03<00:00,  1.09it/s]


                   all         52        377       0.87      0.823      0.863       0.74
     bensonswitchblack         16         54      0.946      0.981      0.987       0.84
      bensonswitchblue         17         60      0.982      0.933      0.966      0.842
       bensonswitchred         39        142       0.94      0.993      0.993      0.857
          motherbenson         18         91      0.947      0.978      0.981      0.846
             camelblue          1          6       0.75          1      0.995       0.79
            camelgreen          2          4          1          1      0.995      0.874
         luckyoriginal          2          2          1        0.5       0.75      0.675
              luckyred          1          1          0          0          0          0
       marlboroadvance          4          8          1          1      0.995      0.838
          marlborogold          4          6          1          1      0.995      0.908
           marlborore

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Process complete!
