# Train YOLOv8 Medium in Colab (High Accuracy)

This notebook trains the **Medium (YOLOv8m)** model instead of Nano.
It is slightly slower but much more accurate, reducing false positives.

### Steps
1.  **Run All Cells**.
2.  **Upload `kaggle.json`** when prompted.
3.  **Use Default Dataset**: When asked for a Kaggle dataset slug, press **Enter** to use `pkdarabi/helmet`.
4.  **Download Model**: When training finishes, the last cell downloads the trained weights (`best.pt`) to your computer automatically.

In [None]:
!pip install ultralytics kaggle

In [None]:
import os
from google.colab import files

# 1. Upload kaggle.json
# files.upload() opens a file picker; the result maps each uploaded file name
# to its content (the code below relies on len(uploaded[fn]) and on writing
# uploaded[name] as bytes).
print("Please upload your 'kaggle.json' file:")
uploaded = files.upload()

for fn in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(uploaded[fn])))

# The token is often saved under a different name (e.g. "kaggle (1).json" after
# a repeated download), so do not assume the upload is called exactly
# 'kaggle.json': prefer that name, otherwise fall back to the first .json file.
json_names = [name for name in uploaded if name.lower().endswith('.json')]
if 'kaggle.json' in uploaded:
    token_name = 'kaggle.json'
elif json_names:
    token_name = json_names[0]
else:
    # Stop here: every later cell needs working Kaggle credentials.
    raise RuntimeError(
        "No .json file was uploaded. Re-run this cell and upload your 'kaggle.json'.")

# Write the credentials where the Kaggle CLI looks for them.
kaggle_dir = os.path.expanduser('~/.kaggle')
os.makedirs(kaggle_dir, exist_ok=True)
token_path = os.path.join(kaggle_dir, 'kaggle.json')
with open(token_path, 'wb') as token_file:
    token_file.write(uploaded[token_name])
# Owner-only permissions, same as the original `chmod 600`.
os.chmod(token_path, 0o600)

# Do not leave a copy of the credentials in the working directory
# (the original moved the file away instead of copying it).
if os.path.exists(token_name):
    os.remove(token_name)

print("Kaggle API configured!")

In [None]:
# 2. Download Dataset
# Using 'pkdarabi/helmet' (High Quality)
import os
import re
import subprocess
import zipfile

default_slug = 'pkdarabi/helmet'
dataset_slug = input(f"Enter Kaggle Dataset Slug (Press Enter to use '{default_slug}'): ").strip() or default_slug

# The slug is typed by the user, so check its shape before handing it to an
# external command. (An empty answer already falls back to the default above,
# so there is no "nothing entered" case to handle.)
if not re.fullmatch(r"[\w.-]+/[\w.-]+", dataset_slug):
    raise ValueError(
        f"'{dataset_slug}' is not a valid Kaggle dataset slug (expected 'owner/dataset-name').")

print(f"Downloading {dataset_slug}...")
# Argument list instead of a shell string: no quoting or injection issues.
download = subprocess.run(
    ["kaggle", "datasets", "download", "-d", dataset_slug],
    capture_output=True,
    text=True,
)
print(download.stdout)
if download.returncode != 0:
    # Fail loudly instead of reporting success after a failed download.
    raise RuntimeError(f"Kaggle download failed:\n{download.stderr}")

# Unzip
# The archive is expected in the working directory, named after the dataset
# part of the slug.
zip_name = dataset_slug.split('/')[-1] + ".zip"
with zipfile.ZipFile(zip_name) as archive:
    # extractall overwrites existing files, so re-running the cell never
    # stops at an interactive "replace?" prompt.
    archive.extractall("dataset")
os.remove(zip_name)
print("Dataset downloaded and extracted to 'dataset/'")

In [None]:
# 3. Find data.yaml
import glob
import os

# glob returns matches in arbitrary order, so sort them to make the choice
# reproducible: shallowest path first, ties broken alphabetically.
yaml_files = sorted(
    glob.glob("dataset/**/data.yaml", recursive=True),
    key=lambda path: (path.count(os.sep), path),
)

if yaml_files:
    data_yaml = yaml_files[0]
    print(f"Found configuration: {data_yaml}")
    if len(yaml_files) > 1:
        # Make the selection visible when the archive ships several configs.
        print(f"Note: {len(yaml_files)} 'data.yaml' files found; using the shallowest one.")
else:
    print("Error: Could not find 'data.yaml'. Please check if the dataset is in YOLOv8 format.")
    # Later cells check this value before training.
    data_yaml = None

In [None]:
# 4. Train Model (Using YOLOv8m - Medium)
from ultralytics import YOLO

# Training needs the dataset configuration path located by the previous step.
if not data_yaml:
    print("Cannot train without valid data.yaml")
else:
    # Start from the Medium checkpoint.
    model = YOLO('yolov8m.pt')

    # 50 epochs at 640px; results are written under runs/detect/train.
    model.train(
        data=data_yaml,
        epochs=50,
        imgsz=640,
        project='runs/detect',
        name='train',
    )

In [None]:
# 5. Export/Download
from google.colab import files
import os
import glob

# Find every best.pt below the working directory.
weights_files = glob.glob('**/best.pt', recursive=True)

if weights_files:
    # Several best.pt files can exist (e.g. left over from earlier training
    # runs) and glob returns them in arbitrary order, so pick the most
    # recently modified one instead of whichever happens to come first.
    best_weights = max(weights_files, key=os.path.getmtime)
    print(f"Found model at: {best_weights}")
    print("Downloading... This is a larger file (~50MB), please wait.")
    files.download(best_weights)
else:
    print("Training output not found. Please check manually.")