In [None]:
!pip install ultralytics roboflow
!pip install ultralytics pyyaml

Collecting ultralytics
  Downloading ultralytics-8.3.221-py3-none-any.whl.metadata (37 kB)
Collecting roboflow
  Downloading roboflow-1.2.11-py3-none-any.whl.metadata (9.7 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.17-py3-none-any.whl.metadata (14 kB)
Collecting idna==3.7 (from roboflow)
  Downloading idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting opencv-python-headless==4.10.0.84 (from roboflow)
  Downloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting pi-heif<2 (from roboflow)
  Downloading pi_heif-1.1.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (6.5 kB)
Collecting pillow-avif-plugin<2 (from roboflow)
  Downloading pillow_avif_plugin-1.5.2-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting filetype (from roboflow)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading ultralytics-8.3.221

In [None]:
# Mount Google Drive at /content/drive so files stored there (the dataset archive
# under MyDrive) can be read from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!mkdir /content/datasets

In [None]:
!unzip /content/drive/MyDrive/original_dataset.zip -d /content/datasets

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/datasets/train/labels/KA09HT3410_1721953345167_jpg.rf.c12b702787f12d56db68156b7a87bd09.txt  
  inflating: /content/datasets/train/labels/KA09HT3727_1721459889029_jpg.rf.a4db27b70d9d07967494d7308b4da6df.txt  
  inflating: /content/datasets/train/labels/KA09HT3727_1721459889029_jpg.rf.b61946e176eb79d54351ab0dca105423.txt  
  inflating: /content/datasets/train/labels/KA09HT3785_1727934641570_jpg.rf.217dbd4551c60957f5f4fb1bf34d7a65.txt  
  inflating: /content/datasets/train/labels/KA09HT3785_1727934641570_jpg.rf.d739ae39b3a04879ed9b38b75d1c1a34.txt  
  inflating: /content/datasets/train/labels/KA09HT3845_1721463118814_jpg.rf.14ac22808c6665e55a0c2d850c4814ff.txt  
  inflating: /content/datasets/train/labels/KA09HT3845_1721463118814_jpg.rf.d094fdedf55dcf008be418eea8141929.txt  
  inflating: /content/datasets/train/labels/KA09HT4237_1723500335678_jpg.rf.aa924d5999e18800d6c80773ddce64aa.txt  
  inflating: /c

In [None]:
import os
import random
import shutil
import glob
import yaml # Make sure PyYAML is installed (!pip install pyyaml)

# Split the unzipped dataset into train/valid and point data.yaml at the result.
#
# Steps:
#   1. Locate the dataset root (the folder holding 'train/' and 'data.yaml').
#   2. Move a fixed, seeded fraction of train image/label pairs into 'valid/'.
#   3. Rewrite the 'train' / 'val' entries of data.yaml.
# On success the cell exposes `correct_path_to_yaml` and `project_root_directory`
# for the training cell; on any failure both stay None.

# --- Configuration ---
search_dir = '/content/datasets'  # Where you unzipped everything
split_percentage = 0.20           # Move 20% of files to validation
split_seed = 42                   # Fixed seed: a fresh run always picks the same images
image_extensions = ('.png', '.jpg', '.jpeg')

# Defined up front so later cells can always test these names, even when this
# cell stops early (dataset not found, no images, data.yaml update failed).
correct_path_to_yaml = None
project_root_directory = None


def find_dataset_root(search_root):
    """Return the first directory under `search_root` that contains both a
    'train' sub-directory and a 'data.yaml' file, or '' if there is none."""
    for root, dirs, files in os.walk(search_root):
        if 'train' in dirs and 'data.yaml' in files:
            return root
    return ""


def list_images(directory, extensions=image_extensions):
    """Return the sorted image filenames found directly in `directory`.

    Returns [] when the directory does not exist. Sorting removes the
    filesystem-dependent order of os.listdir so a seeded shuffle is repeatable.
    """
    if not os.path.isdir(directory):
        return []
    return sorted(f for f in os.listdir(directory) if f.lower().endswith(extensions))


def split_train_to_valid(train_img_dir, train_lbl_dir, valid_img_dir, valid_lbl_dir,
                         fraction, seed=42):
    """Move a random `fraction` of image/label pairs from train to valid.

    Args:
        train_img_dir, train_lbl_dir: source folders for images and '.txt' labels.
        valid_img_dir, valid_lbl_dir: destination folders (must already exist).
        fraction: share of the training images to move, e.g. 0.2.
        seed: seed of the private random generator used for the shuffle.

    Returns:
        (moved_count, skipped_count). A pair is skipped when its label (or image)
        is missing or when moving it fails.
    """
    images = list_images(train_img_dir)
    # A private generator keeps the split reproducible without touching the
    # state of the global `random` module.
    random.Random(seed).shuffle(images)

    num_to_move = int(len(images) * fraction)
    print(f"🔢 Total training images: {len(images)}")
    print(f"🚚 Moving {num_to_move} images (and their labels) to validation set...")

    moved_count = 0
    skipped_count = 0
    for img_filename in images[:num_to_move]:
        base_filename = os.path.splitext(img_filename)[0]
        lbl_filename = base_filename + '.txt'

        src_img_path = os.path.join(train_img_dir, img_filename)
        dst_img_path = os.path.join(valid_img_dir, img_filename)
        src_lbl_path = os.path.join(train_lbl_dir, lbl_filename)
        dst_lbl_path = os.path.join(valid_lbl_dir, lbl_filename)

        missing = []
        if not os.path.exists(src_img_path):
            missing.append("image")
        if not os.path.exists(src_lbl_path):
            missing.append("label")
        if missing:
            print(f"⚠️ Warning: Could not find matching {', '.join(missing)} for '{base_filename}' in train dirs. Skipping.")
            skipped_count += 1
            continue

        try:
            shutil.move(src_img_path, dst_img_path)
            try:
                shutil.move(src_lbl_path, dst_lbl_path)
            except OSError:
                # Put the image back so valid/ never holds an image without its label.
                shutil.move(dst_img_path, src_img_path)
                raise
            moved_count += 1
        except OSError as e:  # shutil.Error is a subclass of OSError
            print(f"⚠️ Error moving {base_filename}: {e}")
            skipped_count += 1

    return moved_count, skipped_count


def update_data_yaml(yaml_path, dataset_root):
    """Rewrite the 'train' and 'val' entries of data.yaml for the new split.

    A 'test' entry given as a non-empty string is dropped when the folder it
    names does not exist under `dataset_root`; any other 'test' value is left
    untouched.

    Returns:
        True when the file was rewritten, False when it is missing or the
        update failed (the reason is printed).
    """
    if not os.path.exists(yaml_path):
        print(f"❌ Warning: data.yaml not found at {yaml_path}. Cannot automatically update paths.")
        return False

    try:
        with open(yaml_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
        if not isinstance(data, dict):
            raise ValueError("data.yaml is empty or is not a key/value mapping")

        # Paths are written relative to the dataset root.
        data['train'] = 'train/images'
        data['val'] = 'valid/images'

        # Remove 'test' key if it exists and the folder doesn't
        test_entry = data.get('test')
        if isinstance(test_entry, str) and test_entry:
            test_img_path_rel = test_entry.replace('../', '')
            test_img_path_abs = os.path.join(dataset_root, test_img_path_rel)
            if not os.path.exists(test_img_path_abs):
                print("Removing 'test' path from data.yaml as folder not found.")
                del data['test']
            else:
                print("'test' path found and kept in data.yaml.")

        # Ensure nc and names are present
        if 'nc' not in data or 'names' not in data:
            print("⚠️ Warning: 'nc' or 'names' missing from data.yaml. Training might fail.")

        # Serialize first: if dumping fails, the existing file is not truncated.
        yaml_text = yaml.dump(data, sort_keys=False, default_flow_style=None)
        with open(yaml_path, 'w', encoding='utf-8') as f:
            f.write(yaml_text)
        print(f"✅ Successfully updated 'train' and 'val' paths in {yaml_path}")
        return True

    except Exception as e:
        # Best effort: report and let the caller leave the exported paths as None.
        print(f"❌ Error updating data.yaml: {e}")
        print("❗ Please manually check and update the 'train' and 'val' paths in your data.yaml file before training.")
        return False


# --- Locate the dataset ---
base_dataset_dir = find_dataset_root(search_dir)

if not base_dataset_dir:
    print(f"❌ Error: Could not find the main dataset directory containing 'train' and 'data.yaml' inside {search_dir}. Please check your unzip path.")
else:
    print(f"✅ Found dataset base directory: {base_dataset_dir}")

    train_img_dir = os.path.join(base_dataset_dir, 'train', 'images')
    train_lbl_dir = os.path.join(base_dataset_dir, 'train', 'labels')

    valid_img_dir = os.path.join(base_dataset_dir, 'valid', 'images')
    valid_lbl_dir = os.path.join(base_dataset_dir, 'valid', 'labels')

    # --- Create Validation Directories ---
    os.makedirs(valid_img_dir, exist_ok=True)
    os.makedirs(valid_lbl_dir, exist_ok=True)
    print("📁 Created 'valid/images' and 'valid/labels' directories.")

    # --- Split (only once) ---
    split_ready = False
    existing_valid_images = list_images(valid_img_dir)
    if existing_valid_images:
        # Re-running the cell must not move a further 20% out of train/.
        print(f"ℹ️ 'valid/images' already holds {len(existing_valid_images)} images. Skipping the split.")
        split_ready = True
    elif not list_images(train_img_dir):
        print(f"❌ Error: No images found in {train_img_dir}. Check unzip step.")
    else:
        moved_count, skipped_count = split_train_to_valid(
            train_img_dir, train_lbl_dir, valid_img_dir, valid_lbl_dir,
            split_percentage, seed=split_seed,
        )
        print(f"✅ Successfully moved {moved_count} image/label pairs.")
        if skipped_count > 0:
            print(f"⚠️ Skipped {skipped_count} pairs due to missing files or move errors.")
        print("Dataset splitting complete.")
        split_ready = True

    # --- Automatically Update data.yaml ---
    if split_ready:
        yaml_path = os.path.join(base_dataset_dir, 'data.yaml')
        if update_data_yaml(yaml_path, base_dataset_dir):
            # Store paths for the next step (used in the subsequent training cell)
            correct_path_to_yaml = yaml_path
            project_root_directory = base_dataset_dir

# You can now use correct_path_to_yaml and project_root_directory
# in the next cell for training; both are None if any step above failed.

✅ Found dataset base directory: /content/datasets
📁 Created 'valid/images' and 'valid/labels' directories.
🔢 Total training images: 10295
🚚 Moving 2059 images (and their labels) to validation set...
✅ Successfully moved 2059 image/label pairs.
Dataset splitting complete.
Removing 'test' path from data.yaml as folder not found.
✅ Successfully updated 'train' and 'val' paths in /content/datasets/data.yaml


In [None]:
from ultralytics import YOLO
import os

# --- Check if paths were set by the previous script ---
# Training only starts when the split cell exported both names with non-empty
# values and the data.yaml it points to still exists on disk.
if 'correct_path_to_yaml' in locals() and correct_path_to_yaml and \
   'project_root_directory' in locals() and project_root_directory and \
   os.path.exists(correct_path_to_yaml):

    print(f"🚀 Starting training...")
    print(f"Using YAML path: {correct_path_to_yaml}")
    print(f"Using Project Root: {project_root_directory}")

    # 1. Load a base pre-trained model (yolov8n.pt is small and fast)
    model = YOLO('yolov8n.pt')

    # 2. Train the model using the split dataset.
    # model.train() only accepts Ultralytics configuration keys. 'cwd' is not one
    # of them and made the call fail with
    # "SyntaxError: 'cwd' is not a valid YOLO argument", so it is not passed.
    results = model.train(
        data=correct_path_to_yaml,       # Path to your updated data.yaml
        epochs=50,                      # Number of training rounds (can increase later)
        imgsz=640,                      # Image size used during preprocessing
        project='Traffic_Violation_Detector' # Folder name for results
    )
    print("✅ Training finished!")

else:
    print("❌ Error: Dataset paths not found or data.yaml doesn't exist.")
    print("Please check the output of the split script cell or manually find the paths.")
    # Manually set paths here if needed, like this:
    # correct_path_to_yaml = '/content/datasets/data.yaml'
    # project_root_directory = '/content/datasets'
    # Then, re-run this cell after uncommenting and setting the paths.

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
🚀 Starting training...
Using YAML path: /content/datasets/data.yaml
Using Project Root: /content/datasets
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ━━━━━━━━━━━━ 6.2MB 36.8MB/s 0.2s


SyntaxError: '[31m[1mcwd[0m' is not a valid YOLO argument. 

    Arguments received: ['yolo', '-f', '/root/.local/share/jupyter/runtime/kernel-847dc676-51f0-4a91-8464-24019349d16a.json']. Ultralytics 'yolo' commands use the following syntax:

        yolo TASK MODE ARGS

        Where   TASK (optional) is one of ['obb', 'segment', 'classify', 'detect', 'pose']
                MODE (required) is one of ['train', 'val', 'benchmark', 'predict', 'track', 'export']
                ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults.
                    See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg'

    1. Train a detection model for 10 epochs with an initial learning_rate of 0.01
        yolo train data=coco8.yaml model=yolo11n.pt epochs=10 lr0=0.01

    2. Predict a YouTube video using a pretrained segmentation model at image size 320:
        yolo predict model=yolo11n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320

    3. Val a pretrained detection model at batch-size 1 and image size 640:
        yolo val model=yolo11n.pt data=coco8.yaml batch=1 imgsz=640

    4. Export a YOLO11n classification model to ONNX format at image size 224 by 128 (no TASK required)
        yolo export model=yolo11n-cls.pt format=onnx imgsz=224,128

    5. Ultralytics solutions usage
        yolo solutions count or in ['crop', 'blur', 'workout', 'heatmap', 'isegment', 'visioneye', 'speed', 'queue', 'analytics', 'inference', 'trackzone'] source="path/to/video.mp4"

    6. Run special commands:
        yolo help
        yolo checks
        yolo version
        yolo settings
        yolo copy-cfg
        yolo cfg
        yolo solutions help

    Docs: https://docs.ultralytics.com
    Solutions: https://docs.ultralytics.com/solutions/
    Community: https://community.ultralytics.com
    GitHub: https://github.com/ultralytics/ultralytics
     (<string>)

In [None]:
from ultralytics import YOLO
import os

# --- Make sure the split cell left usable paths behind ---
# Both names must exist with non-empty values, and the YAML file must be on disk.
_paths_ready = (
    bool(locals().get('correct_path_to_yaml'))
    and bool(locals().get('project_root_directory'))
    and os.path.exists(correct_path_to_yaml)
)

if not _paths_ready:
    print("❌ Error: Dataset paths not found or data.yaml doesn't exist.")
    print("Please check the output of the split script cell or manually find the paths.")
    # To recover, set the path by hand and re-run this cell, e.g.:
    # correct_path_to_yaml = '/content/datasets/data.yaml'
    # The paths *inside* data.yaml must be correct relative to that file.
else:
    print(f"🚀 Starting training...")
    print(f"Using YAML path: {correct_path_to_yaml}")

    # Start from the pre-trained nano checkpoint.
    model = YOLO('yolov8n.pt')

    # Training settings, gathered in one place (no 'cwd' entry).
    train_settings = {
        'data': correct_path_to_yaml,              # updated data.yaml
        'epochs': 50,                              # number of training rounds
        'imgsz': 640,                              # image size
        'project': 'Traffic_Violation_Detector',   # folder name for results
    }
    results = model.train(**train_settings)
    print("✅ Training finished!")

🚀 Starting training...
Using YAML path: /content/datasets/data.yaml
Ultralytics 8.3.221 🚀 Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/datasets/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False, opset=None, optimize=False, opti

In [None]:
# Recursively pack the training output folder /content/Traffic_Violation_Detector
# into a single archive, /content/traffic_violation_results.zip.
!zip -r /content/traffic_violation_results.zip /content/Traffic_Violation_Detector

  adding: content/Traffic_Violation_Detector/ (stored 0%)
  adding: content/Traffic_Violation_Detector/train/ (stored 0%)
  adding: content/Traffic_Violation_Detector/train/args.yaml (deflated 53%)
  adding: content/Traffic_Violation_Detector/train/val_batch1_labels.jpg (deflated 7%)
  adding: content/Traffic_Violation_Detector/train/val_batch2_labels.jpg (deflated 7%)
  adding: content/Traffic_Violation_Detector/train/confusion_matrix_normalized.png (deflated 23%)
  adding: content/Traffic_Violation_Detector/train/train_batch0.jpg (deflated 2%)
  adding: content/Traffic_Violation_Detector/train/.ipynb_checkpoints/ (stored 0%)
  adding: content/Traffic_Violation_Detector/train/train_batch20600.jpg (deflated 6%)
  adding: content/Traffic_Violation_Detector/train/results.png (deflated 9%)
  adding: content/Traffic_Violation_Detector/train/results.csv (deflated 63%)
  adding: content/Traffic_Violation_Detector/train/val_batch1_pred.jpg (deflated 7%)
  adding: content/Traffic_Violation_Det

In [None]:
!zip -r /content/dataset_files.zip /content/datasets

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/datasets/train/images/KA09EG7080_1726543755034_jpg.rf.3fc7832ef254dbd02f742ce0dc82b7af.jpg (deflated 1%)
  adding: content/datasets/train/images/KA09HY7493_1728372963752_jpg.rf.39fe906f499bf590159923e049b4a73e.jpg (deflated 1%)
  adding: content/datasets/train/images/KA09ET6194_1727696769041_jpg.rf.c74d0b93badb5294efb00340a9ee80ed.jpg (deflated 0%)
  adding: content/datasets/train/images/KA45V5786_1728633540603_jpg.rf.5f6f07eca34b3b8a9c7df0808b1a8a0e.jpg (deflated 2%)
  adding: content/datasets/train/images/KA09HJ3591_1721815889289_jpg.rf.838f3aa2e62e03f124f9f39537e6a081.jpg (deflated 1%)
  adding: content/datasets/train/images/KA55V9248_1728021651390_jpg.rf.26eb28a6bbe687893895bf0c975e326d.jpg (deflated 1%)
  adding: content/datasets/train/images/KA09HZ4761_1720876655433_jpg.rf.9482acf27235add20caf505a63ace926.jpg (deflated 1%)
  adding: content/datasets/train/images/KA09JF3508_1728455374596_jpg.rf.9a1e