# Connect to google drive

In [36]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


***Pip installs***

In [None]:
!pip install ultralytics

# Pre-requirements

Read README(CVAT).md

# Create yaml file

In [32]:
# Create a YAML configuration file
config_content = """
path: /content/drive/MyDrive/FYP/CVAT_training  # Path to your working directory
train: /content/drive/MyDrive/FYP/CVAT_training/images/train  # path to training images wrt given path
val: /content/drive/MyDrive/FYP/CVAT_training/images/train # path to validation images wrt given path
nc: 8  # Number of classes

names:
  0: car
  1: van
  2: bus
  3: truck
  4: person
  5: motorbike
  6: threewheel
  7: train
"""
# Save the content to a file
with open('/content/drive/MyDrive/FYP/CVAT_training/CVAT_dataset.yaml', 'w') as f:
    f.write(config_content)

print("YAML configuration file created successfully.")

YAML configuration file created successfully.


**Check content of yaml for clarification**

In [33]:
# Check YAML file content
yaml_path = '/content/drive/MyDrive/FYP/CVAT_training/CVAT_dataset.yaml'

# Display the content of the YAML file
with open(yaml_path, 'r') as f:
    print(f.read())



path: /content/drive/MyDrive/FYP/CVAT_training  # Path to your dataset
train: /content/drive/MyDrive/FYP/CVAT_training/images/train  # Training images
val: /content/drive/MyDrive/FYP/CVAT_training/images/train # Validation images
nc: 8  # Number of classes

names:
  0: car
  1: van
  2: bus
  3: truck
  4: person
  5: motorbike
  6: threewheel
  7: train



**Check train and val path**

In [48]:
import os

# Paths
train_path = '/content/drive/MyDrive/FYP/CVAT_training/images/train'
val_path = '/content/drive/MyDrive/FYP/CVAT_training/images/train'

# Verify paths
assert os.path.exists(train_path), f"Training path does not exist: {train_path}"
assert os.path.exists(val_path), f"Validation path does not exist: {val_path}"

# List sample files
print("Sample training images:", os.listdir(train_path)[:5])
print("Sample validation images:", os.listdir(val_path)[:5])


Sample training images: ['frame_0005.jpg', 'frame_0008.txt', 'frame_0007.txt', 'frame_0009.txt', 'frame_0006.txt']
Sample validation images: ['frame_0005.jpg', 'frame_0008.txt', 'frame_0007.txt', 'frame_0009.txt', 'frame_0006.txt']


# Fine Tune Model

In [49]:
# Import the necessary library
from ultralytics import YOLO
import torch
# Load YOLOv8 model (pre-trained)
model = YOLO('yolov8n.pt')  # Ensure this path is correct

# Check if GPU is available
if torch.cuda.is_available():
    device = '0'  # Use the first GPU
else:
    device = 'cpu'  # Fallback to CPU if no GPU is available

# Train the model
model.train(
    data='/content/drive/MyDrive/FYP/CVAT_training/CVAT_dataset.yaml',  # Path to your dataset.yaml
    epochs=20,         # Number of training epochs
    batch=16,          # Batch size
    imgsz=640,         # Image size
    device=device,     # Use GPU if available
    workers=4          # Adjust based on your CPU for data loading (optional)
)

# Save the trained model if needed
model.save('/content/drive/MyDrive/FYP/CVAT_training/CVAT_trained_yolov8_model.pt')  # Save the trained model



Ultralytics 8.3.53 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/content/drive/MyDrive/FYP/CVAT_training/CVAT_dataset.yaml, epochs=20, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=0, workers=4, project=None, name=train16, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, 

[34m[1mtrain: [0mScanning /content/drive/MyDrive/FYP/CVAT_training/labels/train.cache... 106 images, 0 backgrounds, 0 corrupt: 100%|██████████| 106/106 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /content/drive/MyDrive/FYP/CVAT_training/labels/train.cache... 106 images, 0 backgrounds, 0 corrupt: 100%|██████████| 106/106 [00:00<?, ?it/s]


Plotting labels to runs/detect/train16/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000833, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train16[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20      2.25G      1.565      4.243      1.299         61        640: 100%|██████████| 7/7 [00:02<00:00,  3.06it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:02<00:00,  1.98it/s]

                   all        106        342     0.0368      0.837      0.175       0.11






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20       2.2G      1.188      3.657      1.063         55        640: 100%|██████████| 7/7 [00:02<00:00,  2.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:02<00:00,  1.41it/s]

                   all        106        342     0.0271      0.939      0.382      0.236






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20      2.19G      1.142      2.604      1.004         67        640: 100%|██████████| 7/7 [00:01<00:00,  4.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  2.70it/s]

                   all        106        342      0.553      0.381        0.5      0.355






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20      2.19G       1.07      1.806     0.9929         55        640: 100%|██████████| 7/7 [00:01<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  3.58it/s]

                   all        106        342     0.0796      0.926      0.591      0.432






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20       2.2G       1.01      1.413     0.9745         55        640: 100%|██████████| 7/7 [00:01<00:00,  4.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  3.23it/s]

                   all        106        342      0.979      0.432      0.652      0.484






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20       2.2G      1.007      1.267     0.9692         57        640: 100%|██████████| 7/7 [00:02<00:00,  3.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  2.10it/s]


                   all        106        342      0.683      0.764      0.697       0.54

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20       2.2G      1.003      1.166     0.9773         54        640: 100%|██████████| 7/7 [00:02<00:00,  3.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  2.59it/s]

                   all        106        342      0.998      0.156      0.669      0.551






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20       2.2G     0.9778      1.055     0.9579         67        640: 100%|██████████| 7/7 [00:01<00:00,  4.95it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  3.43it/s]

                   all        106        342      0.975      0.164       0.75      0.623






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20       2.2G     0.9614     0.9984     0.9543         55        640: 100%|██████████| 7/7 [00:01<00:00,  4.82it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  3.37it/s]

                   all        106        342      0.968      0.557      0.878      0.701






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20       2.2G     0.9717     0.9501     0.9675         57        640: 100%|██████████| 7/7 [00:01<00:00,  5.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  2.49it/s]

                   all        106        342      0.957      0.639      0.919       0.73





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20      2.17G     0.9364       1.16     0.9477         30        640: 100%|██████████| 7/7 [00:05<00:00,  1.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  2.65it/s]

                   all        106        342      0.965      0.503      0.935      0.742






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20      2.17G     0.9205      1.233     0.9412         30        640: 100%|██████████| 7/7 [00:01<00:00,  4.93it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  2.98it/s]

                   all        106        342      0.959      0.797      0.947      0.748






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20      2.17G     0.9134       1.09     0.9529         30        640: 100%|██████████| 7/7 [00:01<00:00,  4.91it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  3.45it/s]

                   all        106        342      0.957      0.821       0.95      0.761






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20      2.17G     0.8975      1.014     0.9461         25        640: 100%|██████████| 7/7 [00:01<00:00,  3.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  2.22it/s]


                   all        106        342      0.955      0.858      0.952       0.76

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20      2.17G     0.8863      0.993     0.9466         31        640: 100%|██████████| 7/7 [00:02<00:00,  3.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:02<00:00,  1.82it/s]


                   all        106        342      0.954      0.905      0.952      0.749

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20      2.17G     0.9033      1.008     0.9561         31        640: 100%|██████████| 7/7 [00:01<00:00,  4.95it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  3.72it/s]

                   all        106        342      0.953      0.925      0.953      0.751






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20      2.17G     0.8602     0.9849     0.9309         28        640: 100%|██████████| 7/7 [00:01<00:00,  5.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  3.77it/s]


                   all        106        342      0.954      0.945      0.952      0.749

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20      2.18G     0.8815       0.99     0.9266         27        640: 100%|██████████| 7/7 [00:01<00:00,  5.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  3.50it/s]

                   all        106        342      0.954      0.956      0.952       0.76






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20      2.17G     0.8646     0.9571     0.9332         29        640: 100%|██████████| 7/7 [00:01<00:00,  3.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:02<00:00,  1.70it/s]

                   all        106        342      0.959      0.957      0.952      0.765






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20      2.17G      0.857     0.9343     0.9249         28        640: 100%|██████████| 7/7 [00:02<00:00,  3.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:01<00:00,  2.20it/s]

                   all        106        342      0.964      0.955      0.952      0.774






20 epochs completed in 0.032 hours.
Optimizer stripped from runs/detect/train16/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train16/weights/best.pt, 6.2MB

Validating runs/detect/train16/weights/best.pt...
Ultralytics 8.3.53 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
Model summary (fused): 168 layers, 3,007,208 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:02<00:00,  1.67it/s]


                   all        106        342      0.964      0.955      0.952      0.773
                   van          6          6      0.882      0.833      0.835      0.735
                 truck        100        100      0.984          1      0.988      0.851
             motorbike        106        106      0.999          1      0.995      0.715
            threewheel        100        130      0.992      0.988       0.99      0.794
Speed: 0.2ms preprocess, 3.7ms inference, 0.0ms loss, 5.5ms postprocess per image
Results saved to [1mruns/detect/train16[0m


# Test the model

In [1]:
import cv2
import json
import os
from datetime import datetime
from ultralytics import YOLO  # Ensure you have the correct YOLO library installed

# Timestamp for unique filenames
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

# Load your trained YOLOv8 model
model = YOLO('/content/drive/MyDrive/FYP/CVAT_training/CVAT_trained_yolov8_model.pt')

# Open the video file
video_path = '/content/drive/MyDrive/FYP/input_videos/sl_2.mp4'  # Ensure the correct path format
cap = cv2.VideoCapture(video_path)

# Check if the video opened successfully
if not cap.isOpened():
    print("Error: Couldn't open the video file.")
    exit()

# Create output directories
output_dir = '/content/drive/MyDrive/FYP/CVAT_training/CVAT_trained_output_video2'
os.makedirs(output_dir, exist_ok=True)

frame_dir = f'/content/drive/MyDrive/FYP/CVAT_training/CVAT_trained_extracted_frames2/trained_processed_frames2_{timestamp}'
os.makedirs(frame_dir, exist_ok=True)

json_dir = '/content/drive/MyDrive/FYP/CVAT_training/CVAT_trained_json_data2'
os.makedirs(json_dir, exist_ok=True)

# Get video properties
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Initialize video writer
output_video_path = os.path.join(output_dir, f'CVAT_trained_output_video2_{timestamp}.mp4')
out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

# JSON file for metadata
output_json_path = os.path.join(json_dir, f'CVAT_detected_trained_metadata2_{timestamp}.json')
metadata = []

# Frame processing loop
frame_count = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Run object detection on the frame (no resizing for better accuracy)
    results = model(frame)

    # Get the annotated frame with bounding boxes and labels
    annotated_frame = results[0].plot()

    # Collect detection metadata
    detections = []
    for box in results[0].boxes:
        bbox = box.xyxy.tolist()[0]  # (x1, y1, x2, y2)
        class_id = int(box.cls)
        confidence = float(box.conf)
        detections.append({
            'class': model.names[class_id],
            'confidence': confidence,
            'box': bbox
        })

    metadata.append({
        'frame': frame_count,
        'detections': detections
    })

    # Write the annotated frame to the output video
    out.write(annotated_frame)

    # Save each annotated frame as an image file
    frame_output_path = os.path.join(frame_dir, f'frame_{frame_count:04d}.jpg')
    cv2.imwrite(frame_output_path, annotated_frame)

    # Increment frame count
    frame_count += 1

# Save detection metadata to JSON file
with open(output_json_path, 'w') as json_file:
    json.dump(metadata, json_file, indent=4)

# Release resources
cap.release()
out.release()
cv2.destroyAllWindows()

# Print results
print(f"Processed {frame_count} frames.")
print(f"Annotated video saved at: {output_video_path}")
print(f"Detection metadata saved at: {output_json_path}")


0: 384x640 1 truck, 45.8ms
Speed: 4.4ms preprocess, 45.8ms inference, 641.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 truck, 1 threewheel, 6.7ms
Speed: 5.3ms preprocess, 6.7ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 truck, 6.7ms
Speed: 3.5ms preprocess, 6.7ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 van, 1 truck, 6.0ms
Speed: 2.1ms preprocess, 6.0ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 van, 8.9ms
Speed: 2.6ms preprocess, 8.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 van, 1 threewheel, 6.7ms
Speed: 1.6ms preprocess, 6.7ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 van, 7.1ms
Speed: 2.5ms preprocess, 7.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 van, 13.3ms
Speed: 5.4ms preprocess, 13.3ms inference, 2.6ms postprocess per image at shape 