In [None]:
import subprocess
import sys

# Third-party packages this notebook relies on. Each one is installed with the
# pip that belongs to the interpreter running this kernel (sys.executable).
packages = [
    'ultralytics',
    'opencv-python',
    'pandas',
    'numpy',
    'matplotlib',
    'seaborn',
    'pillow',
    'pyyaml',
]

pip_install = [sys.executable, '-m', 'pip', 'install', '-q']

# One pip invocation per package. check_call raises CalledProcessError on the
# first failing install, so the message below is only printed if all succeed.
for requirement in packages:
    subprocess.check_call(pip_install + [requirement])

print('Dependencies installed')

In [None]:
from ultralytics import YOLO
from pathlib import Path
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import json
import yaml
from datetime import datetime
from collections import Counter

# Seed NumPy's legacy global random generator so NumPy-based randomness in
# this kernel session is repeatable from one run to the next.
np.random.seed(seed=42)

print('Modules imported')

## Dataset Configuration

In [None]:
# All locations are derived from the directory the kernel was started in.
root = Path.cwd()
dataset_path = root.joinpath("ATCC")
config_file = dataset_path.joinpath("data.yaml")

# Echo the resolved paths and whether the dataset config is actually present.
print(f'Project root: {root}')
print(f'Dataset: {dataset_path}')
print(f'Config: {config_file}')
print(f'Config exists: {config_file.exists()}')

In [None]:
# Read the dataset description. safe_load only builds plain Python objects.
# An empty file makes safe_load return None, so fall back to an empty dict
# to keep the .get() call below working.
with open(config_file, encoding='utf-8') as f:
    config = yaml.safe_load(f) or {}

print('Configuration:')
print(json.dumps(config, indent=2))

# `names` can be written either as a mapping ({0: 'car', ...}) or as a plain
# list (['car', ...]). Normalise both layouts to an index -> name dict so the
# .items()/.keys()/.values() calls used in this notebook work for either.
class_names = config.get('names') or {}
if isinstance(class_names, dict):
    classes = dict(class_names)
else:
    classes = dict(enumerate(class_names))

print(f'\nClasses ({len(classes)}):' )
for idx, name in classes.items():
    print(f'  {idx}: {name}')

In [None]:
def count_images(path, patterns=('*.jpg', '*.png')):
    """Count the image files located directly inside a directory.

    Args:
        path: ``pathlib.Path`` of the directory to inspect. Sub-directories
            are not searched.
        patterns: Glob patterns that identify image files. The default keeps
            the original behaviour of counting ``*.jpg`` and ``*.png`` files.
            Matching is done by ``Path.glob``; a file matching several
            patterns is counted once per pattern.

    Returns:
        The number of matches over all ``patterns``, or ``0`` when ``path``
        does not exist.
    """
    if not path.exists():
        return 0
    # Count lazily instead of building a throwaway list for every pattern.
    return sum(1 for pattern in patterns for _ in path.glob(pattern))

# Directory that holds one image folder per split ("train", "val").
images_dir = dataset_path.joinpath("bdd100k", "bdd100k", "images", "100k")

# count_images returns 0 for a missing folder, so this cell runs even when the
# dataset has not been downloaded yet.
train, val = (count_images(images_dir / split) for split in ("train", "val"))
total = train + val

print(f'Train: {train}')
print(f'Validation: {val}')
print(f'Total: {total}')

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 5))

labels = ['Train', 'Validation']
values = [train, val]
colors = ['#1f77b4', '#ff7f0e']

# Left panel: absolute number of images per split.
bars = ax1.bar(labels, values, color=colors, edgecolor='black', linewidth=1.2)
ax1.set_ylabel('Images', fontsize=11)
ax1.set_title('Dataset Split', fontweight='bold')
ax1.grid(axis='y', alpha=0.3)

# Write each count just above its bar. The gap is given in points rather than
# data units, so the label sits equally close to the bar whether the split
# holds ten images or a hundred thousand.
for bar, count in zip(bars, values):
    ax1.annotate(
        str(count),
        xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
        xytext=(0, 4),
        textcoords='offset points',
        ha='center',
        va='bottom',
        fontweight='bold',
    )

# Right panel: relative share of each split. A pie chart cannot be normalised
# when every count is zero (e.g. the image folders are missing), so show a
# short notice instead of failing.
if sum(values) > 0:
    ax2.pie(values, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
else:
    ax2.text(0.5, 0.5, 'No images found', ha='center', va='center',
             transform=ax2.transAxes)
    ax2.axis('off')
ax2.set_title('Proportion', fontweight='bold')

plt.tight_layout()
plt.savefig('atcc_dataset_split.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Two-column lookup table: class name next to its index.
class_list = pd.DataFrame({
    'Class': list(classes.values()),
    'Index': list(classes.keys())
})
n_rows, n_cols = class_list.shape

fig, ax = plt.subplots(figsize=(9, 6))
ax.axis('tight')
ax.axis('off')

# Render the frame as a matplotlib table; the axes only act as a canvas.
table = ax.table(
    cellText=class_list.values,
    colLabels=class_list.columns,
    cellLoc='center',
    loc='center',
)
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1, 2)

# Row 0 of the table is the header row built from colLabels.
for col in range(n_cols):
    header_cell = table[(0, col)]
    header_cell.set_facecolor('#1f77b4')
    header_cell.set_text_props(weight='bold', color='white')

# Zebra striping: shade the even-numbered body rows (body rows start at 1).
for row in range(2, n_rows + 1, 2):
    for col in range(n_cols):
        table[(row, col)].set_facecolor('#f0f0f0')

plt.title('Traffic Classes', fontweight='bold', pad=20)
plt.savefig('atcc_classes.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
sample_path = images_dir / "train"
samples = list(sample_path.glob('*.jpg'))[:6]

fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.ravel()

# Hide the frame of every panel up front, so panels that end up without an
# image (fewer than six samples, or an unreadable file) stay blank.
for panel in axes:
    panel.axis('off')

for panel, filepath in zip(axes, samples):
    try:
        # The context manager closes the file handle once the pixels are drawn.
        with Image.open(filepath) as img:
            panel.imshow(img)
    except OSError as exc:
        # Unreadable or corrupt image files raise OSError subclasses in Pillow.
        # Report the file and move on rather than hiding the failure.
        print(f'Skipping {filepath.name}: {exc}')
        continue
    panel.set_title(filepath.name, fontsize=8)

plt.tight_layout()
plt.savefig('atcc_samples.png', dpi=150, bbox_inches='tight')
plt.show()

## Training

In [None]:
# Build the detector from the 'yolov8x.pt' weights file.
print('Loading model')
model = YOLO(model='yolov8x.pt')
print('Model loaded')

In [None]:
print(f'Start time: {datetime.now()}')

# Dataset, schedule, hardware and output location of the run.
run_settings = dict(
    data=str(config_file),
    epochs=150,
    imgsz=640,
    batch=8,
    patience=25,
    device=0,
    name='atcc_training',
    project='runs',
    exist_ok=True,
)

# Optimiser and learning-rate settings.
optimizer_settings = dict(
    optimizer='SGD',
    momentum=0.937,
    weight_decay=0.0005,
    warmup_epochs=5.0,
    lr0=0.01,
)

# Data augmentation: colour jitter (hsv_*), geometric jitter and flips.
# NOTE(review): flipud=0.5 presumably flips half of the training images
# upside down; confirm this is intended for road-scene imagery.
augmentation_settings = dict(
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=10.0,
    translate=0.1,
    scale=0.5,
    flipud=0.5,
    fliplr=0.5,
)

results = model.train(**run_settings, **optimizer_settings, **augmentation_settings)

print(f'End time: {datetime.now()}')

## Validation

In [None]:
print('Running validation')
val_results = model.val()

# The bounding-box metrics all live on the `box` attribute of the result.
box_metrics = val_results.box

print(f'mAP50: {box_metrics.map50:.4f}')
print(f'mAP50-95: {box_metrics.map:.4f}')
print(f'Precision: {box_metrics.mp:.4f}')
print(f'Recall: {box_metrics.mr:.4f}')

In [None]:
def normalize_results_columns(results_df):
    """Return a copy of a training-log frame with uniform column names.

    The training log does not provide ``train/loss``, ``val/loss`` or
    ``metrics/mAP50`` columns directly: headers may be padded with spaces,
    metric columns may carry a ``(B)`` suffix, and the loss is logged per
    component (``train/box_loss``, ``train/cls_loss``, ...). This helper

    * strips surrounding whitespace and the ``(B)`` suffix from every header;
    * adds ``train/loss`` / ``val/loss`` as the row-wise sum of that split's
      ``*_loss`` component columns when such a total is not already present.

    Args:
        results_df: ``pandas.DataFrame`` as read from ``results.csv``.

    Returns:
        A new ``DataFrame``; ``results_df`` itself is left unchanged.
    """
    normalized = results_df.rename(
        columns=lambda col: str(col).strip().replace('(B)', '')
    )
    for split in ('train', 'val'):
        total_col = f'{split}/loss'
        if total_col in normalized.columns:
            continue
        parts = [
            col for col in normalized.columns
            if col.startswith(f'{split}/') and col.endswith('_loss')
        ]
        # Only synthesise a total when there is something to add up.
        if parts:
            normalized[total_col] = normalized[parts].sum(axis=1)
    return normalized


results_csv = Path('runs/atcc_training/results.csv')

if results_csv.exists():
    df = normalize_results_columns(pd.read_csv(results_csv))
    print(f'Results shape: {df.shape}')
    print(df[['epoch', 'train/loss', 'val/loss', 'metrics/mAP50']].head())
else:
    print(f'Results file not found: {results_csv}')

In [None]:
if results_csv.exists():
    df = pd.read_csv(results_csv)

    # The training log may pad its headers with spaces, suffix metric columns
    # with "(B)", and it stores the loss per component rather than as a single
    # total. Bring the frame to the column names used by the plots below.
    df = df.rename(columns=lambda col: str(col).strip().replace('(B)', ''))
    for split in ('train', 'val'):
        total_col = f'{split}/loss'
        if total_col not in df.columns:
            parts = [
                col for col in df.columns
                if col.startswith(f'{split}/') and col.endswith('_loss')
            ]
            if parts:
                # Total loss shown in the plot = sum of the logged components.
                df[total_col] = df[parts].sum(axis=1)

    epochs = df['epoch']
    fig, axes = plt.subplots(2, 3, figsize=(16, 10))

    # Panels that show a single curve: (axes, column, y-label, title, colour).
    single_curve_panels = [
        (axes[0, 0], 'train/loss', 'Loss', 'Train Loss', '#d62728'),
        (axes[0, 1], 'val/loss', 'Loss', 'Val Loss', '#ff7f0e'),
        (axes[1, 0], 'metrics/precision', 'Precision', 'Precision', '#2ca02c'),
        (axes[1, 1], 'metrics/recall', 'Recall', 'Recall', '#1f77b4'),
    ]
    for panel, column, ylabel, title, color in single_curve_panels:
        panel.plot(epochs, df[column], linewidth=2.2, color=color)
        panel.set_ylabel(ylabel)
        panel.set_title(title)

    # mAP at IoU 0.50 and averaged over 0.50-0.95, on one panel.
    map_panel = axes[0, 2]
    map_panel.plot(epochs, df['metrics/mAP50'], linewidth=2.2, marker='o', markersize=3)
    map_panel.plot(epochs, df['metrics/mAP50-95'], linewidth=2.2, marker='s', markersize=3)
    map_panel.set_ylabel('mAP')
    map_panel.set_title('Mean Average Precision')
    map_panel.legend(['mAP50', 'mAP50-95'])

    # Train vs. validation box loss, to spot over-fitting.
    box_panel = axes[1, 2]
    box_panel.plot(epochs, df['train/box_loss'], linewidth=2.2, label='Train', alpha=0.7)
    box_panel.plot(epochs, df['val/box_loss'], linewidth=2.2, label='Val', alpha=0.7)
    box_panel.set_ylabel('Loss')
    box_panel.set_title('Box Loss')
    box_panel.legend()

    # Shared cosmetics for all six panels.
    for panel in axes.ravel():
        panel.set_xlabel('Epoch')
        panel.grid(alpha=0.3)

    plt.tight_layout()
    plt.savefig('atcc_metrics.png', dpi=150, bbox_inches='tight')
    plt.show()

## Inference Testing

In [None]:
# Take the first six validation images as a quick qualitative check.
val_path = images_dir / "val"
val_images = list(val_path.glob('*.jpg'))[:6]

print(f'Running inference on {len(val_images)} images')

# Each call returns a list of results for the given source; a single image
# path yields one entry, which is paired with the file it came from.
pred_results = [
    (img_file, model(str(img_file), conf=0.4)[0])
    for img_file in val_images
]

print('Inference done')

In [None]:
fig, axes = plt.subplots(2, 3, figsize=(17, 11))
axes = axes.ravel()

# Hide every panel's frame first, so panels without a prediction (fewer than
# six validation images) are left blank instead of showing empty axes.
for panel in axes:
    panel.axis('off')

for panel, (img_file, pred) in zip(axes, pred_results):
    annotated = pred.plot()
    # Reverse the channel axis before display; the annotated frame is
    # presumably BGR while matplotlib expects RGB.
    img_rgb = annotated[..., ::-1]

    panel.imshow(img_rgb)
    panel.set_title(f'Detections: {len(pred.boxes)}')

plt.tight_layout()
plt.savefig('atcc_predictions.png', dpi=150, bbox_inches='tight')
plt.show()

## Model Export

In [None]:
# Write the model to a file in the current working directory.
save_path = 'yolo_ATCC.pt'
model.save(save_path)

# File size in mebibytes (bytes / 1024**2), read back from disk.
size_bytes = Path(save_path).stat().st_size
size_mb = size_bytes / (1024 ** 2)

print(f'Model saved: {save_path}')
print(f'Size: {size_mb:.2f} MB')

In [None]:
# Final recap. The model, resolution, epoch and batch lines are fixed text
# and must be kept in sync with the training cell by hand; the remaining
# values come from variables computed earlier in the notebook.
rule = '-'*50
summary_lines = [
    '\n' + rule,
    'ATCC TRAINING SUMMARY',
    rule,
    'Model: YOLOv8 XLarge',
    'Task: Traffic Detection and Classification',
    'Resolution: 640x640',
    f'Classes: {len(classes)}',
    'Epochs: 150',
    'Batch: 8',
    '\nDataset (BDD100K):',
    f'Total: {total}',
    f'Train: {train}',
    f'Val: {val}',
    f'\nModel File: {save_path}',
    f'File Size: {size_mb:.2f} MB',
    rule,
]
print('\n'.join(summary_lines))