# Pothole Detection - YOLOv8 Training on Google Colab

This notebook trains a YOLOv8n model for pothole detection.

**Important**: Before running any cell, enable a GPU runtime: Runtime → Change runtime type → T4 GPU.

## Step 1: Setup Environment

In [None]:
# Verify GPU is available before installing anything.
# The training cell later requests device=0, so a GPU must be attached to this runtime.
!nvidia-smi

In [None]:
# Install ultralytics
!pip install ultralytics -q

In [None]:
# Mount Google Drive (for saving models).
# The final "Save to Google Drive" step copies its outputs under
# /content/drive/MyDrive/, so this mount must succeed before that step runs.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Import libraries
from ultralytics import YOLO
import torch
import os

# Report the PyTorch build and whether it can see a CUDA device.
cuda_ready = torch.cuda.is_available()

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    # Index 0 is the device the training cell selects with device=0.
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

## Step 2: Upload Dataset

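**Option A**: Zip the `datasets` folder on your local machine, upload the zip through the Files panel, and unzip it in the cell below. The rest of the notebook expects the data at `/content/datasets/pothole_combined/`.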

**Option B**: If the dataset is already in your Google Drive, copy it onto the Colab VM:
```python
!cp -r /content/drive/MyDrive/pothole_dataset /content/datasets
```

In [None]:
# Upload dataset (uncomment ONE option below, then run this cell)

# Option A: Upload from local computer
# 1. Zip your ai-model/datasets folder locally
# 2. Upload via Files panel on the left
# 3. Uncomment and run:
# !unzip -q datasets.zip -d /content/

# Option B: Copy from Google Drive (if already uploaded)
# !cp -r /content/drive/MyDrive/pothole_dataset /content/datasets

# Verify dataset structure.
# A per-split file count is printed instead of a recursive listing, which would
# dump every image filename into the cell output.
dataset_root = '/content/datasets/pothole_combined'
missing_dirs = []

for split_name in ('train', 'valid', 'test'):
    for content_kind in ('images', 'labels'):
        split_dir = os.path.join(dataset_root, split_name, content_kind)
        if os.path.isdir(split_dir):
            print(f"{split_name}/{content_kind}: {len(os.listdir(split_dir))} files")
        else:
            missing_dirs.append(split_dir)
            print(f"{split_name}/{content_kind}: MISSING")

if missing_dirs:
    # Warn rather than raise: some datasets ship without every split/labels folder.
    print("\n⚠ Warning: expected folders not found:")
    for missing_dir in missing_dirs:
        print(f"  - {missing_dir}")
    print("Check that one of the upload options above was run and that the")
    print(f"dataset ended up under {dataset_root}")

## Step 3: Create data.yaml Configuration

In [None]:
# Create data.yaml
data_yaml_content = """# Dataset configuration for YOLOv8 training

path: /content/datasets/pothole_combined  # Root path (Colab)
train: train/images
val: valid/images
test: test/images

# Class configuration
nc: 1  # Number of classes
names: ['pothole']  # Class names
"""

with open('/content/data.yaml', 'w') as f:
    f.write(data_yaml_content)

print("data.yaml created:")
!cat /content/data.yaml

## Step 4: Preview Sample Images (Optional)

In [None]:
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random
import glob

def _label_line_to_box(line, img_w, img_h):
    """Convert one YOLO label line into a pixel-space (x, y, width, height) box.

    Two layouts are accepted, both with coordinates normalised to the image size:
    ``class cx cy w h`` (detection) and ``class x1 y1 x2 y2 ...`` (polygon, which
    is reduced to its enclosing box). Returns None for blank or malformed lines
    so the caller can skip them instead of crashing.
    """
    fields = line.split()
    if not fields:
        return None
    try:
        values = [float(value) for value in fields[1:]]  # fields[0] is the class id
    except ValueError:
        return None

    if len(values) == 4:
        x_center, y_center, box_w, box_h = values
        box_w_px = box_w * img_w
        box_h_px = box_h * img_h
        # Rectangle patches are anchored at the top-left corner, not the centre.
        return (x_center * img_w - box_w_px / 2,
                y_center * img_h - box_h_px / 2,
                box_w_px,
                box_h_px)

    if len(values) >= 6 and len(values) % 2 == 0:
        xs, ys = values[0::2], values[1::2]
        return (min(xs) * img_w,
                min(ys) * img_h,
                (max(xs) - min(xs)) * img_w,
                (max(ys) - min(ys)) * img_h)

    return None


def visualize_sample(img_dir='/content/datasets/pothole_combined/train/images',
                     lbl_dir='/content/datasets/pothole_combined/train/labels'):
    """Show one randomly chosen image with its YOLO boxes drawn in red.

    Args:
        img_dir: Folder holding the images to sample from.
        lbl_dir: Folder holding the matching ``<image stem>.txt`` label files.

    Raises:
        FileNotFoundError: If ``img_dir`` contains no image files.
        ValueError: If the chosen file cannot be decoded as an image.

    An image without a label file is shown without boxes.
    """
    image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
    # Keep only image files so stray files (caches, .DS_Store, ...) are never sampled.
    images = sorted(
        path for path in glob.glob(os.path.join(img_dir, '*'))
        if path.lower().endswith(image_exts)
    )
    if not images:
        raise FileNotFoundError(
            f"No images found in {img_dir}. Upload the dataset (Step 2) first."
        )

    sample = random.choice(images)
    img_name = os.path.basename(sample)
    label_file = os.path.join(lbl_dir, os.path.splitext(img_name)[0] + '.txt')

    img = cv2.imread(sample)
    if img is None:
        # cv2.imread signals an unreadable file by returning None, not by raising.
        raise ValueError(f"Could not read image: {sample}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # matplotlib expects RGB
    h, w = img.shape[:2]

    fig, ax = plt.subplots(1, figsize=(10, 8))
    ax.imshow(img)

    if os.path.exists(label_file):
        with open(label_file, 'r') as f:
            for line in f:
                box = _label_line_to_box(line, w, h)
                if box is None:
                    continue
                x1, y1, box_w_px, box_h_px = box
                rect = patches.Rectangle((x1, y1), box_w_px, box_h_px,
                                         linewidth=2, edgecolor='red', facecolor='none')
                ax.add_patch(rect)
    else:
        print(f"No label file for {img_name}; showing the image without boxes.")

    ax.axis('off')
    ax.set_title(f"Sample: {img_name}")
    plt.show()

# Draw one randomly chosen training image with its labelled boxes.
visualize_sample()

## Step 5: Train YOLOv8n Model

In [None]:
# Load YOLOv8n pretrained model.
# `model` is reused by the training cell and the validation cell that follow.
model = YOLO('yolov8n.pt')

# NOTE(review): these messages print when this cell runs; the training itself
# starts in the next cell.
print("Starting training...")
print("This will take approximately 2-3 hours on Colab T4 GPU")

In [None]:
# Train the model.
# Outputs (weights, results.csv, plots) are written under
# /content/pothole_training/yolov8n_run1, which later cells read from.
results = model.train(
    data='/content/data.yaml',
    epochs=100,
    imgsz=640,
    batch=16,
    patience=20,  # early-stopping patience, in epochs
    save=True,
    project='/content/pothole_training',
    name='yolov8n_run1',
    device=0,  # Use GPU

    # Optimizer settings
    optimizer='AdamW',
    # 0.01 is the SGD-scale learning rate; Ultralytics documents 1e-3 as the
    # starting point for Adam-family optimizers.
    lr0=0.001,
    lrf=0.01,  # final learning rate as a fraction of lr0
    # NOTE(review): with Adam-family optimizers Ultralytics is understood to use
    # this value as beta1 rather than as SGD momentum — confirm.
    momentum=0.937,
    weight_decay=0.0005,

    # Augmentation
    # NOTE(review): `augment` appears to be documented as a prediction-time option;
    # the per-transform settings below are what shape training augmentation — confirm.
    augment=True,
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=10.0,
    translate=0.1,
    scale=0.5,
    fliplr=0.5,
    mosaic=1.0,
    mixup=0.1
)

## Step 6: Evaluate Model

In [None]:
# Run validation on the trained model and summarise the box metrics.
metrics = model.val()
box_metrics = metrics.box
rule = "=" * 60

print("\n" + rule)
print("VALIDATION METRICS")
print(rule)
for metric_label, metric_value in (
    ("mAP@50", box_metrics.map50),
    ("mAP@50-95", box_metrics.map),
    ("Precision", box_metrics.mp),
    ("Recall", box_metrics.mr),
):
    print(f"{metric_label}: {metric_value:.4f}")
print(rule)

# Compare against the project's accuracy target (mAP@50 of 75%).
# The comparison direction is kept as-is so a NaN score lands in the warning branch.
if box_metrics.map50 >= 0.75:
    print("✓ Model meets minimum accuracy requirement (mAP@50 >= 75%)")
else:
    for advice_line in (
        "⚠ Warning: Model accuracy below target (75%). Consider:",
        "  - Training for more epochs",
        "  - Using YOLOv8s (larger model)",
        "  - Adding more training data",
    ):
        print(advice_line)

## Step 7: Visualize Training Results

In [None]:
# Show the plots saved in the training run directory, each under its own caption.
from IPython.display import Image, display

training_plots = (
    ("Training Results:", '/content/pothole_training/yolov8n_run1/results.png'),
    ("\nConfusion Matrix:", '/content/pothole_training/yolov8n_run1/confusion_matrix.png'),
)

for caption, plot_path in training_plots:
    print(caption)
    display(Image(filename=plot_path))

## Step 8: Export to TFLite (float16)

In [None]:
# Reload the best checkpoint written by the training run; `best_model` is also
# used by the sample-inference cell at the end of the notebook.
best_model = YOLO('/content/pothole_training/yolov8n_run1/weights/best.pt')

# Export settings gathered in one place so they are easy to review.
export_options = dict(
    format='tflite',
    imgsz=640,
    half=True,   # float16 quantization
    int8=False,
    simplify=True,
)

print("Exporting to TFLite (float16)...")
tflite_path = best_model.export(**export_options)

print(f"\nTFLite model exported to: {tflite_path}")

## Step 9: Verify TFLite Model

In [None]:
import glob
import os

import numpy as np
import tensorflow as tf

# Find the TFLite file.
# Ultralytics typically writes TFLite exports into a sub-folder of the weights
# directory (e.g. weights/best_saved_model/), so the search must be recursive;
# a flat weights/*.tflite glob finds nothing.
weights_dir = '/content/pothole_training/yolov8n_run1/weights'
tflite_files = sorted(
    glob.glob(os.path.join(weights_dir, '**', '*.tflite'), recursive=True)
)
float16_files = [f for f in tflite_files if 'float16' in os.path.basename(f)]
if not float16_files:
    raise FileNotFoundError(
        f"No float16 .tflite file found under {weights_dir}. "
        "Run the export step (Step 8) first."
    )
tflite_model_path = float16_files[0]

print(f"Verifying: {tflite_model_path}")

# Check file size against the 10 MB budget.
file_size_mb = os.path.getsize(tflite_model_path) / (1024 * 1024)
print(f"\nFile size: {file_size_mb:.2f} MB")

if file_size_mb > 10:
    print("⚠ WARNING: Model is larger than 10MB")
else:
    print("✓ Model size is acceptable (<10MB)")

# Load the model into the TFLite interpreter and inspect its I/O tensors.
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print(f"\nInput shape: {input_details[0]['shape']}")
print(f"Input dtype: {input_details[0]['dtype']}")
print(f"Output shape: {output_details[0]['shape']}")
print(f"Output dtype: {output_details[0]['dtype']}")

# Smoke-test inference with random data. The tensor is cast to whatever dtype the
# model declares for its input, so set_tensor cannot fail on a dtype mismatch.
input_shape = input_details[0]['shape']
test_input = np.random.random(input_shape).astype(input_details[0]['dtype'])
interpreter.set_tensor(input_details[0]['index'], test_input)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])

print(f"\n✓ Inference test successful")
print(f"Output shape: {output_data.shape}")
print("\n" + "="*60)
print("MODEL READY FOR ANDROID DEPLOYMENT")
print("="*60)

## Step 10: Save to Google Drive

In [None]:
import glob
import os
import shutil

run_dir = '/content/pothole_training/yolov8n_run1'
drive_root = '/content/drive/MyDrive'
drive_dir = os.path.join(drive_root, 'pothole_model')

# Without a mounted Drive the copies would land on the Colab VM's local disk and
# be lost when the session ends, so fail loudly instead of reporting success.
if not os.path.isdir(drive_root):
    raise FileNotFoundError(
        f"{drive_root} not found. Run the 'Mount Google Drive' cell first."
    )

# Create output directory in Google Drive
os.makedirs(drive_dir, exist_ok=True)

# Collect model files. The TFLite exports are searched recursively because
# Ultralytics typically places them in a sub-folder of weights/
# (e.g. weights/best_saved_model/), not directly in weights/.
artifacts = [
    os.path.join(run_dir, 'weights', 'best.pt'),
    os.path.join(run_dir, 'results.csv'),
    os.path.join(run_dir, 'results.png'),
    os.path.join(run_dir, 'confusion_matrix.png'),
]
tflite_exports = sorted(
    glob.glob(os.path.join(run_dir, 'weights', '**', '*.tflite'), recursive=True)
)
artifacts.extend(tflite_exports)

# Copy model files, keeping track of anything that is absent.
missing_artifacts = []
for artifact in artifacts:
    if os.path.isfile(artifact):
        shutil.copy2(artifact, drive_dir)
        print(f"Copied: {os.path.basename(artifact)}")
    else:
        missing_artifacts.append(artifact)

if not tflite_exports:
    missing_artifacts.append(os.path.join(run_dir, 'weights', '**', '*.tflite'))

if missing_artifacts:
    print("\n⚠ Warning: these files were not found and were not copied:")
    for artifact in missing_artifacts:
        print(f"  - {artifact}")

print("\n✓ Model files saved to Google Drive/pothole_model/")
print("\nNext steps:")
print("1. Download the .tflite file from Google Drive")
print("2. Place it in: android/app/src/main/assets/models/")
print("3. Proceed with Android app development")

## Step 11: Test Inference on Sample Image (Optional)

In [None]:
# Run inference on a test image.
# Imports are repeated here because the only other cell that imports `random`
# and `plt` (Step 4) is marked optional and may have been skipped.
import glob
import random

import matplotlib.pyplot as plt
# NOTE(review): this import is kept from the original cell but is not used here,
# and it rebinds the name `Image` imported from IPython.display in Step 7.
from PIL import Image

test_images = glob.glob('/content/datasets/pothole_combined/test/images/*')
if not test_images:
    raise FileNotFoundError(
        "No test images found in /content/datasets/pothole_combined/test/images/"
    )
sample_image = random.choice(test_images)

results = best_model.predict(source=sample_image, conf=0.5)

# Display results.
# Results.plot() returns a BGR array (OpenCV order); reverse the channel axis so
# matplotlib, which expects RGB, shows the true colours.
result_image = results[0].plot()[..., ::-1]
plt.figure(figsize=(12, 8))
plt.imshow(result_image)
plt.axis('off')
plt.title(f"Inference Result - Detected {len(results[0].boxes)} pothole(s)")
plt.show()

print(f"\nDetections: {len(results[0].boxes)}")
for i, box in enumerate(results[0].boxes):
    print(f"Pothole {i+1}: Confidence = {box.conf[0]:.2f}")