## Step 1: Install Dependencies

In [1]:
# Install required packages (if needed)
!pip install -q torch torchvision opencv-python pillow matplotlib

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, models as torchvision_models
from torchvision.datasets import ImageFolder
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt

# Pick the compute device: CUDA when torch can see a GPU, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"✓ Using device: {device}")

# On a GPU runtime, also report the card name and its total memory in GB (1e9 bytes)
if device.type == 'cuda':
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"✓ GPU: {torch.cuda.get_device_name(0)}")
    print(f"✓ GPU Memory: {gpu_props.total_memory / 1e9:.2f} GB")

✓ Using device: cuda
✓ GPU: Tesla T4
✓ GPU Memory: 15.83 GB


## Step 2: Upload Dataset

Choose one option:
- **Option A**: Upload ZIP file (Colab web interface only)
- **Option B**: Mount Google Drive (fastest for large files)
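- **Option C**: Copy from a file path that the runtime itself can read (a hosted Colab VM cannot see files on your PC, so this only works when the kernel runs on the machine that holds the ZIP)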

**⚠️ Important:** The upload widget only works in Colab's web interface at https://colab.research.google.com/

**For large files (1.67 GB):** Use Option B (Google Drive) - it's much faster!

1. Upload `road_dataset_20251118_201844.zip` to your Google Drive
2. Use Option B below to mount Drive and extract

In [3]:
# OPTION A: Upload ZIP file
import zipfile

from google.colab import files

print("Upload your road_dataset_20251118_201844.zip file:")
uploaded = files.upload()

# The returned mapping is empty when the upload dialog is cancelled; fail with a
# clear message instead of an IndexError on the first key.
if not uploaded:
    raise RuntimeError("No file was uploaded - re-run this cell or use Option B.")

# Get the uploaded filename
zip_filename = next(iter(uploaded))

# Extract with the standard library: the filename is never interpolated into a
# shell command, and re-running the cell overwrites files instead of prompting.
print(f"\nExtracting {zip_filename}...")
with zipfile.ZipFile(zip_filename) as archive:
    archive.extractall('/content/')
print("✓ Dataset extracted!")

# Set paths (ZIP contains data/train and data/validation folders)
TRAIN_DIR = '/content/data/train'
VAL_DIR = '/content/data/validation'

print(f"\n✓ Train dir: {TRAIN_DIR}")
print(f"✓ Val dir: {VAL_DIR}")

Upload your road_dataset_20251118_201844.zip file:


KeyboardInterrupt: 

In [None]:
# OPTION B: Mount Google Drive (RECOMMENDED for large files)
# 1. Upload road_dataset_20251118_201844.zip to your Google Drive first
# 2. Run this cell and authorize
# 3. Extract the dataset
import os
import zipfile

from google.colab import drive

drive.mount('/content/drive')

# Location of the archive inside the mounted Drive (adjust path if needed)
DRIVE_ZIP_PATH = '/content/drive/MyDrive/road_dataset_20251118_201844.zip'

# Stop here when the archive is missing, so the success message below is only
# printed after a real extraction.
if not os.path.isfile(DRIVE_ZIP_PATH):
    raise FileNotFoundError(f"ZIP file not found at: {DRIVE_ZIP_PATH}")

# Extract dataset from Drive; existing files are overwritten, so the cell can be re-run
print("\nExtracting dataset from Google Drive...")
with zipfile.ZipFile(DRIVE_ZIP_PATH) as archive:
    archive.extractall('/content/')
print("✓ Dataset extracted!")

# Set paths
TRAIN_DIR = '/content/data/train'
VAL_DIR = '/content/data/validation'

In [5]:
# OPTION C: Copy the ZIP from a path that the runtime can read
# NOTE: the check below runs on the machine that hosts the kernel. A path on your
# own PC is not visible from a hosted Colab VM, so this option only succeeds when
# the kernel itself runs on the machine that holds the ZIP.

import os
import shutil
import zipfile

# Path to your local ZIP file (update this path if needed)
LOCAL_ZIP_PATH = r'C:\Users\amitu\Downloads\yolo\road_dataset_20251118_201844.zip'
# Where the archive is placed inside the runtime before extraction
RUNTIME_ZIP_PATH = '/content/road_dataset_20251118_201844.zip'

print("Checking for local ZIP file...")
# isfile (not exists): a directory with that name must not be treated as the archive
if os.path.isfile(LOCAL_ZIP_PATH):
    print(f"✓ Found: {LOCAL_ZIP_PATH}")

    # Copy to Colab runtime
    print("\nCopying ZIP to Colab runtime...")
    shutil.copy2(LOCAL_ZIP_PATH, RUNTIME_ZIP_PATH)
    print("✓ ZIP copied to /content/")

    # Extract; existing files are overwritten, so the cell can be re-run without prompts
    print("\nExtracting dataset...")
    with zipfile.ZipFile(RUNTIME_ZIP_PATH) as archive:
        archive.extractall('/content/')
    print("✓ Dataset extracted!")

    # Set paths
    TRAIN_DIR = '/content/data/train'
    VAL_DIR = '/content/data/validation'

    print(f"\n✓ Train dir: {TRAIN_DIR}")
    print(f"✓ Val dir: {VAL_DIR}")
else:
    # TRAIN_DIR / VAL_DIR are intentionally left unset here; Option A or B must set them
    print(f"✗ ZIP file not found at: {LOCAL_ZIP_PATH}")
    print("Please use Option A or B instead")

Checking for local ZIP file...
✗ ZIP file not found at: C:\Users\amitu\Downloads\yolo\road_dataset_20251118_201844.zip
Please use Option A or B instead


In [None]:
# Verify dataset structure
def _class_dirs(root):
    """Return the sorted names of the sub-directories of `root` (one per class).

    Plain files directly under `root` are skipped so that they are neither
    reported as classes nor passed to os.listdir below.
    """
    return sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    )


train_classes = _class_dirs(TRAIN_DIR)
val_classes = _class_dirs(VAL_DIR)

print("\n=== Dataset Structure ===")
print(f"Training classes: {train_classes}")
print(f"Validation classes: {val_classes}")

print("\nImages per class:")
for class_name in train_classes:
    train_count = len(os.listdir(os.path.join(TRAIN_DIR, class_name)))
    # A class may be absent from the validation split; report 0 instead of crashing
    val_class_dir = os.path.join(VAL_DIR, class_name)
    val_count = len(os.listdir(val_class_dir)) if os.path.isdir(val_class_dir) else 0
    print(f"  {class_name}: {train_count} train, {val_count} val")

# Surface split mismatches early, before they show up as confusing training errors
if train_classes != val_classes:
    print("\n⚠ Training and validation splits do not contain the same class folders")

## Step 3: Build the Model (MobileNetV2 Backbone + Classification Head)

In [None]:
class RoadConditionModel:
    """Builds and holds the road-condition image classifier.

    The network is a torchvision MobileNetV2 feature extractor with ImageNet
    weights, followed by a fully connected classification head.

    Args:
        num_classes: Size of the final linear layer (number of classes).
        input_size: (height, width) kept as metadata on the instance; it is not
            used while building the network.
        class_names: Optional index -> name list. When omitted, the built-in
            five-name list is used.
            NOTE(review): ImageFolder assigns label indices to class folders in
            sorted order, which the built-in list does not follow - prefer
            passing ``dataset.classes`` here; confirm against the dataset.

    Raises:
        ValueError: If an explicit ``class_names`` does not have ``num_classes`` entries.
    """

    DEFAULT_CLASS_NAMES = ('Good', 'Minor_Damage', 'Pothole', 'Crack', 'Severe_Damage')

    def __init__(self, num_classes=5, input_size=(224, 224), class_names=None):
        if class_names is None:
            class_names = self.DEFAULT_CLASS_NAMES
        elif len(class_names) != num_classes:
            raise ValueError(
                f"class_names has {len(class_names)} entries but num_classes={num_classes}"
            )

        self.num_classes = num_classes
        self.input_size = input_size
        self.device = device
        self.model = None
        self.class_names = list(class_names)

    def build_model(self):
        """Create the network, move it to ``self.device`` and return it.

        Returns:
            The constructed ``nn.Module`` (also stored in ``self.model``).
        """
        print("\n=== Building Model ===")

        # Load MobileNetV2 pre-trained on ImageNet as the feature extractor.
        # `pretrained=True` is deprecated in favour of the `weights` argument;
        # IMAGENET1K_V1 is the weight set the old flag selected.
        weights_enum = getattr(torchvision_models, 'MobileNet_V2_Weights', None)
        if weights_enum is not None:
            base_model = torchvision_models.mobilenet_v2(weights=weights_enum.IMAGENET1K_V1)
        else:
            # Older torchvision releases only know the boolean flag
            base_model = torchvision_models.mobilenet_v2(pretrained=True)

        # Freeze everything except the last 30 parameter tensors
        for param in list(base_model.parameters())[:-30]:
            param.requires_grad = False

        # Remember the feature width, then drop the original classifier so the
        # backbone returns the pooled feature vector
        num_features = base_model.classifier[1].in_features
        base_model.classifier = nn.Identity()

        # Custom classification layers
        class YOLORoadModel(nn.Module):
            """Backbone followed by an MLP head that outputs raw class logits."""

            def __init__(self, backbone, num_features, num_classes):
                super(YOLORoadModel, self).__init__()
                self.backbone = backbone
                self.fc_layers = nn.Sequential(
                    nn.Linear(num_features, 1024),
                    nn.ReLU(),
                    nn.BatchNorm1d(1024),
                    nn.Dropout(0.5),

                    nn.Linear(1024, 512),
                    nn.ReLU(),
                    nn.BatchNorm1d(512),
                    nn.Dropout(0.4),

                    nn.Linear(512, 256),
                    nn.ReLU(),
                    nn.BatchNorm1d(256),
                    nn.Dropout(0.3),

                    nn.Linear(256, 128),
                    nn.ReLU(),
                    nn.Dropout(0.2),

                    nn.Linear(128, num_classes)
                )

            def forward(self, x):
                x = self.backbone(x)
                x = self.fc_layers(x)
                return x

        self.model = YOLORoadModel(base_model, num_features, self.num_classes).to(self.device)

        total_params = sum(p.numel() for p in self.model.parameters())
        trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)

        print(f"✓ Model built!")
        print(f"  Total parameters: {total_params:,}")
        print(f"  Trainable parameters: {trainable_params:,}")
        print(f"  Device: {self.device}")

        return self.model

# Create model
model = RoadConditionModel(num_classes=5)
model.build_model()

## Step 4: Train Model

In [None]:
# Hyper-parameters for the training run
EPOCHS, BATCH_SIZE, LEARNING_RATE = 50, 32, 0.0001

# Echo the configuration so it is recorded together with the notebook output
config_lines = [
    f"\n=== Training Configuration ===",
    f"Epochs: {EPOCHS}",
    f"Batch Size: {BATCH_SIZE}",
    f"Learning Rate: {LEARNING_RATE}",
    f"Device: {device}",
]
print(*config_lines, sep="\n")

In [None]:
# Data transforms
# Shared preprocessing constants, defined once so train and val cannot drift apart
IMAGE_SIZE = (224, 224)
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, random augmentation, then tensor conversion + normalisation
train_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomRotation(25),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.3, contrast=0.3),
    transforms.RandomAffine(degrees=0, translate=(0.3, 0.3), scale=(0.7, 1.3)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Validation pipeline: deterministic, no augmentation
val_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Load datasets
train_dataset = ImageFolder(TRAIN_DIR, transform=train_transforms)
val_dataset = ImageFolder(VAL_DIR, transform=val_transforms)

# Both splits must map the same class folders to the same label indices,
# otherwise validation metrics compare different classes.
if train_dataset.class_to_idx != val_dataset.class_to_idx:
    raise ValueError(
        "Train/validation class folders differ: "
        f"{train_dataset.classes} vs {val_dataset.classes}"
    )

# The classifier head contains BatchNorm1d layers, which raise an error in train
# mode when a batch holds a single sample. Drop the trailing batch only in that case.
drop_single_sample_batch = len(train_dataset) % BATCH_SIZE == 1

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2,
                          drop_last=drop_single_sample_batch)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

print(f"\n✓ Training samples: {len(train_dataset)}")
print(f"✓ Validation samples: {len(val_dataset)}")
print(f"✓ Classes: {train_dataset.classes}")

In [None]:
# Training setup
CHECKPOINT_PATH = '/content/road_condition_model_best.pth'

# ImageFolder assigns label indices to the class folders in sorted order, so the
# dataset (not a hand-written list) defines which name belongs to which index.
checkpoint_class_names = list(train_dataset.classes)
if len(checkpoint_class_names) != model.num_classes:
    raise ValueError(
        f"Dataset has {len(checkpoint_class_names)} classes but the model was "
        f"built with num_classes={model.num_classes}"
    )

criterion = nn.CrossEntropyLoss()
# Only parameters left trainable when the model was built are optimised
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.model.parameters()), lr=LEARNING_RATE)
# Halve the learning rate when the validation loss plateaus
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, min_lr=1e-7)

# Training loop
best_val_acc = 0.0
patience_counter = 0
patience = 15  # epochs without a validation-accuracy improvement before stopping
history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

print("\n=== Starting Training ===")
print("Press Ctrl+C to stop early\n")

try:
    for epoch in range(EPOCHS):
        # Training phase
        model.model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for batch_idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model.model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight each batch's mean loss by its size so that a shorter last
            # batch does not skew the epoch average
            batch_size = labels.size(0)
            train_loss += loss.item() * batch_size
            predicted = outputs.argmax(dim=1)
            train_total += batch_size
            train_correct += (predicted == labels).sum().item()

            if (batch_idx + 1) % 10 == 0:
                print(f"  Batch {batch_idx+1}/{len(train_loader)} | Loss: {loss.item():.4f}")

        train_loss = train_loss / train_total
        train_acc = train_correct / train_total

        # Validation phase
        model.model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model.model(images)
                loss = criterion(outputs, labels)

                batch_size = labels.size(0)
                val_loss += loss.item() * batch_size
                predicted = outputs.argmax(dim=1)
                val_total += batch_size
                val_correct += (predicted == labels).sum().item()

        val_loss = val_loss / val_total
        val_acc = val_correct / val_total

        scheduler.step(val_loss)

        # Save history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        # Print epoch summary
        print(f"\n{'='*60}")
        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"{'='*60}")
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}%")
        print(f"Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc*100:.2f}%")
        print(f"LR: {optimizer.param_groups[0]['lr']:.6f}")

        # Save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save({
                'model_state_dict': model.model.state_dict(),
                'num_classes': model.num_classes,
                'input_size': model.input_size,
                # Names in label-index order, as used during training
                'class_names': checkpoint_class_names
            }, CHECKPOINT_PATH)
            patience_counter = 0
            print(f"✓ Best model saved! (Val Acc: {val_acc*100:.2f}%)")
        else:
            patience_counter += 1

        # Early stopping
        if patience_counter >= patience:
            print(f"\n✓ Early stopping triggered after {epoch+1} epochs")
            break
except KeyboardInterrupt:
    # Honour the "Press Ctrl+C" hint: keep the history and the best checkpoint
    # written so far instead of ending the cell with a traceback
    print("\n⚠ Training interrupted by user - keeping results from completed epochs")

print(f"\n{'='*60}")
print("✓ Training Completed!")
print(f"{'='*60}")
print(f"Best Validation Accuracy: {best_val_acc*100:.2f}%")

## Step 5: Visualize Results

In [None]:
# Plot training results: loss on the left panel, accuracy on the right
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 5))

# Loss plot
loss_ax.plot(history['train_loss'], label='Train Loss')
loss_ax.plot(history['val_loss'], label='Val Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()
loss_ax.grid(True)

# Accuracy plot (stored as fractions, shown as percentages)
train_acc_pct = [acc*100 for acc in history['train_acc']]
val_acc_pct = [acc*100 for acc in history['val_acc']]
acc_ax.plot(train_acc_pct, label='Train Acc')
acc_ax.plot(val_acc_pct, label='Val Acc')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.legend()
acc_ax.grid(True)

# Write the figure to disk before showing it
fig.tight_layout()
fig.savefig('/content/training_results.png', dpi=150, bbox_inches='tight')
plt.show()

print("✓ Training plots saved!")

## Step 6: Test Model

In [None]:
# Test on a sample validation image
def test_prediction(image_path, class_names=None):
    """Classify one image, display it with the prediction and print all class probabilities.

    Args:
        image_path: Path of the image file to classify.
        class_names: Optional index -> name list. Defaults to
            ``train_dataset.classes``, i.e. the label order used during
            training (ImageFolder indexes the class folders in sorted order).
    """
    if class_names is None:
        class_names = train_dataset.classes

    # convert() returns a loaded copy, so the file handle can be closed right away
    with Image.open(image_path) as image_file:
        img = image_file.convert('RGB')

    # Reuse the validation preprocessing so inference matches evaluation exactly
    img_tensor = val_transforms(img).unsqueeze(0).to(device)

    model.model.eval()
    with torch.no_grad():
        outputs = model.model(img_tensor)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        confidence, predicted_class = torch.max(probabilities, 1)

    predicted_name = class_names[predicted_class.item()]

    plt.figure(figsize=(8, 6))
    plt.imshow(img)
    plt.axis('off')
    plt.title(f"Prediction: {predicted_name}\n"
              f"Confidence: {confidence.item()*100:.2f}%", fontsize=14)
    plt.tight_layout()
    plt.show()

    print(f"\nPrediction: {predicted_name}")
    print(f"Confidence: {confidence.item()*100:.2f}%")
    print("\nAll probabilities:")
    for i, class_name in enumerate(class_names):
        print(f"  {class_name}: {probabilities[0][i].item()*100:.2f}%")

# Test on first validation image. Taking it from the dataset index (instead of
# os.listdir) is deterministic and never picks a non-image or non-class entry.
sample_image, sample_label = val_dataset.samples[0]

print(f"Testing on: {sample_image}")
print(f"True class: {val_dataset.classes[sample_label]}")
test_prediction(sample_image)

## Step 7: Download Trained Model

In [None]:
# Download model and training plots to the local machine
from google.colab import files

# (status message, file to download) pairs, processed in order
artifacts = (
    ("Downloading trained model...", '/content/road_condition_model_best.pth'),
    ("\nDownloading training plots...", '/content/training_results.png'),
)
for status_message, artifact_path in artifacts:
    print(status_message)
    files.download(artifact_path)

# Closing banner with the follow-up steps for local deployment
banner = "="*60
print("\n" + banner)
print("✓ TRAINING COMPLETE!")
print(banner)
print("\nNext steps:")
for step in (
    "1. Copy road_condition_model_best.pth to your local project",
    "2. Rename to: models/road_condition_model.pth",
    "3. Run locally: python deploy_model.py",
    "4. Start detection: python main.py",
):
    print(step)
print(banner)

In [None]:
# Optional: Save to Google Drive as backup
# Uncomment to use

# from google.colab import drive
# drive.mount('/content/drive')
# !cp /content/road_condition_model_best.pth /content/drive/MyDrive/
# !cp /content/training_results.png /content/drive/MyDrive/
# print("✓ Backup saved to Google Drive")