In [7]:
# !pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2
# !pip install https://storage.googleapis.com/tpu-pytorch/wheels/tpuvm/torch_xla-2.0-cp38-cp38-linux_x86_64.whl
# !pip install opencv-python-headless
# !cp -r drive/MyDrive/put_jetbot_dataset /content/
# Then use: "/content/put_jetbot_dataset/dataset/" as root_dir



In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import torchvision
from PIL import Image
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt
import time


# Set device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

class ApplyCLAHE:
    def __init__(self, clip_limit=2.0, tile_grid_size=(8, 8)):
        self.clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)

    def __call__(self, img):
        img_np = np.array(img)
        img_yuv = cv2.cvtColor(img_np, cv2.COLOR_RGB2YUV)
        img_yuv[:, :, 0] = self.clahe.apply(img_yuv[:, :, 0])
        img_rgb = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2RGB)
        return Image.fromarray(img_rgb)

class JetBotDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.samples = []
        self.transform = transform
        folders = [name for name in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, name))]
        for folder_name in folders:
            csv_path = os.path.join(root_dir, f"{folder_name}.csv")
            folder_path = os.path.join(root_dir, folder_name)
            if not os.path.exists(csv_path):
                continue
            df = pd.read_csv(csv_path, header=None, dtype={0: str, 1: float, 2: float})
            df.columns = ["filename", "forward", "left"]
            for _, row in df.iterrows():
                img_path = os.path.join(folder_path, str(row['filename']).zfill(4) + ".jpg")
                if os.path.exists(img_path):
                    self.samples.append((img_path, float(row["forward"]), float(row["left"])))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, forward, left = self.samples[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        target = torch.tensor([forward, left], dtype=torch.float32)
        return image, target

def get_transforms(augment=True):
    common = [
        ApplyCLAHE(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]
    if augment:
        return transforms.Compose([
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomRotation(2),
            transforms.RandomApply([transforms.GaussianBlur(3)], p=0.5),
            *common
        ])
    else:
        return transforms.Compose(common)

def create_datasets(root_dir, batch_size=32, train_ratio=0.7, val_ratio=0.15):
    dataset = JetBotDataset(root_dir, transform=None)
    total = len(dataset)
    train_len = int(total * train_ratio)
    val_len = int(total * val_ratio)
    test_len = total - train_len - val_len
    train_ds, val_ds, test_ds = random_split(dataset, [train_len, val_len, test_len])
    train_ds.dataset.transform = get_transforms(augment=True)
    val_ds.dataset.transform = get_transforms(augment=False)
    test_ds.dataset.transform = get_transforms(augment=False)

    loader_args = {
        "batch_size": batch_size,
        "pin_memory": True,
        "num_workers": 2,  # Try 2–4 depending on Colab capacity
    }

    return {
        "train": DataLoader(train_ds, shuffle=True, **loader_args),
        "val": DataLoader(val_ds, **loader_args),
        "test": DataLoader(test_ds, **loader_args)
    }


class ModifiedNvidiaNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 5, stride=2, padding=2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))
        self.fc1 = nn.Linear(128 * 4 * 4, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.avgpool(F.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = F.relu(self.fc2(x))
        return self.output(x)

def weighted_mse_loss(pred, target):
    weights = torch.tensor([0.3, 1.0], device=pred.device)
    return ((pred - target) ** 2 * weights).mean()

def train_model(model, loaders, epochs=100):
    best_loss = float('inf')
    patience = 17
    counter = 0
    optimizer = optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)
    train_losses, val_losses = [], []

    for epoch in range(epochs):
        if counter >= patience:
            print("Early stopping")
            break

        model.train()
        train_loss = 0.0
        start = time.time()
        for images, targets in tqdm(loaders["train"], desc=f"Epoch {epoch+1}/{epochs} [Train]"):
            images, targets = images.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = weighted_mse_loss(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        print("Train epoch time:", time.time() - start)

        train_loss /= len(loaders["train"])
        train_losses.append(train_loss)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, targets in tqdm(loaders["val"], desc=f"Epoch {epoch+1}/{epochs} [Val]"):
                images, targets = images.to(device), targets.to(device)
                outputs = model(images)
                loss = weighted_mse_loss(outputs, targets)
                val_loss += loss.item()
        val_loss /= len(loaders["val"])
        val_losses.append(val_loss)
        print(f"Epoch {epoch+1}/{epochs} - Train loss: {train_loss:.6f} - Val loss: {val_loss:.6f}")

        if val_loss < best_loss:
            torch.save(model.state_dict(), 'best_model_jetbot.pth')
            best_loss = val_loss
            counter = 0
            print("New best model saved.")
        else:
            counter += 1

    # Plot loss
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.savefig('loss_curves.png')
    plt.show()

# Main
if __name__ == "__main__":
      # import torch
    print("CUDA available:", torch.cuda.is_available())
    print("Current device:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No CUDA")

    from google.colab import drive
    drive.mount('/content/drive')
    loaders = create_datasets("/content/put_jetbot_dataset/dataset/", batch_size=32)
    model = ModifiedNvidiaNetwork().to(device)
    print("Model on CUDA?", next(model.parameters()).is_cuda)
    train_model(model, loaders, epochs=100)


Using device: cuda
CUDA available: True
Current device: Tesla T4
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Model on CUDA? True


Epoch 1/100 [Train]: 100%|██████████| 166/166 [00:13<00:00, 12.13it/s]


Train epoch time: 13.693222284317017


Epoch 1/100 [Val]: 100%|██████████| 36/36 [00:02<00:00, 14.34it/s]


Epoch 1/100 - Train loss: 0.223130 - Val loss: 0.126989
New best model saved.


Epoch 2/100 [Train]: 100%|██████████| 166/166 [00:13<00:00, 12.31it/s]


Train epoch time: 13.492705583572388


Epoch 2/100 [Val]: 100%|██████████| 36/36 [00:03<00:00, 11.69it/s]


Epoch 2/100 - Train loss: 0.118270 - Val loss: 0.087482
New best model saved.


Epoch 3/100 [Train]: 100%|██████████| 166/166 [00:13<00:00, 12.64it/s]


Train epoch time: 13.141254425048828


Epoch 3/100 [Val]: 100%|██████████| 36/36 [00:03<00:00,  9.72it/s]


Epoch 3/100 - Train loss: 0.101286 - Val loss: 0.088694


Epoch 4/100 [Train]: 100%|██████████| 166/166 [00:13<00:00, 12.28it/s]


Train epoch time: 13.525228500366211


Epoch 4/100 [Val]: 100%|██████████| 36/36 [00:02<00:00, 14.28it/s]


Epoch 4/100 - Train loss: 0.096530 - Val loss: 0.089109


Epoch 5/100 [Train]: 100%|██████████| 166/166 [00:13<00:00, 12.18it/s]


Train epoch time: 13.63398289680481


Epoch 5/100 [Val]: 100%|██████████| 36/36 [00:04<00:00,  8.83it/s]


Epoch 5/100 - Train loss: 0.093277 - Val loss: 0.078823
New best model saved.


Epoch 6/100 [Train]: 100%|██████████| 166/166 [00:13<00:00, 12.04it/s]


Train epoch time: 13.785963296890259


Epoch 6/100 [Val]: 100%|██████████| 36/36 [00:02<00:00, 14.05it/s]


Epoch 6/100 - Train loss: 0.089679 - Val loss: 0.082192


Epoch 7/100 [Train]: 100%|██████████| 166/166 [00:13<00:00, 11.93it/s]


Train epoch time: 13.921893119812012


Epoch 7/100 [Val]: 100%|██████████| 36/36 [00:03<00:00, 10.50it/s]


Epoch 7/100 - Train loss: 0.087548 - Val loss: 0.090245


Epoch 8/100 [Train]: 100%|██████████| 166/166 [00:13<00:00, 12.67it/s]


Train epoch time: 13.10886526107788


Epoch 8/100 [Val]: 100%|██████████| 36/36 [00:03<00:00, 10.50it/s]


Epoch 8/100 - Train loss: 0.082393 - Val loss: 0.092156


Epoch 9/100 [Train]: 100%|██████████| 166/166 [00:13<00:00, 12.23it/s]


Train epoch time: 13.576364517211914


Epoch 9/100 [Val]: 100%|██████████| 36/36 [00:02<00:00, 13.89it/s]


Epoch 9/100 - Train loss: 0.081343 - Val loss: 0.087645


Epoch 10/100 [Train]: 100%|██████████| 166/166 [00:13<00:00, 12.00it/s]


Train epoch time: 13.83735203742981


Epoch 10/100 [Val]: 100%|██████████| 36/36 [00:02<00:00, 14.22it/s]


Epoch 10/100 - Train loss: 0.079168 - Val loss: 0.096133


Epoch 11/100 [Train]: 100%|██████████| 166/166 [00:13<00:00, 12.20it/s]


Train epoch time: 13.60526728630066


Epoch 11/100 [Val]: 100%|██████████| 36/36 [00:02<00:00, 14.34it/s]


Epoch 11/100 - Train loss: 0.078831 - Val loss: 0.094438


Epoch 12/100 [Train]:  57%|█████▋    | 95/166 [00:08<00:05, 13.58it/s]