In [1]:
from datasets import load_dataset

dataset = load_dataset("tanganke/gtsrb")

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Generating train split: 100%|██████████| 26640/26640 [00:01<00:00, 17264.47 examples/s]
Generating test split: 100%|██████████| 12630/12630 [00:00<00:00, 26352.64 examples/s]
Generating contrast split: 100%|██████████| 12630/12630 [00:00<00:00, 19845.22 examples/s]
Generating gaussian_noise split: 100%|██████████| 12630/12630 [00:00<00:00, 21421.91 examples/s]
Generating impulse_noise split: 100%|██████████| 12630/12630 [00:00<00:00, 20445.74 examples/s]
Generating jpeg_compression split: 100%|██████████| 12630/12630 [00:00<00:00, 18522.36 examples/s]
Generating motion_blur split: 100%|██████████| 12630/12630 [00:00<00:00, 17053.36 examples/s]
Generating pixelate split: 100%|█████

In [2]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 26640
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 12630
    })
    contrast: Dataset({
        features: ['image', 'label'],
        num_rows: 12630
    })
    gaussian_noise: Dataset({
        features: ['image', 'label'],
        num_rows: 12630
    })
    impulse_noise: Dataset({
        features: ['image', 'label'],
        num_rows: 12630
    })
    jpeg_compression: Dataset({
        features: ['image', 'label'],
        num_rows: 12630
    })
    motion_blur: Dataset({
        features: ['image', 'label'],
        num_rows: 12630
    })
    pixelate: Dataset({
        features: ['image', 'label'],
        num_rows: 12630
    })
    spatter: Dataset({
        features: ['image', 'label'],
        num_rows: 12630
    })
})


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

In [5]:
train_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.RandomRotation(20),
    transforms.RandomResizedCrop(32, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.3, contrast=0.3),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),
                         (0.5, 0.5, 0.5))
])
test_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),
                         (0.5,0.5,0.5))
])

In [6]:
from torch.utils.data import Dataset

class TrafficDataset(Dataset):
    def __init__(self, hf_dataset, transform=None):
        self.dataset = hf_dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image = self.dataset[idx]["image"]
        label = self.dataset[idx]["label"]

        if self.transform:
            image = self.transform(image)

        return image, label

In [7]:
train_dataset = TrafficDataset(dataset["train"], transform=train_transform)
test_dataset = TrafficDataset(dataset["test"], transform=test_transform)

In [8]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [9]:
class TrafficCNN(nn.Module):
    def __init__(self, num_classes):
        super(TrafficCNN, self).__init__()

        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 2 * 2, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_classes = len(dataset["train"].features["label"].names)

model = TrafficCNN(num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
epochs = 3

for epoch in range(epochs):
    model.train()
    running_loss = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")

Epoch 1/8, Loss: 1.8031
Epoch 2/8, Loss: 0.5953
Epoch 3/8, Loss: 0.3269
Epoch 4/8, Loss: 0.2345
Epoch 5/8, Loss: 0.1908
Epoch 6/8, Loss: 0.1627
Epoch 7/8, Loss: 0.1402
Epoch 8/8, Loss: 0.1200


In [12]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 94.18%


In [17]:
import json

# Save model weights (CPU compatible)
torch.save(model.state_dict(), "trafficcnn.pth")
print("✓ Model weights saved to trafficcnn.pth")

# Save class names
class_names = dataset["train"].features["label"].names
class_names_dict = {str(i): name for i, name in enumerate(class_names)}
with open("class_names.json", "w") as f:
    json.dump(class_names_dict, f, indent=2)
print(f"✓ Class names saved to class_names.json ({len(class_names)} classes)")
print(f"Classes: {class_names[:5]}...")  # Show first 5 classes

✓ Model weights saved to trafficcnn.pth
✓ Class names saved to class_names.json (43 classes)
Classes: ['red and white circle 20 kph speed limit', 'red and white circle 30 kph speed limit', 'red and white circle 50 kph speed limit', 'red and white circle 60 kph speed limit', 'red and white circle 70 kph speed limit']...


In [16]:
from PIL import Image
import torch
import json
import argparse

def predict_image_path(model, transform, image_path, class_names, device="cpu"):
    model.eval()
    device = torch.device(device)

    image = Image.open(image_path).convert("RGB")
    x = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(x)
        probs = torch.softmax(outputs, dim=1)
        conf, pred = torch.max(probs, 1)

    return class_names[pred.item()], float(conf.item()), probs.squeeze(0).cpu()

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--img", required=True, help="Path to image")
    args = parser.parse_args()

    device = "cpu"

    # load class names
    with open("class_names.json", "r") as f:
        class_names = json.load(f)

    # TODO: create model and load weights
    # model = TrafficCNN(num_classes=len(class_names))
    # model.load_state_dict(torch.load("trafficcnn.pth", map_location="cpu"))
    # model.to(torch.device(device))

    # TODO: define same transform as notebook
    # transform = ...

    # predicted_class, confidence, probs = predict_image_path(model, transform, args.img, class_names, device=device)
    # print("Prediction:", predicted_class)
    # print("Confidence:", confidence * 100, "%")


[1;34musage: [0m[1;35mipykernel_launcher.py[0m [[32m-h[0m] [36m--img [33mIMG[0m
ipykernel_launcher.py: error: the following arguments are required: --img


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [15]:
predict_uploaded_image(model, test_transform)

NameError: name 'files' is not defined

In [None]:
import pickle

# example object (replace with your own)
obj = model   # or dataset, dict, list, etc.

with open("HCL_Project.pkl", "wb") as f:
    pickle.dump(obj, f)

print("Pickle file saved!")


Pickle file saved!


In [None]:
from google.colab import files

files.download("HCL_Project.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>