<a href="https://colab.research.google.com/github/Saksham9804/Projects/blob/main/Vehicle_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing libraries and Setup

Importing the required libraries

In [None]:
import torch, os, shutil, numpy as np, matplotlib.pyplot as plt
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from torch import nn, optim
from sklearn.model_selection import train_test_split
import kagglehub
from PIL import Image

Downloading the dataset from Kaggle and setting up my device (GPU if available, otherwise CPU)

In [None]:
# Download the dataset (or reuse a cached copy); kagglehub returns the local
# directory that contains the dataset files.
path = kagglehub.dataset_download("mohamedmaher5/vehicle-classification")
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Derive the image root from the path kagglehub actually returned, so the
# notebook still works when the download lands outside /kaggle/input
# (for example in a local cache directory). Fall back to the Kaggle mount
# point if the expected sub-folder is not found there.
data_dir = os.path.join(path, "Vehicles")
if not os.path.isdir(data_dir):
    data_dir = "/kaggle/input/vehicle-classification/Vehicles"
output_dir = "/kaggle/working/vehicle-split"

# Splitting Data into Train, Validation, and Test

Here, I split the dataset into train, validation, and test sets (70% / 15% / 15% per class) and copy the images into one directory per split and class.

In [None]:
# Start from a clean output tree so files from an earlier run cannot leak
# into the new split.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
for split in ['train', 'val', 'test']:
    os.makedirs(os.path.join(output_dir, split), exist_ok=True)

image_exts = ('.png', '.jpg', '.jpeg')
# os.listdir returns entries in arbitrary order. Sorting both the class
# folders and the file names makes the seeded split below reproducible
# across machines and runs.
for class_name in sorted(os.listdir(data_dir)):
    if class_name.startswith('.'):
        continue  # skip hidden entries
    class_dir = os.path.join(data_dir, class_name)
    if not os.path.isdir(class_dir):
        continue
    images = sorted(img for img in os.listdir(class_dir) if img.lower().endswith(image_exts))
    if not images:
        continue
    # 70% train; the remaining 30% is halved into validation and test.
    train_imgs, temp_imgs = train_test_split(images, test_size=0.3, random_state=42)
    val_imgs, test_imgs = train_test_split(temp_imgs, test_size=0.5, random_state=42)
    for split, imgs in zip(['train', 'val', 'test'], [train_imgs, val_imgs, test_imgs]):
        split_dir = os.path.join(output_dir, split, class_name)
        os.makedirs(split_dir, exist_ok=True)
        for img in imgs:
            shutil.copy(os.path.join(class_dir, img), os.path.join(split_dir, img))

# Normalization and Prediction Model

Now I define the per-channel means and standard deviations used for normalization (the ImageNet statistics), set up the transforms, and create a dataset and data loader for each split.

In [None]:
# ImageNet per-channel statistics, matching the pretrained backbone.
mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

# Random augmentation is applied to the training split only. Building the
# pipeline from lists avoids identity Lambda placeholders for val/test
# (no-op stages, and lambdas cannot be pickled by multi-worker loaders).
train_only_augment = [transforms.RandomHorizontalFlip(), transforms.RandomRotation(30)]
data_transforms = {
    s: transforms.Compose(
        [transforms.Resize((224, 224))]
        + (train_only_augment if s == 'train' else [])
        + [transforms.ToTensor(), transforms.Normalize(mean, std)]
    )
    for s in ['train', 'val', 'test']
}
image_datasets = {
    x: datasets.ImageFolder(os.path.join(output_dir, x), data_transforms[x])
    for x in ['train', 'val', 'test']
}
# Only the training loader is shuffled; val/test keep a fixed order.
dataloaders = {
    x: DataLoader(image_datasets[x], batch_size=32, shuffle=(x == 'train'))
    for x in ['train', 'val', 'test']
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
# Class order is taken from the training ImageFolder.
class_names = image_datasets['train'].classes
num_classes = len(class_names)

I am initializing a pre-trained ResNet18 model, replacing its final layer to match my number of classes, and preparing to train on my device.

In [None]:
# Load ResNet18 with its ImageNet-pretrained weights.
pretrained_weights = models.ResNet18_Weights.IMAGENET1K_V1
model = models.resnet18(weights=pretrained_weights)
# Replace the final fully connected layer so it outputs one logit per class.
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, num_classes)
model = model.to(device)
# Cross-entropy loss with Adam over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 139MB/s]


# Training and Testing Data

Writing a function to train my model and save the best one based on validation accuracy.


In [None]:
def train_model(model, criterion, optimizer, epochs=5):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Each epoch runs a training pass followed by a validation pass over the
    module-level ``dataloaders``. Whenever the validation accuracy improves,
    the model's ``state_dict`` is written to ``best_model.pth``.

    Args:
        model: Network to train; updated in place.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that steps the model's parameters.
        epochs: Number of train + validation epochs to run.

    Returns:
        The best validation accuracy observed, as a float (0.0 if no
        validation epoch ran).
    """
    best_acc = 0.0
    checkpoint_saved = False
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}\n" + '-'*20)
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()
            running_loss, running_corrects = 0.0, 0
            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)
                if is_train:
                    optimizer.zero_grad()
                # Gradients are tracked only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)
                    if is_train:
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                        optimizer.step()
                # The loss is a per-batch mean; weight it by batch size so
                # the epoch figure is a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")
            # Always checkpoint the first validation epoch so that
            # 'best_model.pth' exists even if accuracy never exceeds 0.
            if phase == 'val' and (not checkpoint_saved or epoch_acc > best_acc):
                best_acc = epoch_acc
                torch.save(model.state_dict(), 'best_model.pth')
                checkpoint_saved = True
    print(f"Best validation accuracy: {best_acc:.4f}")
    return best_acc

train_model(model, criterion, optimizer, epochs=5)
# Restore the best-validation checkpoint written during training.
# map_location places the loaded tensors on the active device, so the load
# also works if the checkpoint was saved from a different device.
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model.eval()

Epoch 1/5
--------------------
train Loss: 0.6138 Acc: 0.8003




val Loss: 0.5178 Acc: 0.8389
Epoch 2/5
--------------------
train Loss: 0.3668 Acc: 0.8818
val Loss: 1.7095 Acc: 0.6408
Epoch 3/5
--------------------
train Loss: 0.3290 Acc: 0.8926
val Loss: 0.4544 Acc: 0.8735
Epoch 4/5
--------------------


Now, I am writing a function to test my model on the test set, reporting overall and per-class accuracy.


In [None]:
def test_model(model):
    """Evaluate ``model`` on the test split and print overall and per-class accuracy.

    Reads the module-level ``dataloaders['test']``, ``class_names``,
    ``num_classes`` and ``device``.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
    """
    # Evaluate in inference mode regardless of the state the caller left
    # the model in.
    model.eval()
    class_correct, class_total = [0]*num_classes, [0]*num_classes
    with torch.no_grad():
        for inputs, labels in dataloaders['test']:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            # Convert once per batch to plain ints instead of indexing
            # Python lists with device tensors sample by sample.
            for truth, guess in zip(labels.tolist(), preds.tolist()):
                class_total[truth] += 1
                class_correct[truth] += int(truth == guess)
    correct, total = sum(class_correct), sum(class_total)
    # Guard against an empty test loader.
    overall_acc = correct / total if total else 0.0
    print(f"\nTest Accuracy: {overall_acc:.4f}\nPer-class accuracy:")
    for i, cname in enumerate(class_names):
        # A class with no test samples is reported as nan rather than
        # raising ZeroDivisionError.
        class_acc = class_correct[i] / class_total[i] if class_total[i] else float('nan')
        print(f"{cname}: {class_acc:.4f}")

# Evaluate on the test split; prints overall and per-class accuracy.
test_model(model)

# Predicting based on Model

Defining a function to predict the class of a single input image given its path.

In [None]:
def predict_image(img_path, model, class_names, transform):
    """Classify a single image file, print the predicted class and return it.

    Args:
        img_path: Path to the image file to classify.
        model: Classifier; it is switched to evaluation mode.
        class_names: Sequence mapping a class index to its label.
        transform: Preprocessing callable applied to the RGB PIL image; its
            result is given a batch dimension and moved to ``device``.

    Returns:
        The predicted class label taken from ``class_names``.
    """
    model.eval()
    # The context manager closes the file handle; convert() returns an
    # independent in-memory image that is safe to use after the close.
    with Image.open(img_path) as img_file:
        image = img_file.convert("RGB")
    input_tensor = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model(input_tensor)
        _, pred = torch.max(outputs, 1)
    predicted_class = class_names[pred.item()]
    print(f"Predicted Class: {predicted_class}")
    return predicted_class

Defining a helper function that displays a normalized image tensor, reversing the normalization for visualization.

Finally, visualizing one sample from each class in the test set along with model predictions.

In [None]:
def imshow(inp, title=None):
    """Show a normalized (C, H, W) image tensor with matplotlib.

    The normalization is undone with the module-level ``mean`` and ``std``,
    and the result is clipped to [0, 1] before plotting.

    Args:
        inp: Image tensor in channel-first layout.
        title: Optional title drawn above the image.
    """
    # matplotlib expects channel-last (H, W, C) arrays.
    channels_last = inp.numpy().transpose((1, 2, 0))
    denormalized = np.array(std) * channels_last + np.array(mean)
    plt.imshow(np.clip(denormalized, 0, 1))
    if title:
        plt.title(title)
    plt.axis('off')

plt.figure(figsize=(15,10))
num_cols = 4
num_rows = int(np.ceil(len(class_names)/num_cols))
test_dir = os.path.join(output_dir, 'test')
for i, cname in enumerate(class_names):
    sample_dir = os.path.join(test_dir, cname)
    # os.listdir order is arbitrary; sorting makes the sample shown for each
    # class the same on every run.
    img_path = os.path.join(sample_dir, sorted(os.listdir(sample_dir))[0])
    image = datasets.folder.default_loader(img_path)
    # The test transform has no random stages, so compute it once and reuse
    # the result for both inference and display.
    display_tensor = data_transforms['test'](image)
    input_img = display_tensor.unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model(input_img)
        _, pred = torch.max(outputs, 1)
    plt.subplot(num_rows, num_cols, i+1)
    imshow(display_tensor, f"True: {cname}\nPred: {class_names[pred.item()]}")
plt.tight_layout(); plt.show()

To classify any image of your own, enter its file path (for example, the path of a file uploaded to Colab) when prompted.

In [None]:
# Pasted paths often carry stray leading/trailing whitespace; strip it so
# the file lookup does not fail on an otherwise valid path.
image_path = input("Enter the path to the image: ").strip()
predict_image(image_path, model, class_names, data_transforms['test'])

KeyboardInterrupt: Interrupted by user