In [None]:
!pip install torch torchvision matplotlib opencv-python




In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import torchvision.datasets as datasets


In [None]:
def _to_rgb(img):
    """Return the PIL image converted to 3-channel RGB.

    Some Caltech 101 samples decode to a single channel; without this step the
    3-channel Normalize below fails to broadcast over a [1, H, W] tensor.
    """
    return img.convert('RGB')


# Transform: force RGB, resize images to a fixed size and normalize
transform = transforms.Compose([
    transforms.Lambda(_to_rgb),  # Guarantee 3 channels for every sample
    transforms.Resize((128, 128)),  # Resize to 128x128
    transforms.ToTensor(),  # Convert to Tensor with values in [0, 1]
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Map each channel to [-1, 1]
])

# Load Caltech 101 Dataset (downloaded into ./data on first use)
trainset = datasets.Caltech101(root='./data', download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True)

# No separate held-out split is created: the "test" loader is the very same
# object as the training loader, so any accuracy measured on it is a
# training-set accuracy, not an estimate of generalisation.
testloader = trainloader  # Using train data for validation due to dataset size

# Get class names (indexable by the integer labels yielded by the dataset)
classes = trainset.categories


Downloading...
From (original): https://drive.google.com/uc?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp
From (redirected): https://drive.usercontent.google.com/download?id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp&confirm=t&uuid=20854df0-f32d-42c2-a3ac-fd0ecbb207cf
To: /content/data/caltech101/101_ObjectCategories.tar.gz
100%|██████████| 132M/132M [00:04<00:00, 27.9MB/s]


Extracting ./data/caltech101/101_ObjectCategories.tar.gz to ./data/caltech101


Downloading...
From (original): https://drive.google.com/uc?id=175kQy3UsZ0wUEHZjqkUDdNVssr7bgh_m
From (redirected): https://drive.usercontent.google.com/download?id=175kQy3UsZ0wUEHZjqkUDdNVssr7bgh_m&confirm=t&uuid=91f28927-99cf-4c51-816e-847d5658d71a
To: /content/data/caltech101/Annotations.tar
100%|██████████| 14.0M/14.0M [00:00<00:00, 50.4MB/s]


Extracting ./data/caltech101/Annotations.tar to ./data/caltech101


In [None]:
class CNNModel(nn.Module):
    """Small two-stage convolutional classifier for square RGB images.

    Layout: Conv(3->32) -> ReLU -> MaxPool(2) -> Conv(32->64) -> ReLU ->
    MaxPool(2) -> Linear(->512) -> ReLU -> Linear(->num_classes).

    Args:
        num_classes: Number of output logits. Defaults to 101 (Caltech 101).
        input_size: Side length in pixels of the square input images. Each of
            the two pooling stages halves it (rounding down). Defaults to 128.

    Raises:
        ValueError: If ``num_classes`` is not positive or ``input_size`` is
            too small to survive the two pooling stages.
    """

    def __init__(self, num_classes=101, input_size=128):
        super().__init__()

        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")
        if input_size < 4:
            raise ValueError(f"input_size must be >= 4, got {input_size}")

        # Convolutional layers (3x3 kernels with padding=1 keep the spatial size)
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)

        # Max pooling layer (halves height and width on every application)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Two poolings shrink each side by a factor of 4 (floor division).
        pooled_side = input_size // 4
        self.flat_features = 64 * pooled_side * pooled_side

        # Fully connected layers
        self.fc1 = nn.Linear(self.flat_features, 512)
        self.fc2 = nn.Linear(512, num_classes)

        # Activation function
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (batch, 3, input_size, input_size).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised logits.
        """
        # First convolutional layer, followed by ReLU and max pooling
        x = self.pool(self.relu(self.conv1(x)))

        # Second convolutional layer, followed by ReLU and max pooling
        x = self.pool(self.relu(self.conv2(x)))

        # Flatten per sample. Keeping the batch axis fixed means a wrongly
        # sized input fails loudly in fc1 instead of being silently reshaped
        # into a larger batch (which view(-1, N) would do).
        x = x.flatten(start_dim=1)

        # Fully connected layers
        x = self.relu(self.fc1(x))
        x = self.fc2(x)

        return x

# Seed PyTorch's global RNG so the randomly initialised weights are the same
# on every Restart & Run All.
torch.manual_seed(42)

# Instantiate the model (created on the CPU with the default configuration)
model = CNNModel()


In [None]:
# Optimisation setup: Adam over every model parameter, scored with
# cross-entropy on the raw logits.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()


In [None]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Move the model's parameters onto that device
model = model.to(device)

# Training function
def train_model(model, trainloader, criterion, optimizer, epochs=1, device=None, log_every=100):
    """Train ``model`` in place and print the running average loss.

    Args:
        model: The ``nn.Module`` to optimise.
        trainloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``trainloader``.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU for a parameter-less model), so the
            function does not depend on a notebook-level global.
        log_every: Print the average loss after this many batches. Batches
            left over at the end of an epoch are reported as well.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError(f"log_every must be >= 1, got {log_every}")

    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        pending = 0  # batches accumulated since the last printed report
        batch_number = 0
        for batch_number, (inputs, labels) in enumerate(trainloader, 1):
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Print statistics
            running_loss += loss.item()
            pending += 1
            if pending == log_every:
                print(f"[Epoch {epoch+1}, Batch {batch_number}] Loss: {running_loss / pending:.3f}")
                running_loss = 0.0
                pending = 0

        # Report the final partial window so the tail of the epoch is not lost.
        if pending:
            print(f"[Epoch {epoch+1}, Batch {batch_number}] Loss: {running_loss / pending:.3f}")

# Run a single training epoch over the training loader
train_model(
    model=model,
    trainloader=trainloader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=1,
)


RuntimeError: output with shape [1, 128, 128] doesn't match the broadcast shape [3, 128, 128]

In [None]:
def test_model(model, testloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Accuracy: {100 * correct / total:.2f}%")

# Report accuracy on the evaluation loader
test_model(model=model, testloader=testloader)


In [None]:
def imshow(img):
    """Display a normalised channel-first image tensor with matplotlib.

    Args:
        img: Tensor laid out as (channels, height, width) whose values were
            normalised with mean 0.5 and std 0.5 per channel. It may live on
            any device and may require grad.
    """
    # Detach and move to the CPU first: Tensor.numpy() refuses tensors that
    # require grad or are stored on an accelerator.
    img = img.detach().cpu()
    img = img / 2 + 0.5  # Unnormalize the image back to [0, 1]
    # matplotlib expects (height, width, channels)
    npimg = img.permute(1, 2, 0).numpy()
    plt.imshow(npimg)
    plt.show()

# Get one batch of test images. Use the built-in next(): iterators follow the
# standard Python protocol, and the `.next()` method is not available on
# DataLoader iterators in recent PyTorch releases.
dataiter = iter(testloader)
images, labels = next(dataiter)

# Show at most 8 samples, and never more than the batch actually contains
num_shown = min(8, len(labels))

# Print images and ground truth (the grid shows exactly the labelled samples)
imshow(torchvision.utils.make_grid(images[:num_shown]))
print('Ground Truth:', ' '.join(classes[idx] for idx in labels[:num_shown].tolist()))

# Predict using the model: evaluation mode, no autograd bookkeeping
model.eval()
images = images.to(device)
with torch.no_grad():
    outputs = model(images)
_, predicted = torch.max(outputs, 1)

# Print predicted labels
print('Predicted:', ' '.join(classes[idx] for idx in predicted[:num_shown].tolist()))
