In [1]:
import os

import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Per-channel mean / std used to normalise the three colour channels
# (the commonly used ImageNet values).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing applied to every image, in order.
preprocessing_steps = [
    transforms.Resize((224, 224)),                              # fixed 224x224 model input size
    transforms.ToTensor(),                                      # image -> tensor
    transforms.Normalize(mean=channel_mean, std=channel_std),   # per-channel standardisation
]
transform = transforms.Compose(preprocessing_steps)


In [3]:
# Root folder of the image dataset, handed to ImageFolder as `root`.
# The hard-coded path only exists on the original author's machine, so it can be
# overridden by setting the REACH_DATASET_DIR environment variable before
# starting the kernel; when the variable is unset the original path is used.
data_dir = os.environ.get(
    'REACH_DATASET_DIR',
    'C:/Users/Nani/Documents/REACH-AI-systems/Naveen/Dataset',
)

# Every image loaded through this dataset goes through `transform`.
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

In [4]:
# 80 / 20 train / test split of the full dataset.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size  # remainder, so the two sizes always sum to len(dataset)

# Seed the split explicitly: without a generator, random_split draws from the
# global RNG and a Restart & Run All produces a different train/test partition
# (and therefore different reported results) every time.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=split_generator,
)

In [5]:
# Mini-batch iterators over the two splits (32 samples per batch).
# Only the training loader reshuffles; the test loader keeps a fixed order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
)

In [6]:
# Build a ResNet-50 and request its pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm the installed version before switching.
model = models.resnet50(pretrained=True)



In [7]:
# One output unit per entry in dataset.classes (i.e. per object-category folder).
num_classes = len(dataset.classes)

# Swap the final fully-connected layer for a fresh one with the same input
# width but `num_classes` outputs.
fc_in_features = model.fc.in_features
model.fc = nn.Linear(in_features=fc_in_features, out_features=num_classes)

In [8]:
# Put the model in training mode; as the last expression of the cell, the
# returned module is echoed as the cell output below.
model.train()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [9]:
# Loss: cross-entropy between the model outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()

# Optimiser: Adam over every parameter of the model, learning rate 1e-3.
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
)

In [10]:
# Fine-tune the model on the training split.
num_epochs = 10  # total passes over train_loader; also used in the progress message

for epoch in range(num_epochs):
    # Re-enter training mode at the start of every epoch so that re-running this
    # cell after the model.eval() cell does not silently train with the
    # BatchNorm layers stuck in inference mode.
    model.train()

    running_loss = 0.0
    for inputs, labels in train_loader:
        # Zero the parameter gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Accumulate the batch loss as a plain Python number
        running_loss += loss.item()

    # Report the mean of the per-batch losses for this epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}')

Epoch [1/10], Loss: 2.0697299792216373
Epoch [2/10], Loss: 1.5940577043936803
Epoch [3/10], Loss: 1.1739706580455487
Epoch [4/10], Loss: 0.7815073327376292
Epoch [5/10], Loss: 0.6515202808838624
Epoch [6/10], Loss: 0.543705186018577
Epoch [7/10], Loss: 0.5623479657448255
Epoch [8/10], Loss: 0.4582941257036649
Epoch [9/10], Loss: 0.28345766262366223
Epoch [10/10], Loss: 0.29610168274778587


In [11]:
# Switch the model to evaluation mode before running predictions; as the last
# expression of the cell, the returned module is echoed as the cell output below.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [13]:
# Quick test: print the predictions for the first batch of the test loader.

# Make the cell self-contained: force evaluation mode here instead of relying on
# an earlier cell having been run (and not followed by a re-run of training).
model.eval()

dataiter = iter(test_loader)
images, labels = next(dataiter)

# No gradients are needed for inference.
with torch.no_grad():
    outputs = model(images)
    # Index of the highest-scoring class for every sample in the batch
    # (replaces the legacy `torch.max(outputs.data, 1)` idiom).
    predicted = outputs.argmax(dim=1)

# Map class indices back to the class names exposed by the dataset.
predicted_labels = [dataset.classes[idx] for idx in predicted.tolist()]
true_labels = [dataset.classes[idx] for idx in labels.tolist()]

print("Predicted Labels: ", predicted_labels)
print("True Labels: ", true_labels)

Predicted Labels:  ['Ruler', 'Push_Pin', 'Scissors', 'Screwdriver', 'Speaker', 'Flowers', 'Flowers', 'Helmet', 'Spoon', 'Eraser', 'Pencil', 'Push_Pin', 'Spoon', 'Screwdriver', 'Screwdriver', 'Hammer', 'Push_Pin', 'Push_Pin', 'Push_Pin', 'Pencil', 'Flowers', 'Pen', 'Speaker', 'Speaker', 'Speaker', 'Pen', 'Speaker', 'Ruler', 'Speaker', 'Screwdriver', 'Paper_Clip', 'Speaker']
True Labels:  ['Ruler', 'Flowers', 'Scissors', 'Pen', 'Speaker', 'Flowers', 'Flowers', 'Helmet', 'Spoon', 'Eraser', 'Flowers', 'Push_Pin', 'Fork', 'Pen', 'Pencil', 'Hammer', 'Paper_Clip', 'Mug', 'ToothBrush', 'Pen', 'Flowers', 'Pen', 'Mug', 'Helmet', 'Pencil', 'Pencil', 'Hammer', 'ToothBrush', 'Mug', 'Spoon', 'Paper_Clip', 'Push_Pin']
