In [1]:
import torch 
from torch import nn

import numpy as np
from PIL import Image
import torch.optim as optim 

import torchvision
import torchvision.transforms as transforms
from torchvision import models 
from tqdm import tqdm
import os

In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
    # transforms.Resize(224,224)
])

In [3]:
train_data = torchvision.datasets.CIFAR100(root='data',download = True,transform = transform,train = True)
test_data = torchvision.datasets.CIFAR100(root='data',download = True,transform = transform,train = False)

In [4]:
class_names = train_data.classes  # List of class names
print("CIFAR-100 Classes:")
print(class_names,len(class_names))

CIFAR-100 Classes:
['apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle', 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus', 'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle', 'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', 'crab', 'crocodile', 'cup', 'dinosaur', 'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house', 'kangaroo', 'keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain', 'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree', 'plain', 'plate', 'poppy', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket', 'rose', 'sea', 'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake', 'spider', 'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table', 'tank', 'telephone', 'television', 'tiger', 'tractor', 'train', 'trout', 'tulip', 'turtle', 'wardrobe', 'wha

In [5]:
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size = 64,
    shuffle=  True,
    num_workers = 2
)
test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size = 64,
    shuffle=  True,
    num_workers = 2
)

In [6]:
image,label = train_data[0]
print(f"{image,label}\nimage shape = {image.shape}")

(tensor([[[ 1.0000,  1.0000,  1.0000,  ...,  0.5294,  0.6627,  0.4275],
         [ 1.0000,  0.9922,  0.9922,  ...,  0.3333,  0.2627,  0.1451],
         [ 1.0000,  0.9922,  1.0000,  ...,  0.4824,  0.3020, -0.0510],
         ...,
         [ 0.1608,  0.1137,  0.0980,  ..., -0.7647, -0.4902, -0.4039],
         [-0.0431, -0.0588, -0.0118,  ..., -0.8275, -0.2392,  0.1059],
         [-0.3176, -0.3098, -0.2078,  ..., -0.7333, -0.1765,  0.0824]],

        [[ 1.0000,  1.0000,  1.0000,  ...,  0.6078,  0.7569,  0.5216],
         [ 1.0000,  0.9922,  0.9922,  ...,  0.3804,  0.3176,  0.2078],
         [ 1.0000,  0.9922,  1.0000,  ...,  0.5608,  0.3961,  0.0431],
         ...,
         [ 0.4510,  0.4275,  0.4039,  ..., -0.8667, -0.5137, -0.3961],
         [ 0.2314,  0.2157,  0.2549,  ..., -0.8745, -0.1216,  0.2627],
         [-0.0431, -0.0431,  0.0510,  ..., -0.7176,  0.0431,  0.3569]],

        [[ 1.0000,  1.0000,  1.0000,  ...,  0.5137,  0.6000,  0.3098],
         [ 1.0000,  0.9922,  0.9922,  ...,  

In [7]:
# Load pretrained ResNet-18 model
resnet_model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Modify the final layer to match CIFAR-10 (10 classes)
num_features = resnet_model.fc.in_features  # Get the input size of the last layer
resnet_model.fc = nn.Linear(num_features, 100)  # Replace with 10 output classes

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet_model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [8]:
# class NeuralNet(nn.Module):
#     def __init__(self,input_shape,output_shape):
#         super().__init__()
#         self.conv_1 = nn.Sequential(
#             nn.Conv2d(in_channels = input_shape,
#                      out_channels = 12,
#                      kernel_size = 3,
#                      stride = 1,
#                      padding = 1), #output size = (12,32,32)
#             nn.LeakyReLU(),
#             nn.BatchNorm2d(12),
#             nn.MaxPool2d(kernel_size = 2,
#                         stride = 2) #output = (12,16,16)
#         )
#         self.conv_2 = nn.Sequential(
#             nn.Conv2d(in_channels = 12,
#                      out_channels = 24,
#                      kernel_size = 3,
#                      stride = 1,
#                      padding = 1), #output size = (24,16,16)
#             nn.LeakyReLU(),
#             nn.BatchNorm2d(24),
#             nn.MaxPool2d(kernel_size = 2,
#                         stride = 2) # output size =(24,8,8)
#         )
#         self.conv_3 = nn.Sequential(
#             nn.Conv2d(in_channels = 24,
#                      out_channels = 48,
#                      kernel_size = 3,
#                      stride = 1,
#                      padding = 1), #output size = (48,8,8)
#             nn.LeakyReLU(),
#             nn.BatchNorm2d(48),
#             nn.MaxPool2d(kernel_size = 2,
#                         stride = 2) # output size =(48,4,4)
#         )
#         self.conv_4 = nn.Sequential(
#             nn.Conv2d(in_channels = 48,
    #                  out_channels = 96,
    #                  kernel_size = 3,
    #                  stride = 1,
    #                  padding = 1), #output size = (96,4,4)
    #         nn.LeakyReLU(),
    #         nn.BatchNorm2d(96),
    #         nn.MaxPool2d(kernel_size = 2,
    #                     stride = 2) # output size =(96,2,2)
    #     )
    #     self.fc1 = nn.Linear( #fallten 
    #         96*2*2,120
    #     )
    #     self.fc2 = nn.Linear(120,84)
    #     self.fc3 = nn.Linear(84,32)
    #     self.fc4 = nn.Linear(32,output_shape) #fc = fully connected
    #     self.dropout = nn.Dropout(0.3)
    #     self.leakyrelu = nn.LeakyReLU()

    # def forward(self,x):
    #     x = self.conv_1(x)
    #     x = self.conv_2(x)
    #     x = self.conv_3(x)
    #     x = self.conv_4(x)

    #     x = torch.flatten(x,1)
        
    #     x = self.leakyrelu(self.dropout(self.fc1(x)))
    #     x = self.leakyrelu(self.dropout(self.fc2(x)))
    #     x = self.leakyrelu(self.dropout(self.fc3(x)))
    #     x = self.dropout(self.fc4(x))         
    #     return x


class NeuralNet(nn.Module):
    def __init__(self, input_shape, output_shape):
        super().__init__()
        self.conv_1 = nn.Sequential(
            nn.Conv2d(input_shape, 64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2)
        )
        self.conv_2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2, 2)
        )
        self.conv_3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(),
            nn.BatchNorm2d(256),
            nn.MaxPool2d(2, 2)
        )
        self.conv_4 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(),
            nn.BatchNorm2d(512),
            nn.MaxPool2d(2, 2)
        )
        
        self.gap = nn.AdaptiveAvgPool2d(1)  # Global Average Pooling
        self.fc1 = nn.Linear(512, 256)
        self.fc2 = nn.Linear(256, output_shape)
        
        self.dropout = nn.Dropout(0.3)
        self.leakyrelu = nn.LeakyReLU()

    def forward(self, x):
        x = self.conv_1(x)
        x = self.conv_2(x)
        x = self.conv_3(x)
        x = self.conv_4(x)

        x = self.gap(x)  # Apply GAP
        x = torch.flatten(x, 1)  # Flatten before FC layer

        x = self.leakyrelu(self.dropout(self.fc1(x)))
        x = self.fc2(x)  # Removed dropout from last layer
        return x


In [9]:
self_model = NeuralNet(3,len(train_data.classes)).to(device)

In [10]:
loss_fn = nn.CrossEntropyLoss()
optimizer_self = optim.SGD(self_model.parameters(), lr=0.0005, momentum=0.9)
optimizer_resnet = optim.SGD(resnet_model.parameters(),lr = 0.001,momentum = 0.9)

In [11]:
start_epoch = 0
checkpoint_path = "Models/checkpoint.pth"
if os.path.exists(checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    self_model.load_state_dict(checkpoint['model_state_dict'])
    optimizer_self.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch'] + 1
    print(f"Resuming training from epoch {start_epoch}")
else:
    print("No checkpoint found. Starting fresh.")

EPOCHS = 70
for epoch in tqdm(range(start_epoch,EPOCHS)):
    print(f"Training epoch: {epoch}...")

    running_loss = 0.0
    for i, data in enumerate(train_loader):
        
        input,labels = data
        input,labels = input.to(device),labels.to(device)
        optimizer_self.zero_grad()
        outputs = self_model(input)

        loss = loss_fn(outputs,labels)
        loss.backward()
        optimizer_self.step()
        running_loss += loss.item()

    checkpoint = {
            'epoch': epoch,
            'model_state_dict': self_model.state_dict(),
            'optimizer_state_dict': optimizer_self.state_dict(),
        }
    torch.save(checkpoint, checkpoint_path)
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {running_loss/len(train_loader):.4f}, Checkpoint saved!")

    print(f"loss: {running_loss/len(train_loader):.4f} ")

No checkpoint found. Starting fresh.


  0%|                                                                                           | 0/70 [00:00<?, ?it/s]

Training epoch: 0...


  1%|█▏                                                                                 | 1/70 [00:13<15:06, 13.14s/it]

Epoch 1/70, Loss: 4.1567, Checkpoint saved!
loss: 4.1567 
Training epoch: 1...


  3%|██▎                                                                                | 2/70 [00:26<14:49, 13.08s/it]

Epoch 2/70, Loss: 3.6089, Checkpoint saved!
loss: 3.6089 
Training epoch: 2...


  4%|███▌                                                                               | 3/70 [00:39<14:47, 13.24s/it]

Epoch 3/70, Loss: 3.2424, Checkpoint saved!
loss: 3.2424 
Training epoch: 3...


  6%|████▋                                                                              | 4/70 [00:52<14:27, 13.15s/it]

Epoch 4/70, Loss: 2.9570, Checkpoint saved!
loss: 2.9570 
Training epoch: 4...


  7%|█████▉                                                                             | 5/70 [01:05<14:04, 12.99s/it]

Epoch 5/70, Loss: 2.7306, Checkpoint saved!
loss: 2.7306 
Training epoch: 5...


  9%|███████                                                                            | 6/70 [01:18<13:50, 12.98s/it]

Epoch 6/70, Loss: 2.5192, Checkpoint saved!
loss: 2.5192 
Training epoch: 6...


 10%|████████▎                                                                          | 7/70 [01:31<13:35, 12.94s/it]

Epoch 7/70, Loss: 2.3433, Checkpoint saved!
loss: 2.3433 
Training epoch: 7...


 11%|█████████▍                                                                         | 8/70 [01:44<13:33, 13.12s/it]

Epoch 8/70, Loss: 2.1720, Checkpoint saved!
loss: 2.1720 
Training epoch: 8...


 13%|██████████▋                                                                        | 9/70 [01:57<13:16, 13.06s/it]

Epoch 9/70, Loss: 2.0124, Checkpoint saved!
loss: 2.0124 
Training epoch: 9...


 14%|███████████▋                                                                      | 10/70 [02:10<12:58, 12.98s/it]

Epoch 10/70, Loss: 1.8630, Checkpoint saved!
loss: 1.8630 
Training epoch: 10...


 16%|████████████▉                                                                     | 11/70 [02:23<12:40, 12.89s/it]

Epoch 11/70, Loss: 1.7160, Checkpoint saved!
loss: 1.7160 
Training epoch: 11...


 17%|██████████████                                                                    | 12/70 [02:35<12:28, 12.90s/it]

Epoch 12/70, Loss: 1.5762, Checkpoint saved!
loss: 1.5762 
Training epoch: 12...


 19%|███████████████▏                                                                  | 13/70 [02:50<12:35, 13.25s/it]

Epoch 13/70, Loss: 1.4376, Checkpoint saved!
loss: 1.4376 
Training epoch: 13...


 20%|████████████████▍                                                                 | 14/70 [03:03<12:20, 13.23s/it]

Epoch 14/70, Loss: 1.3018, Checkpoint saved!
loss: 1.3018 
Training epoch: 14...


 21%|█████████████████▌                                                                | 15/70 [03:16<12:07, 13.24s/it]

Epoch 15/70, Loss: 1.1725, Checkpoint saved!
loss: 1.1725 
Training epoch: 15...


 23%|██████████████████▋                                                               | 16/70 [03:30<12:05, 13.44s/it]

Epoch 16/70, Loss: 1.0502, Checkpoint saved!
loss: 1.0502 
Training epoch: 16...


 24%|███████████████████▉                                                              | 17/70 [03:44<12:00, 13.59s/it]

Epoch 17/70, Loss: 0.9250, Checkpoint saved!
loss: 0.9250 
Training epoch: 17...


 24%|███████████████████▉                                                              | 17/70 [03:48<11:51, 13.42s/it]


KeyboardInterrupt: 

In [21]:
start_epoch = 0
checkpoint_path = "Models/checkpoint_resnet_1.pth"
if os.path.exists(checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    resnet_model.load_state_dict(checkpoint['model_state_dict'])
    optimizer_resnet.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch'] + 1
    print(f"Resuming training from epoch {start_epoch}")
else:
    print("No checkpoint found. Starting fresh.")

EPOCHS = 30
for epoch in tqdm(range(start_epoch,EPOCHS)):
    print(f"Training epoch: {epoch}...")


    running_loss = 0.0
    for i, data in enumerate(train_loader):
        input,labels = data
        input,labels = input.to(device),labels.to(device)
        optimizer_resnet.zero_grad()
        outputs = resnet_model(input)

        loss = loss_fn(outputs,labels)
        loss.backward()
        optimizer_resnet.step()
        running_loss += loss.item()
    checkpoint = {
            'epoch': epoch,
            'model_state_dict': resnet_model.state_dict(),
            'optimizer_state_dict': optimizer_resnet.state_dict(),
        }
    torch.save(checkpoint, checkpoint_path)
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {running_loss/len(train_loader):.4f}, Checkpoint saved!")
    print(f"loss: {running_loss/len(train_loader):.4f} ")

Resuming training from epoch 4


  0%|                                                                                           | 0/26 [00:00<?, ?it/s]

Training epoch: 4...


  4%|███▏                                                                               | 1/26 [00:17<07:10, 17.23s/it]

Epoch 5/30, Loss: 1.2499, Checkpoint saved!
loss: 1.2499 
Training epoch: 5...


  8%|██████▍                                                                            | 2/26 [00:34<06:47, 17.00s/it]

Epoch 6/30, Loss: 1.0602, Checkpoint saved!
loss: 1.0602 
Training epoch: 6...


 12%|█████████▌                                                                         | 3/26 [00:51<06:31, 17.04s/it]

Epoch 7/30, Loss: 0.8807, Checkpoint saved!
loss: 0.8807 
Training epoch: 7...


 15%|████████████▊                                                                      | 4/26 [01:08<06:13, 16.99s/it]

Epoch 8/30, Loss: 0.7259, Checkpoint saved!
loss: 0.7259 
Training epoch: 8...


 19%|███████████████▉                                                                   | 5/26 [01:25<05:57, 17.03s/it]

Epoch 9/30, Loss: 0.6007, Checkpoint saved!
loss: 0.6007 
Training epoch: 9...


 23%|███████████████████▏                                                               | 6/26 [01:41<05:37, 16.87s/it]

Epoch 10/30, Loss: 0.4971, Checkpoint saved!
loss: 0.4971 
Training epoch: 10...


 27%|██████████████████████▎                                                            | 7/26 [02:00<05:29, 17.36s/it]

Epoch 11/30, Loss: 0.4262, Checkpoint saved!
loss: 0.4262 
Training epoch: 11...


 27%|██████████████████████▎                                                            | 7/26 [02:07<05:47, 18.28s/it]


KeyboardInterrupt: 

In [22]:
torch.save(self_model.state_dict(),'Models/trained_self.pth')
torch.save(resnet_model.state_dict(),'Models/trained_resnet.pth')

In [23]:
self_model = NeuralNet(3,len(train_data.classes)).to(device)
self_model.load_state_dict(torch.load('Models/trained_self.pth'))
resnet_model = resnet_model.to(device)
resnet_model.load_state_dict(torch.load('Models/trained_resnet.pth'))

<All keys matched successfully>

In [24]:
correct = 0
total = 0
total_loss = 0  # Track loss

self_model.eval()
criterion = nn.CrossEntropyLoss()  # Define loss function

with torch.inference_mode():
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        
        outputs = self_model(images)
        loss = criterion(outputs, labels)  # Compute loss
        total_loss += loss.item()  # Accumulate loss
        
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Compute final metrics
accuracy = 100 * correct / total
average_loss = total_loss / len(test_loader)  # Compute average loss

print(f"Test Accuracy: {accuracy:.2f}%")
print(f"Validation Loss: {average_loss:.4f}")


Test Accuracy: 48.69%
Validation Loss: 1.9860


In [25]:
correct = 0
total = 0
total_loss = 0  # Track loss

resnet_model.eval()
criterion = nn.CrossEntropyLoss()  # Define loss function

with torch.inference_mode():
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        
        outputs = resnet_model(images)
        loss = criterion(outputs, labels)  # Compute loss
        total_loss += loss.item()  # Accumulate loss
        
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Compute final metrics
accuracy = 100 * correct / total
average_loss = total_loss / len(test_loader)  # Compute average loss

print(f"Test Accuracy: {accuracy:.2f}%")
print(f"Validation Loss: {average_loss:.4f}")


Test Accuracy: 51.95%
Validation Loss: 2.4296


In [26]:
new_transform = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])

def load_image(image_path):
    image = Image.open(image_path)
    image = new_transform(image)
    image = image.unsqueeze(0)
    return image

image_paths = ['example.jpg','example_1.jpeg','example_2.jpg']
images = [load_image(img) for img in image_paths]

self_model.eval()
with torch.inference_mode():
    for image in images:
        image = image.to(device)
        output = self_model(image)
        _,predicted = torch.max(output,1)
        print(f"Prediction: {class_names[predicted.item()]}")

Prediction: mountain
Prediction: tractor
Prediction: fox


In [27]:
new_transform = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])

def load_image(image_path):
    image = Image.open(image_path)
    image = new_transform(image)
    image = image.unsqueeze(0)
    return image

image_paths = ['example.jpg','example_1.jpeg','example_2.jpg']
images = [load_image(img) for img in image_paths]

self_model.eval()
with torch.inference_mode():
    for image in images:
        image = image.to(device)
        output = resnet_model(image)
        _,predicted = torch.max(output,1)
        print(f"Prediction: {class_names[predicted.item()]}")

Prediction: turtle
Prediction: camel
Prediction: lion
