In [None]:
import torch
from torch import nn

import numpy as np
from PIL import Image
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms
from torchvision import models
from tqdm import tqdm
import os

In [None]:
# Preprocessing pipeline: convert each image to a tensor, then normalize every
# one of the three channels with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
# Resizing (e.g. to 224x224) is intentionally left disabled.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [None]:
# Unfiltered CIFAR-100 splits (all 100 classes), stored under ./data.
train_data = torchvision.datasets.CIFAR100(
    root='data',
    train=True,
    transform=transform,
    download=True,
)
test_data = torchvision.datasets.CIFAR100(
    root='data',
    train=False,
    transform=transform,
    download=True,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to data/cifar-100-python.tar.gz


100%|██████████| 169M/169M [00:03<00:00, 45.9MB/s]


Extracting data/cifar-100-python.tar.gz to data
Files already downloaded and verified


In [None]:
import torch
import numpy as np
import torchvision
from torchvision.datasets import CIFAR100
import torchvision.transforms as transforms
import os

# Original CIFAR-100 label ids that are kept for the reduced task.
selected_classes = [
    0, 8, 9, 10, 11, 13, 16,
    20, 22, 23, 25, 28, 35, 37,
    39, 40, 46, 48, 50, 53, 61,
    68, 79, 82, 84, 86, 87, 98,
]

# Lookup table: original CIFAR-100 label -> contiguous label (position in
# selected_classes).
class_map = dict(zip(selected_classes, range(len(selected_classes))))

# Custom CIFAR-100 Dataset
class CustomCIFAR100(CIFAR100):
    """CIFAR-100 restricted to a subset of classes, with labels remapped to 0..K-1.

    Args:
        root: Dataset root directory, forwarded to ``CIFAR100``.
        train: Selects the train (True) or test (False) split, forwarded to ``CIFAR100``.
        transform: Optional callable applied to every image in ``__getitem__``.
            It receives the raw array row taken from ``self.data`` (no PIL
            conversion is done here).
        download: Forwarded to ``CIFAR100``.
        selected_classes: Optional iterable of original CIFAR-100 label ids to
            keep. Its order defines the new label ids (first entry -> 0, ...).
            When ``None`` the dataset is left unfiltered.
    """

    def __init__(self, root, train=True, transform=None, download=False, selected_classes=None):
        super().__init__(root=root, train=train, download=download)
        self.transform = transform  # applied manually in __getitem__

        if selected_classes is not None:
            selected_classes = list(selected_classes)

            # Old CIFAR-100 label -> new contiguous label.
            self.class_map = {old_label: new_label for new_label, old_label in enumerate(selected_classes)}

            # Vectorised membership test instead of a per-sample list scan.
            original_targets = np.asarray(self.targets)
            keep = np.isin(original_targets, selected_classes)

            self.data = np.asarray(self.data)[keep]
            # Explicit int64 so the labels collate to long tensors on every platform.
            self.targets = np.array(
                [self.class_map[label] for label in original_targets[keep]],
                dtype=np.int64,
            )

            # Class names follow the new label order. This must stay inside the
            # guard: indexing with selected_classes=None would raise TypeError.
            self.classes = [self.classes[i] for i in selected_classes]
            self.class_to_idx = {name: idx for idx, name in enumerate(self.classes)}

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``, with ``transform`` applied to the image if set."""
        img, target = self.data[index], self.targets[index]

        if self.transform is not None:
            img = self.transform(img)

        return img, target

# Preprocessing used by the filtered datasets: tensor conversion followed by
# per-channel normalization with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5],
        ),
    ]
)

# Build the class-filtered train and test splits.
train_custom = CustomCIFAR100(
    root='./data',
    train=True,
    transform=transform,
    download=True,
    selected_classes=selected_classes,
)
test_custom = CustomCIFAR100(
    root='./data',
    train=False,
    transform=transform,
    download=True,
    selected_classes=selected_classes,
)

# Report how many samples remain after filtering.
print(f"Filtered Train Dataset Size: {len(train_custom)}")
print(f"Filtered Test Dataset Size: {len(test_custom)}")

# Sanity check: list the distinct remapped labels present in each split.
print(f"Unique classes in new train dataset: {set(train_custom.targets)}")
print(f"Unique classes in new test dataset: {set(test_custom.targets)}")


Files already downloaded and verified
Files already downloaded and verified
Filtered Train Dataset Size: 14000
Filtered Test Dataset Size: 2800
Unique classes in new train dataset: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}
Unique classes in new test dataset: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}


In [None]:
# Human-readable names of the kept classes, ordered by remapped label id, so
# class_names[i] is the name for model output i. (Previously this was bound to
# selected_classes, i.e. the original integer label ids, not names.)
class_names = train_custom.classes

print("CIFAR-100 Classes (filtered):")
print(class_names, len(class_names))

CIFAR-100 Classes (filtered):
[0, 8, 9, 10, 11, 13, 16, 20, 22, 23, 25, 28, 35, 37, 39, 40, 46, 48, 50, 53, 61, 68, 79, 82, 84, 86, 87, 98] 28


In [None]:
# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    train_custom,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)

# Evaluation does not benefit from shuffling; a fixed order keeps test runs
# deterministic and comparable.
test_loader = torch.utils.data.DataLoader(
    test_custom,
    batch_size=64,
    shuffle=False,
    num_workers=2,
)

In [None]:
# Peek at the first training sample: the transformed image and its remapped label.
image, label = train_custom[0]
print(f"{image,label}\nimage shape = {image.shape}")

(tensor([[[0.9608, 0.9451, 0.9373,  ..., 0.9608, 0.9608, 0.9529],
         [0.9608, 0.9451, 0.9373,  ..., 0.9686, 0.9686, 0.9608],
         [0.9686, 0.9608, 0.9608,  ..., 0.9608, 0.9608, 0.9608],
         ...,
         [0.7333, 0.7333, 0.7647,  ..., 0.5608, 0.6235, 0.8275],
         [0.7647, 0.7804, 0.7961,  ..., 0.6000, 0.6627, 0.8353],
         [0.8275, 0.8353, 0.8431,  ..., 0.7176, 0.7490, 0.8196]],

        [[0.9608, 0.9529, 0.9451,  ..., 0.9608, 0.9608, 0.9608],
         [0.9686, 0.9529, 0.9373,  ..., 0.9686, 0.9765, 0.9686],
         [0.9686, 0.9451, 0.9216,  ..., 0.9529, 0.9529, 0.9529],
         ...,
         [0.6706, 0.6157, 0.6235,  ..., 0.3804, 0.5137, 0.7961],
         [0.7490, 0.7176, 0.6941,  ..., 0.4510, 0.5765, 0.8196],
         [0.8275, 0.8196, 0.8039,  ..., 0.6392, 0.6941, 0.8039]],

        [[0.9451, 0.9059, 0.8745,  ..., 0.9294, 0.9294, 0.9294],
         [0.9216, 0.8667, 0.8353,  ..., 0.8980, 0.9059, 0.9059],
         [0.9137, 0.8588, 0.8275,  ..., 0.8667, 0.8824, 0

In [None]:
# Load pretrained ResNet-18 model
resnet_model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Replace the final fully connected layer so the network outputs one logit per
# selected CIFAR-100 class (28 here) instead of the pretrained head's classes.
num_features = resnet_model.fc.in_features  # input size of the original last layer
resnet_model.fc = nn.Linear(num_features, len(selected_classes))

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet_model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 84.8MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
class NeuralNet(nn.Module):
    """Small CNN classifier: four convolutional stages followed by a four-layer MLP head.

    Every stage is Conv2d(3x3, stride 1, padding 1) -> LeakyReLU -> BatchNorm2d
    -> MaxPool2d(2, 2), so each stage keeps the conv output size and then halves
    it. The first linear layer expects 96*2*2 features, which corresponds to a
    32x32 input (32 -> 16 -> 8 -> 4 -> 2).

    Args:
        input_shape: Number of channels of the input images.
        output_shape: Number of output logits (classes).
    """

    def __init__(self,input_shape,output_shape):
        super().__init__()
        # Channel progression: input -> 12 -> 24 -> 48 -> 96.
        self.conv_1 = self._conv_stage(input_shape, 12)  # (12,32,32) -> pooled (12,16,16)
        self.conv_2 = self._conv_stage(12, 24)           # (24,16,16) -> pooled (24,8,8)
        self.conv_3 = self._conv_stage(24, 48)           # (48,8,8)   -> pooled (48,4,4)
        self.conv_4 = self._conv_stage(48, 96)           # (96,4,4)   -> pooled (96,2,2)

        # Fully connected head operating on the flattened feature map.
        self.fc1 = nn.Linear(96*2*2,120)
        self.fc2 = nn.Linear(120,84)
        self.fc3 = nn.Linear(84,32)
        self.fc4 = nn.Linear(32,output_shape)
        self.dropout = nn.Dropout(0.3)
        self.leakyrelu = nn.LeakyReLU()

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one conv stage; layer order matches the original so state_dict keys are unchanged."""
        return nn.Sequential(
            nn.Conv2d(in_channels = in_channels,
                      out_channels = out_channels,
                      kernel_size = 3,
                      stride = 1,
                      padding = 1),
            nn.LeakyReLU(),
            nn.BatchNorm2d(out_channels),
            nn.MaxPool2d(kernel_size = 2,
                         stride = 2)
        )

    def forward(self,x):
        """Return raw class logits of shape (batch, output_shape) for a batch of images."""
        x = self.conv_1(x)
        x = self.conv_2(x)
        x = self.conv_3(x)
        x = self.conv_4(x)

        x = torch.flatten(x,1)  # keep the batch dimension, flatten the rest

        x = self.leakyrelu(self.dropout(self.fc1(x)))
        x = self.leakyrelu(self.dropout(self.fc2(x)))
        x = self.leakyrelu(self.dropout(self.fc3(x)))
        # No dropout on the output layer: zeroing logits at random would corrupt
        # the class scores fed to the loss during training.
        x = self.fc4(x)
        return x


# class NeuralNet(nn.Module):
#     def __init__(self, input_shape, output_shape):
#         super().__init__()
#         self.conv_1 = nn.Sequential(
#             nn.Conv2d(input_shape, 64, kernel_size=3, stride=1, padding=1),
#             nn.LeakyReLU(),
#             nn.BatchNorm2d(64),
#             nn.MaxPool2d(2, 2)
#         )
#         self.conv_2 = nn.Sequential(
#             nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
#             nn.LeakyReLU(),
#             nn.BatchNorm2d(128),
#             nn.MaxPool2d(2, 2)
#         )
#         self.conv_3 = nn.Sequential(
#             nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
#             nn.LeakyReLU(),
#             nn.BatchNorm2d(256),
#             nn.MaxPool2d(2, 2)
#         )
#         self.conv_4 = nn.Sequential(
#             nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
#             nn.LeakyReLU(),
#             nn.BatchNorm2d(512),
#             nn.MaxPool2d(2, 2)
#         )

#         self.gap = nn.AdaptiveAvgPool2d(1)  # Global Average Pooling
#         self.fc1 = nn.Linear(512, 256)
#         self.fc2 = nn.Linear(256, output_shape)

#         self.dropout = nn.Dropout(0.3)
#         self.leakyrelu = nn.LeakyReLU()

#     def forward(self, x):
#         x = self.conv_1(x)
#         x = self.conv_2(x)
#         x = self.conv_3(x)
#         x = self.conv_4(x)

#         x = self.gap(x)  # Apply GAP
#         x = torch.flatten(x, 1)  # Flatten before FC layer

#         x = self.leakyrelu(self.dropout(self.fc1(x)))
#         x = self.fc2(x)  # Removed dropout from last layer
#         return x


In [None]:
# Custom CNN for 3-channel images, with one output logit per selected class
# (derived from selected_classes instead of a hard-coded 28).
self_model = NeuralNet(input_shape=3, output_shape=len(selected_classes)).to(device)

In [None]:
# Classification loss shared by both training loops.
loss_fn = nn.CrossEntropyLoss()

# One SGD optimizer per model, configured with the same learning rate and momentum.
optimizer_self = optim.SGD(
    params=self_model.parameters(),
    lr=0.001,
    momentum=0.9,
)
optimizer_resnet = optim.SGD(
    params=resnet_model.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [None]:
# Train the custom CNN, resuming from the last saved checkpoint when one exists.
start_epoch = 0
checkpoint_path = "Models/checkpoint.pth"

# torch.save() does not create missing parent directories.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

if os.path.exists(checkpoint_path):
    # map_location lets a checkpoint written on one device be resumed on another;
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all this checkpoint holds.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
    self_model.load_state_dict(checkpoint['model_state_dict'])
    optimizer_self.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch'] + 1
    print(f"Resuming training from epoch {start_epoch}")
else:
    print("No checkpoint found. Starting fresh.")

EPOCHS = 70
for epoch in tqdm(range(start_epoch,EPOCHS)):
    print(f"Training epoch: {epoch}...")

    # The evaluation cells switch the model to eval mode; make sure dropout and
    # batch-norm are back in training mode if this cell is re-run afterwards.
    self_model.train()

    running_loss = 0.0
    # `inputs` rather than `input`, so the builtin input() is not shadowed in
    # the notebook namespace.
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer_self.zero_grad()
        outputs = self_model(inputs)

        loss = loss_fn(outputs,labels)
        loss.backward()
        optimizer_self.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    epoch_loss = running_loss / len(train_loader)

    # Persist everything needed to resume after an interruption.
    checkpoint = {
            'epoch': epoch,
            'model_state_dict': self_model.state_dict(),
            'optimizer_state_dict': optimizer_self.state_dict(),
        }
    torch.save(checkpoint, checkpoint_path)
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {epoch_loss:.4f}, Checkpoint saved!")

    print(f"loss: {epoch_loss:.4f} ")

  checkpoint = torch.load(checkpoint_path)


Resuming training from epoch 7


  0%|          | 0/63 [00:00<?, ?it/s]

Training epoch: 7...


  2%|▏         | 1/63 [00:02<02:55,  2.83s/it]

Epoch 8/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 8...


  3%|▎         | 2/63 [00:06<03:11,  3.14s/it]

Epoch 9/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 9...


  5%|▍         | 3/63 [00:08<02:51,  2.87s/it]

Epoch 10/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 10...


  6%|▋         | 4/63 [00:11<02:42,  2.76s/it]

Epoch 11/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 11...


  8%|▊         | 5/63 [00:13<02:35,  2.67s/it]

Epoch 12/70, Loss: 0.0210, Checkpoint saved!
loss: 0.0210 
Training epoch: 12...


 10%|▉         | 6/63 [00:16<02:40,  2.81s/it]

Epoch 13/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 13...


 11%|█         | 7/63 [00:20<02:42,  2.90s/it]

Epoch 14/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 14...


 13%|█▎        | 8/63 [00:22<02:32,  2.77s/it]

Epoch 15/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 15...


 14%|█▍        | 9/63 [00:25<02:26,  2.71s/it]

Epoch 16/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 16...


 16%|█▌        | 10/63 [00:27<02:20,  2.66s/it]

Epoch 17/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 17...


 17%|█▋        | 11/63 [00:30<02:29,  2.87s/it]

Epoch 18/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 18...


 19%|█▉        | 12/63 [00:33<02:23,  2.80s/it]

Epoch 19/70, Loss: 0.0210, Checkpoint saved!
loss: 0.0210 
Training epoch: 19...


 21%|██        | 13/63 [00:36<02:16,  2.73s/it]

Epoch 20/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 20...


 22%|██▏       | 14/63 [00:38<02:11,  2.68s/it]

Epoch 21/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 21...


 24%|██▍       | 15/63 [00:41<02:08,  2.67s/it]

Epoch 22/70, Loss: 0.0209, Checkpoint saved!
loss: 0.0209 
Training epoch: 22...


 24%|██▍       | 15/63 [00:41<02:12,  2.77s/it]


KeyboardInterrupt: 

In [None]:
# Fine-tune the ResNet-18, resuming from the last saved checkpoint when one exists.
start_epoch = 0
checkpoint_path = "Models/checkpoint_resnet_1.pth"

# torch.save() does not create missing parent directories.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

if os.path.exists(checkpoint_path):
    # map_location lets a checkpoint written on one device be resumed on another;
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all this checkpoint holds.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
    resnet_model.load_state_dict(checkpoint['model_state_dict'])
    optimizer_resnet.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch'] + 1
    print(f"Resuming training from epoch {start_epoch}")
else:
    print("No checkpoint found. Starting fresh.")

EPOCHS = 30
for epoch in tqdm(range(start_epoch,EPOCHS)):
    print(f"Training epoch: {epoch}...")

    # The evaluation cells switch the model to eval mode; make sure batch-norm
    # is back in training mode if this cell is re-run afterwards.
    resnet_model.train()

    running_loss = 0.0
    # `inputs` rather than `input`, so the builtin input() is not shadowed in
    # the notebook namespace.
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer_resnet.zero_grad()
        outputs = resnet_model(inputs)

        loss = loss_fn(outputs,labels)
        loss.backward()
        optimizer_resnet.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    epoch_loss = running_loss / len(train_loader)

    # Persist everything needed to resume after an interruption.
    checkpoint = {
            'epoch': epoch,
            'model_state_dict': resnet_model.state_dict(),
            'optimizer_state_dict': optimizer_resnet.state_dict(),
        }
    torch.save(checkpoint, checkpoint_path)
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {epoch_loss:.4f}, Checkpoint saved!")
    print(f"loss: {epoch_loss:.4f} ")

  checkpoint = torch.load(checkpoint_path)


Resuming training from epoch 21


  0%|          | 0/9 [00:00<?, ?it/s]

Training epoch: 21...


 11%|█         | 1/9 [00:04<00:36,  4.52s/it]

Epoch 22/30, Loss: 0.0387, Checkpoint saved!
loss: 0.0387 
Training epoch: 22...


 22%|██▏       | 2/9 [00:09<00:32,  4.70s/it]

Epoch 23/30, Loss: 0.0402, Checkpoint saved!
loss: 0.0402 
Training epoch: 23...


 33%|███▎      | 3/9 [00:14<00:28,  4.76s/it]

Epoch 24/30, Loss: 0.0211, Checkpoint saved!
loss: 0.0211 
Training epoch: 24...


 44%|████▍     | 4/9 [00:18<00:23,  4.65s/it]

Epoch 25/30, Loss: 0.0040, Checkpoint saved!
loss: 0.0040 
Training epoch: 25...


 56%|█████▌    | 5/9 [00:23<00:19,  4.86s/it]

Epoch 26/30, Loss: 0.0009, Checkpoint saved!
loss: 0.0009 
Training epoch: 26...


 56%|█████▌    | 5/9 [00:26<00:21,  5.39s/it]


KeyboardInterrupt: 

In [None]:
# Persist the trained weights of both models. torch.save() does not create
# missing directories, so make sure the output folder exists first.
os.makedirs('Models', exist_ok=True)
torch.save(self_model.state_dict(),'Models/trained_self.pth')
torch.save(resnet_model.state_dict(),'Models/trained_resnet.pth')

In [None]:
# Rebuild the custom CNN with the same number of outputs it was trained with.
# train_data is the unfiltered 100-class CIFAR-100, so sizing the model from
# train_data.classes would not match the saved 28-class weights.
self_model = NeuralNet(3,len(selected_classes)).to(device)
# map_location allows weights saved on GPU to be loaded on a CPU-only machine;
# weights_only=True restricts unpickling to tensors and plain containers.
self_model.load_state_dict(
    torch.load('Models/trained_self.pth', map_location=device, weights_only=True)
)
resnet_model = resnet_model.to(device)
resnet_model.load_state_dict(
    torch.load('Models/trained_resnet.pth', map_location=device, weights_only=True)
)

  self_model.load_state_dict(torch.load('Models/trained_self.pth'))
  resnet_model.load_state_dict(torch.load('Models/trained_resnet.pth'))


<All keys matched successfully>

In [None]:
# Evaluate the custom CNN on the filtered test split.
self_model.eval()
criterion = nn.CrossEntropyLoss()

# Running totals over the whole test set.
correct = 0
total = 0
total_loss = 0

with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = self_model(images)
        loss = criterion(outputs, labels)
        # criterion returns the batch mean; weight it by the batch size so the
        # final average is per sample.
        total_loss += loss.item() * labels.size(0)

        # Predicted class = index of the largest logit.
        _, predicted = outputs.max(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

# Aggregate metrics over every test sample.
accuracy = 100 * correct / total
average_loss = total_loss / total

print(f"Test Accuracy: {accuracy:.2f}%")
print(f"Validation Loss: {average_loss:.4f}")


Test Accuracy: 62.07%
Validation Loss: 1.8443


In [None]:
# Evaluate the fine-tuned ResNet-18 on the filtered test split.
correct = 0
total = 0
total_loss = 0  # sum of per-sample losses

resnet_model.eval()
criterion = nn.CrossEntropyLoss()  # returns the mean loss of a batch

with torch.inference_mode():
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)

        outputs = resnet_model(images)
        loss = criterion(outputs, labels)
        # Weight the batch mean by the batch size: the last batch can be
        # smaller, and averaging per-batch means would over-weight it.
        total_loss += loss.item() * labels.size(0)

        _, predicted = torch.max(outputs, 1)  # index of the largest logit
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Compute final metrics
accuracy = 100 * correct / total
average_loss = total_loss / total  # per-sample average, same as the custom-model evaluation

print(f"Test Accuracy: {accuracy:.2f}%")
print(f"Validation Loss: {average_loss:.4f}")


Test Accuracy: 69.54%
Validation Loss: 2.1186


In [None]:
# Preprocessing for external images: resize to the 32x32 resolution used by the
# training data, then apply the same tensor conversion and normalization.
new_transform = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])

def load_image(image_path):
    """Load an image file and return it as a normalized batch of one image.

    The image is converted to RGB first, so grayscale or RGBA files still give
    the three channels that the normalization and the models expect.
    """
    # The context manager closes the file; convert() returns a loaded copy.
    with Image.open(image_path) as img:
        rgb_image = img.convert("RGB")
    image = new_transform(rgb_image)
    image = image.unsqueeze(0)  # add the batch dimension
    return image

image_paths = ['apple.jpg','bycycle.jpg','bottle.jpg','bowl.jpg','clock.jpg','chair.jpg','keyboard.jpg']
images = [load_image(img) for img in image_paths]

# Predictions of the custom CNN. The model switched to eval mode must be the
# one that is run; previously this cell called self_model.eval() but ran
# resnet_model.
self_model.eval()
with torch.inference_mode():
    for image in images:
        image = image.to(device)
        output = self_model(image)
        _,predicted = torch.max(output,1)
        print(f"Prediction: {class_names[predicted.item()]}")

Prediction: 0
Prediction: 8
Prediction: 9
Prediction: 10
Prediction: 22
Prediction: 50
Prediction: 39


In [None]:
# Preprocessing for external images: resize to the 32x32 resolution used by the
# training data, then apply the same tensor conversion and normalization.
new_transform = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])

def load_image(image_path):
    """Load an image file and return it as a normalized batch of one image.

    The image is converted to RGB first, so grayscale or RGBA files still give
    the three channels that the normalization and the models expect.
    """
    # The context manager closes the file; convert() returns a loaded copy.
    with Image.open(image_path) as img:
        rgb_image = img.convert("RGB")
    image = new_transform(rgb_image)
    image = image.unsqueeze(0)  # add the batch dimension
    return image

image_paths = ['apple.jpg','bycycle.jpg','bottle.jpg','bowl.jpg','clock.jpg','chair.jpg','keyboard.jpg']
images = [load_image(img) for img in image_paths]

# Predictions of the fine-tuned ResNet-18. Put the model that is actually run
# into eval mode; previously this cell called self_model.eval() instead.
resnet_model.eval()
with torch.inference_mode():
    for image in images:
        image = image.to(device)
        output = resnet_model(image)
        _,predicted = torch.max(output,1)
        print(f"Prediction: {class_names[predicted.item()]}")

Prediction: 0
Prediction: 8
Prediction: 9
Prediction: 10
Prediction: 22
Prediction: 79
Prediction: 39
