<a href="https://colab.research.google.com/github/Ruheena-S/Hierarchical-classification-Loss-Functions-in-Image-Classification/blob/main/ResNet18_CIFAR100.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# !pip3 install torch_optimizer torchmetrics
# !nvidia-smi

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Sun Mar 12 23:41:14 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   51C    P0    28W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                     

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
#from torch_optimizer import Ranger
from torchvision.datasets import CIFAR100
from torch.utils.data import DataLoader
from torch.utils.data import random_split

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
torch.manual_seed(43)

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
# Define transforms for data augmentation
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5071, 0.4867, 0.4408], std=[0.2675, 0.2565, 0.2761])
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5071, 0.4867, 0.4408], std=[0.2675, 0.2565, 0.2761])
])

In [9]:
# # Load CIFAR100 dataset
# trainset = CIFAR100(root='./data', train=True, download=True, transform=transform_train)
# trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

# valset = CIFAR100(root='./data', train=True, download=True, transform=transform_test)
# valloader = DataLoader(valset, batch_size=100, shuffle=False, num_workers=2)

# testset = CIFAR100(root='./data', train=False, download=True, transform=transform_test)
# testloader = DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

dataset = CIFAR100(root='data/', download=True, transform=transform_train)
test_dataset = CIFAR100(root='data/', train=False, transform=transform_test)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to data/cifar-100-python.tar.gz


  0%|          | 0/169001437 [00:00<?, ?it/s]

Extracting data/cifar-100-python.tar.gz to data/


In [10]:
val_size = 5000
train_size = len(dataset) - val_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])


trainloader = DataLoader(train_ds, batch_size=128, shuffle=True, num_workers=4, pin_memory=True)
valloader = DataLoader(val_ds, batch_size=100, num_workers=4, pin_memory=True)
testloader = DataLoader(test_dataset, batch_size =100, num_workers=4, pin_memory=True)



In [11]:
# Load pre-trained ResNet18 model
resnet18 = torchvision.models.resnet18(pretrained=False)
resnet18.conv1 = nn.Conv2d(3, 64, kernel_size = (3,3), padding = (1, 1), bias = False)
resnet18.maxpool = nn.Identity()

# Freeze all layers except for the last one
for param in resnet18.parameters():
    param.requires_grad = True
resnet18.fc = nn.Linear(512, 100)

resnet18.to(device)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), p

In [12]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss().to(device)
# optimizer = optim.SGD(resnet18.fc.parameters(), lr=0.001, momentum=0.9)
#optimizer = Ranger(resnet18.parameters(), lr=0.001, weight_decay=0.005) 
optimizer = optim.Adam(resnet18.parameters(), lr=0.001)

In [13]:
# Train the model

best_loss=10.0

for epoch in range(20):  # Loop over the dataset multiple times

    running_loss = 0.0
    for i, (inputs,labels) in enumerate(trainloader, 0):
        # Get the inputs; data is a list of [inputs, labels]
        #inputs, labels = data.to(device)
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = resnet18(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # Print every 100 mini-batches
            print('[%d, %5d] train loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

    # Validate the model
    val_loss = 0.0
    val_total = 0
    val_correct = 0
    with torch.no_grad():
        for (images,labels) in valloader:         
            images = images.to(device)
            labels = labels.to(device)
            outputs = resnet18(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    valid_Loss = val_loss / len(valloader)
    print('[%d] validation loss: %.3f, accuracy: %d %%' % (
        epoch + 1,valid_Loss , 100 * val_correct / val_total))
    
    if best_loss > valid_Loss:
      
      best_loss = valid_Loss
      #save the model
      torch.save(resnet18, "/content/drive/MyDrive/MTP_Phase2/saved_models/resnet18_cifar100_rangerOPT.pth")



[1,   100] train loss: 4.121
[1,   200] train loss: 3.736
[1,   300] train loss: 3.474
[1] validation loss: 3.307, accuracy: 18 %
[2,   100] train loss: 3.101
[2,   200] train loss: 2.969
[2,   300] train loss: 2.805
[2] validation loss: 2.710, accuracy: 30 %
[3,   100] train loss: 2.521
[3,   200] train loss: 2.436
[3,   300] train loss: 2.330
[3] validation loss: 2.360, accuracy: 37 %
[4,   100] train loss: 2.161
[4,   200] train loss: 2.045
[4,   300] train loss: 2.032
[4] validation loss: 2.062, accuracy: 43 %
[5,   100] train loss: 1.834
[5,   200] train loss: 1.855
[5,   300] train loss: 1.794
[5] validation loss: 1.867, accuracy: 48 %
[6,   100] train loss: 1.641
[6,   200] train loss: 1.634
[6,   300] train loss: 1.614
[6] validation loss: 1.766, accuracy: 51 %
[7,   100] train loss: 1.499
[7,   200] train loss: 1.478
[7,   300] train loss: 1.460
[7] validation loss: 1.663, accuracy: 53 %
[8,   100] train loss: 1.305
[8,   200] train loss: 1.349
[8,   300] train loss: 1.354
[8]

In [15]:
for epoch in range(5):  # Loop over the dataset multiple times

    running_loss = 0.0
    for i, (inputs,labels) in enumerate(trainloader, 0):
        # Get the inputs; data is a list of [inputs, labels]
        #inputs, labels = data.to(device)
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = resnet18(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 100 == 99:    # Print every 100 mini-batches
            print('[%d, %5d] train loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

    # Validate the model
    val_loss = 0.0
    val_total = 0
    val_correct = 0
    with torch.no_grad():
        for (images,labels) in valloader:         
            images = images.to(device)
            labels = labels.to(device)
            outputs = resnet18(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    valid_Loss = val_loss / len(valloader)
    print('[%d] validation loss: %.3f, accuracy: %d %%' % (
        epoch + 1,valid_Loss , 100 * val_correct / val_total))
    
    if best_loss > valid_Loss:
      
      best_loss = valid_Loss
      #save the model
      torch.save(resnet18, "/content/drive/MyDrive/MTP_Phase2/saved_models/resnet18_cifar100_rangerOPT.pth")



[1,   100] train loss: 0.346
[1,   200] train loss: 0.380
[1,   300] train loss: 0.399
[1] validation loss: 1.495, accuracy: 64 %
[2,   100] train loss: 0.315
[2,   200] train loss: 0.353
[2,   300] train loss: 0.362
[2] validation loss: 1.510, accuracy: 64 %
[3,   100] train loss: 0.274
[3,   200] train loss: 0.297
[3,   300] train loss: 0.329
[3] validation loss: 1.511, accuracy: 65 %
[4,   100] train loss: 0.241
[4,   200] train loss: 0.275
[4,   300] train loss: 0.300
[4] validation loss: 1.608, accuracy: 64 %
[5,   100] train loss: 0.245
[5,   200] train loss: 0.252
[5,   300] train loss: 0.272
[5] validation loss: 1.606, accuracy: 65 %


In [16]:
# Test the model
test_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
    for (inputs,labels) in testloader:
        
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = resnet18(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Test accuracy: %.2f%%' % (100 * correct / total))
test_loss /= total
print('Test loss: %.3f' % (test_loss))

Test accuracy: 62.31%
Test loss: 1.785


In [17]:
modelmodel = torch.load("/content/drive/MyDrive/MTP_Phase2/saved_models/resnet18_cifar100_rangerOPT.pth")


In [18]:
modelmodel.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), p

In [19]:
# Test the model after loading the best model
test_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
    for (inputs,labels) in testloader:
        
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = modelmodel(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Test accuracy: %.2f%%' % (100 * correct / total))
test_loss /= total
print('Test loss: %.3f' % (test_loss))

Test accuracy: 60.04%
Test loss: 1.458
