<a href="https://colab.research.google.com/github/AaryanSahu/SAiDL-Summer-Assignment-2023/blob/main/ML_CORE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms

# Select the compute device: the GPU when CUDA is usable, the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [None]:
# Training pipeline: random padded crop and horizontal flip for augmentation,
# then tensor conversion and per-channel normalisation.
_train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform_train = transforms.Compose(_train_steps)

# Test pipeline: no augmentation, only tensor conversion and the same normalisation.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform_test = transforms.Compose(_test_steps)


In [None]:
# CIFAR-100 splits, stored under ./data and downloaded on first use.
# The training split uses transform_train, the test split uses transform_test.
trainset = torchvision.datasets.CIFAR100(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
testset = torchvision.datasets.CIFAR100(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)

# Batch iterators: only the training batches are shuffled.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:12<00:00, 13014078.38it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
# # Define CNN model
# class CNN(nn.Module):
#     def __init__(self):
#         super(CNN, self).__init__()
#         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
#         self.conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
#         self.pool = nn.MaxPool2d(2, 2)
#         self.fc1 = nn.Linear(64 * 8 * 8, 512)
#         self.fc2 = nn.Linear(512, 100)

#     def forward(self, x):
#         x = self.pool(nn.functional.relu(self.conv1(x)))
#         x = self.pool(nn.functional.relu(self.conv2(x)))
#         x = x.view(-1, 64 * 8 * 8)
#         x = nn.functional.relu(self.fc1(x))
#         x = self.fc2(x)
#         return x

# # Initialize CNN model
# net = CNN().to(device)

In [None]:
# Load a ResNet-18 with pretrained weights (nothing is pretrained here; the
# published checkpoint is downloaded).
pretrained_resnet = models.resnet18(pretrained=True)
# Swap the final fully connected layer for a new one with 100 outputs,
# one per CIFAR-100 class.
pretrained_resnet.fc = nn.Linear(in_features=512, out_features=100)
# Move the model to the device only after the new layer is attached, so the
# new layer is moved along with the rest.
pretrained_resnet = pretrained_resnet.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 196MB/s]


In [None]:
# # Define loss function and optimizer for pretraining
# criterion = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(pretrained_resnet.parameters(), lr=0.1, momentum=0.9)

In [None]:
# Cross-entropy objective for the classifier.
criterion = nn.CrossEntropyLoss()
# Momentum SGD over every parameter of the ResNet.
optimizer = optim.SGD(
    params=pretrained_resnet.parameters(),
    lr=0.01,
    momentum=0.7,
)

In [None]:
# Fine-tune the ResNet on the training set.
num_epochs = 20
log_interval = 50  # print the mean loss once every `log_interval` mini-batches

for epoch in range(num_epochs):
    # Explicitly enter training mode at the start of every epoch. Without this the
    # loop silently trains in whatever mode the model was left in (for example
    # evaluation mode, if this cell is re-run after an evaluation cell).
    pretrained_resnet.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = pretrained_resnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the loss and report its mean over the last window of batches.
        running_loss += loss.item()
        if (i + 1) % log_interval == 0:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / log_interval))
            running_loss = 0.0

print('Finished Training')


[1,    50] loss: 4.128
[1,   100] loss: 3.281
[1,   150] loss: 2.955
[1,   200] loss: 2.672
[1,   250] loss: 2.541
[1,   300] loss: 2.460
[1,   350] loss: 2.423
[2,    50] loss: 2.148
[2,   100] loss: 2.111
[2,   150] loss: 2.090
[2,   200] loss: 2.076
[2,   250] loss: 2.085
[2,   300] loss: 2.032
[2,   350] loss: 2.008
[3,    50] loss: 1.809
[3,   100] loss: 1.855
[3,   150] loss: 1.819
[3,   200] loss: 1.796
[3,   250] loss: 1.821
[3,   300] loss: 1.824
[3,   350] loss: 1.789
[4,    50] loss: 1.622
[4,   100] loss: 1.652
[4,   150] loss: 1.630
[4,   200] loss: 1.656
[4,   250] loss: 1.628
[4,   300] loss: 1.691
[4,   350] loss: 1.621
[5,    50] loss: 1.488
[5,   100] loss: 1.487
[5,   150] loss: 1.519
[5,   200] loss: 1.501
[5,   250] loss: 1.506
[5,   300] loss: 1.566
[5,   350] loss: 1.535
[6,    50] loss: 1.342
[6,   100] loss: 1.361
[6,   150] loss: 1.386
[6,   200] loss: 1.461
[6,   250] loss: 1.401
[6,   300] loss: 1.420
[6,   350] loss: 1.386
[7,    50] loss: 1.252
[7,   100] 

In [None]:
# Evaluate the fine-tuned ResNet on the test set.
# Remember the current mode so it can be restored afterwards, then switch to
# evaluation mode. Mode-dependent layers (e.g. batch normalisation, dropout) must
# run in inference mode while measuring accuracy; torch.no_grad() alone only
# disables gradient tracking and does not change layer behaviour.
was_training = pretrained_resnet.training
pretrained_resnet.eval()
correct = 0
total = 0
try:
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = pretrained_resnet(images)
            # Predicted class = index of the largest score along the class dimension.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
finally:
    # Put the model back into the mode it was in before evaluation.
    pretrained_resnet.train(was_training)

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 55 %


In [None]:
# Define the Adaptive Softmax
from torch.nn.utils.rnn import pad_sequence
class AdaptiveSoftmax(nn.Module):
    """Head/tail projection module driven by a list of column cutoffs.

    Args:
        in_features: Width of the input feature vectors.
        cutoff: Increasing boundaries. The first three entries define the widths
            of the three tail projections: ``cutoff[0]``,
            ``cutoff[1] - cutoff[0]`` and ``cutoff[2] - cutoff[1]``.
        div_val: Divisor used to size the two narrow layers of the head
            (``in_features // div_val``).

    Attributes set in ``__init__``:
        cutoff: The ``cutoff`` argument, stored as given.
        head_size: Output widths of the three linear layers of ``head``.
        tail_size: Output widths of the three linear layers in ``tail``.
        head: Three stacked linear layers mapping ``in_features`` back to
            ``in_features`` through two narrower layers.
        tail: One single-layer projection of the input per ``tail_size`` entry.
    """

    def __init__(self, in_features, cutoff, div_val=4):
        super(AdaptiveSoftmax, self).__init__()
        self.cutoff = cutoff
        reduced = in_features // div_val
        self.head_size = [reduced, reduced, in_features]
        self.tail_size = [
            self.cutoff[0],
            self.cutoff[1] - self.cutoff[0],
            self.cutoff[2] - self.cutoff[1],
        ]
        self.head = nn.Sequential(
            nn.Linear(in_features, self.head_size[0]),
            nn.Linear(self.head_size[0], self.head_size[1]),
            nn.Linear(self.head_size[1], self.head_size[2])
        )
        self.tail = nn.ModuleList([
            nn.Sequential(nn.Linear(in_features, width)) for width in self.tail_size
        ])

    def forward(self, x, target=None):
        """Project ``x`` or slice it by cluster, depending on ``target``.

        Args:
            x: 2-D tensor; it is concatenated and sliced along dimension 1.
            target: ``None`` to run the learned projections, otherwise the index
                of the cutoff segment to return.

        Returns:
            With ``target is None``: a list with one tensor per tail, each being
            ``head(x)`` concatenated with that tail's projection of ``x``.
            Otherwise: the columns ``x[:, :cutoff[0]]`` concatenated with the
            columns ``x[:, cutoff[target]:cutoff[target + 1]]``.

        Raises:
            IndexError: If ``target`` does not select an available segment.
        """
        if target is None:
            head_out = self.head(x)
            return [torch.cat((head_out, tail(x)), dim=1) for tail in self.tail]

        head_out = x[:, :self.cutoff[0]]
        tail_out = [
            x[:, lo:hi] for lo, hi in zip(self.cutoff[:-1], self.cutoff[1:])
        ]
        # Only len(cutoff) - 1 slices exist. Iterating over len(self.tail) (always 3)
        # ran past the end of tail_out for a 3-element cutoff and raised IndexError
        # on every call. Bounding by both lengths keeps longer cutoffs unchanged.
        usable = min(len(tail_out), len(self.tail))
        output = [torch.cat((head_out, tail_out[i]), dim=1) for i in range(usable)]
        return output[target]

In [None]:
from torchvision.models import resnet18
# Second ResNet-18, built with a 100-output classifier and no pretrained weights requested.
net = models.resnet18(num_classes=100)


In [None]:
# Define the loss function and the optimizer
num_classes = 100

# Cluster boundaries expressed as class indices; the last entry is the total number
# of classes. The previous values (2000, 6000, and an undefined `max_seq_len`) cannot
# work for a 100-class problem: nn.AdaptiveLogSoftmaxWithLoss requires every cutoff
# to lie in [1, n_classes - 1]. The split below is an arbitrary choice; tune as needed.
cutoff = [num_classes // 5, num_classes // 2, num_classes]

# The adaptive-softmax criterion consumes feature vectors, not class scores, and holds
# the classification weights itself. So take the width of the features entering the
# network's final layer and make that layer a pass-through. The isinstance guard keeps
# the cell safe to re-run once the layer has already been replaced.
if isinstance(net.fc, nn.Linear):
    in_features = net.fc.in_features
    net.fc = nn.Identity()

adaptive_softmax = AdaptiveSoftmax(in_features, cutoff)
# The criterion appends n_classes as the final boundary itself, so it receives every
# cutoff except the last one.
criterion = nn.AdaptiveLogSoftmaxWithLoss(
    adaptive_softmax.head_size[-1], num_classes, cutoffs=cutoff[:-1]
)
# The criterion owns trainable weights (its head and tail projections), so they must
# be optimised together with the network's parameters.
optimizer = optim.SGD(
    list(net.parameters()) + list(criterion.parameters()),
    lr=0.1, momentum=0.9, weight_decay=5e-4,
)


RuntimeError: ignored