In [None]:
# All imports and global variables
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from tqdm import tqdm

import numpy as np
import torch
import torchvision as tv

from PIL import Image
import os
import math

# Reproducibility: seed the PyTorch and NumPy global RNGs with the same value
torch.manual_seed(42)
np.random.seed(42)

# When True, part of the training images is held out as a validation split
validation = False

# Optimisation hyperparameters
learning_rate = 0.0005
batch_size = 32
number_of_epoch = 25

# Images are resized to image_size x image_size before entering the network
image_size = 128

In [None]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

In [None]:
# Training-time preprocessing: resize to image_size x image_size, apply random
# horizontal/vertical flips as augmentation, convert to a single grayscale
# channel, then scale pixel values from [0, 1] to [-1, 1].
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

train_dataset = tv.datasets.ImageFolder(root="/kaggle/input/iith-dl-contest-2024/train/train", transform=transform)

# Read the class names BEFORE any split: random_split returns Subset objects,
# which do not expose the `.classes` attribute of the underlying ImageFolder.
classes = train_dataset.classes

if validation:
    # Split the dataset into training (80%) and validation (remaining) sets.
    # NOTE(review): the validation subset shares `transform`, so it also gets
    # the random flips; use a separate flip-free dataset if that matters.
    n_train = int(0.8 * len(train_dataset))
    n_val = len(train_dataset) - n_train
    train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [n_train, n_val])
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=8)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8)

print(classes)

In [None]:
class fire(nn.Module):
    """SqueezeNet "fire" block: a 1x1 squeeze conv feeding two parallel expand convs.

    The squeeze convolution reduces the input to ``squeeze_planes`` channels.
    Its activated output is then passed through a 1x1 and a 3x3 (padding 1)
    expand convolution, each followed by batch normalisation. The two branches
    are concatenated along the channel axis and passed through a ReLU, so the
    block emits ``2 * expand_planes`` channels at the input's spatial size.

    Args:
        inplanes: number of channels of the incoming feature map.
        squeeze_planes: number of channels produced by the squeeze convolution.
        expand_planes: number of channels produced by EACH expand branch.
    """

    def __init__(self, inplanes, squeeze_planes, expand_planes):
        super().__init__()

        # Squeeze stage (1x1 conv -> BN -> ReLU)
        self.conv1 = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1, stride=1)
        self.bn1 = nn.BatchNorm2d(squeeze_planes)
        self.relu1 = nn.ReLU(inplace=True)

        # Expand stage, 1x1 branch
        self.conv2 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=1, stride=1)
        self.bn2 = nn.BatchNorm2d(expand_planes)

        # Expand stage, 3x3 branch (padding keeps the spatial size)
        self.conv3 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(expand_planes)

        # Activation applied to the concatenated branches
        self.relu2 = nn.ReLU(inplace=True)

        # MSR initialisation: conv weights ~ N(0, sqrt(2 / fan_in)),
        # where fan_in = kernel_height * kernel_width * in_channels.
        for layer in self.modules():
            if not isinstance(layer, nn.Conv2d):
                continue
            kernel_h, kernel_w = layer.kernel_size
            fan_in = kernel_h * kernel_w * layer.in_channels
            layer.weight.data.normal_(0, math.sqrt(2. / fan_in))

    def forward(self, x):
        """Run the squeeze stage, both expand branches, and return their activated concatenation."""
        squeezed = self.relu1(self.bn1(self.conv1(x)))
        branch_1x1 = self.bn2(self.conv2(squeezed))
        branch_3x3 = self.bn3(self.conv3(squeezed))
        merged = torch.cat([branch_1x1, branch_3x3], 1)
        return self.relu2(merged)


class SqueezeNet(nn.Module):
    """SqueezeNet-style backbone built from ``fire`` blocks, ending in 10-channel log-probabilities.

    Layout: a 3x3 stem convolution (3 -> 96 channels) with batch norm and ReLU,
    eight fire blocks interleaved with three 2x2/stride-2 max-pools, a 1x1
    convolution down to 10 channels, a 4x4/stride-4 average pool and finally a
    log-softmax over the channel axis.

    Each max-pool halves the spatial size, so a 32x32 input reaches the
    average pool as a 4x4 map and leaves it as 1x1. Larger inputs produce a
    larger output map; the output is NOT flattened here.
    """

    def __init__(self):
        super().__init__()

        # Stem
        self.conv1 = nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(96)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # spatial size / 2

        # Fire blocks; each outputs 2 * expand_planes channels
        self.fire2 = fire(96, 16, 64)
        self.fire3 = fire(128, 16, 64)
        self.fire4 = fire(128, 32, 128)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # spatial size / 4
        self.fire5 = fire(256, 32, 128)
        self.fire6 = fire(256, 48, 192)
        self.fire7 = fire(384, 48, 192)
        self.fire8 = fire(384, 64, 256)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # spatial size / 8
        self.fire9 = fire(512, 64, 256)

        # Head: 1x1 conv to 10 channels, 4x4 average pool, channel-wise log-softmax
        self.conv2 = nn.Conv2d(512, 10, kernel_size=1, stride=1)
        self.avg_pool = nn.AvgPool2d(kernel_size=4, stride=4)
        self.softmax = nn.LogSoftmax(dim=1)

        # (Re-)initialise every layer, including those inside the fire blocks:
        # conv weights ~ N(0, sqrt(2 / fan_in)); batch-norm scale 1 and shift 0.
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.weight.data.fill_(1)
                layer.bias.data.zero_()
            elif isinstance(layer, nn.Conv2d):
                kernel_h, kernel_w = layer.kernel_size
                fan_in = kernel_h * kernel_w * layer.in_channels
                layer.weight.data.normal_(0, math.sqrt(2. / fan_in))

    def forward(self, x):
        """Apply the stages in sequence and return channel-wise log-softmax scores (4-D tensor)."""
        pipeline = (
            self.conv1, self.bn1, self.relu, self.maxpool1,
            self.fire2, self.fire3, self.fire4, self.maxpool2,
            self.fire5, self.fire6, self.fire7, self.fire8, self.maxpool3,
            self.fire9,
            self.conv2, self.avg_pool, self.softmax,
        )
        for stage in pipeline:
            x = stage(x)
        return x

In [None]:
class Net(nn.Module):
    """Classifier wrapping ``SqueezeNet`` for inputs with an arbitrary channel count.

    Pipeline: a 1x1 convolution maps ``in_channels`` to the 3 channels the
    backbone expects, a 2x2 max-pool halves the resolution, ``SqueezeNet``
    produces a 10-channel map, global average pooling collapses it to a
    10-vector, and a linear layer maps that vector to ``num_classes`` scores.

    Args:
        num_classes: number of output scores of the final linear layer.
        input_size: accepted for interface compatibility; not used by the model.
        in_channels: number of channels of the input images.
    """

    def __init__(self, num_classes=50, input_size=128, in_channels=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 3, 1, 1)  # 1x1 conv, stride 1
        self.pool = nn.MaxPool2d(2, 2)
        self.squeezenet = SqueezeNet()
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(10, num_classes)

    def forward(self, x):
        """Return the ``(batch, num_classes)`` scores for a batch of images."""
        stem = self.pool(self.conv1(x))
        backbone_out = self.squeezenet(stem)
        # Collapse the spatial map to one value per backbone channel (10 of them)
        pooled = self.gap(backbone_out).view(-1, 10)
        return self.fc(pooled)

In [None]:
# Build the model, move it to the selected device and put it in training mode
# (`.to()` and `.train()` both return the module itself, so they can be chained).
net = Net(
    input_size=image_size,
    num_classes=len(classes),
    in_channels=1,
).to(device).train()

# Cross-entropy loss on the model's raw scores, optimised with Adam
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

In [None]:
# Train the network

for epoch in range(number_of_epoch):  # loop over the dataset multiple times
    # Re-assert training mode at the start of every epoch. The inference cell
    # switches the model to eval mode, so without this a re-run of this cell
    # would silently train with BatchNorm using its running statistics.
    net.train()

    # Loss accumulated since the last progress print (reset every 100 batches)
    running_loss = 0.0
    for i, data in enumerate(tqdm(train_loader), 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()

        # Report the mean loss over each window of 100 batches
        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')

In [None]:
# # Test the network on the validation data (disabled: this whole cell is commented out)

# if validation:
#     correct = 0
#     total = 0
#     net = net.eval()
#     with torch.no_grad():
#         for data in val_loader:
#             images, labels = data[0].to(device), data[1].to(device)
#             outputs = net(images)
#             _, predicted = torch.max(outputs.data, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
    
#     print('Accuracy of the network on the validation images: %d %%' % (100 * correct / total))

In [None]:
# Count the entries in the test folder; later cells use this count as the
# number of test images to predict and to write to the submission file.
number_of_files = len(
    os.listdir("/kaggle/input/iith-dl-contest-2024/test/test")
)
print(number_of_files)

In [None]:
# Actual test data is in the test/test folder, load it one by one and predict the class

predicted_arr = []

# Deterministic inference preprocessing: resize, convert to one grayscale
# channel, and scale to [-1, 1]. No random flips are applied here.
tf = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Eval mode makes BatchNorm use its running statistics instead of batch statistics
net = net.eval()

# Inference only: disabling autograd avoids building a graph for every image.
with torch.no_grad():
    # Files are addressed by index, i.e. they are expected to be named
    # 0.JPEG ... (number_of_files - 1).JPEG
    for file_idx in tqdm(range(number_of_files)):
        # The context manager closes the underlying file handle once the
        # image has been converted to a tensor.
        with Image.open(f"/kaggle/input/iith-dl-contest-2024/test/test/{file_idx}.JPEG") as img:
            img_tensor = tf(img)

        # Add a batch dimension and move to the model's device
        batch = img_tensor.unsqueeze(0).to(device)

        # Predict the class: index of the highest score, as a plain Python int
        output = net(batch)
        predicted = output.argmax(dim=1).item()

        predicted_arr.append(classes[predicted])

In [None]:
# Write the submission file: a header line followed by one
# "<index>.JPEG,<predicted class>" row per test image.
with open("submission.csv", "w") as f:
    f.write("ID,Category\n")
    for row_idx in range(number_of_files):
        f.write(f"{row_idx}.JPEG,{predicted_arr[row_idx]}\n")

# Check if the csv file is correct: header + one line per test file.
# The content written above is deterministic, so a mismatch cannot be fixed by
# rewriting the file; fail loudly instead of retrying forever.
with open("submission.csv", "r") as f:
    lines = len(f.readlines())

correct_csv = lines == number_of_files + 1
if not correct_csv:
    raise RuntimeError(
        f"submission.csv has {lines} lines, expected {number_of_files + 1}"
    )