In [1]:
import csv
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models
# import torchvision.models as models
from tqdm import tqdm

In [2]:
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Preprocessing applied to every image (train and test alike):
# resize to 224x224, convert to a tensor, then normalise each of the
# three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

In [4]:

# ResNet-18 built without pretrained weights (weights=None), then moved
# to the selected device.
model = models.resnet18(weights=None)
model = model.to(device)

In [5]:
# Print the module tree to inspect the layer layout of the network
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [6]:
# Swap the final fully connected layer for a 50-output classifier.
# - in_features is read from the existing head instead of hard-coding 512,
#   so the cell keeps working if the backbone is changed (and is safe to
#   re-run: the replacement head has the same in_features).
# - The new layer is created on the CPU by default, while the rest of the
#   model was already moved to `device`; move it explicitly so the model
#   does not depend on a later call (e.g. torchinfo.summary) relocating it.
model.fc = nn.Linear(model.fc.in_features, 50).to(device)

In [7]:
import torchinfo

# Per-layer output shapes and parameter counts for an input of shape
# (32, 3, 224, 224); the returned summary is displayed as the cell output.
torchinfo.summary(model, input_size=(32, 3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [32, 50]                  --
├─Conv2d: 1-1                            [32, 64, 112, 112]        9,408
├─BatchNorm2d: 1-2                       [32, 64, 112, 112]        128
├─ReLU: 1-3                              [32, 64, 112, 112]        --
├─MaxPool2d: 1-4                         [32, 64, 56, 56]          --
├─Sequential: 1-5                        [32, 64, 56, 56]          --
│    └─BasicBlock: 2-1                   [32, 64, 56, 56]          --
│    │    └─Conv2d: 3-1                  [32, 64, 56, 56]          36,864
│    │    └─BatchNorm2d: 3-2             [32, 64, 56, 56]          128
│    │    └─ReLU: 3-3                    [32, 64, 56, 56]          --
│    │    └─Conv2d: 3-4                  [32, 64, 56, 56]          36,864
│    │    └─BatchNorm2d: 3-5             [32, 64, 56, 56]          128
│    │    └─ReLU: 3-6                    [32, 64, 56, 56]          --
│

In [8]:
# Training set: ImageFolder derives the labels from the sub-folder names
# under train_dir; batches of 32 are drawn in shuffled order.
train_dir = '/kaggle/input/iith-dl-contest-2024/train/train'
train_data = datasets.ImageFolder(root=train_dir, transform=transform)
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=32,
    shuffle=True,
)

In [9]:
# Test set: same preprocessing as training; shuffle is off so predictions
# come back in the dataset's own order. The labels ImageFolder assigns
# here are ignored downstream.
test_dir = '/kaggle/input/iith-dl-contest-2024/test'
test_data = datasets.ImageFolder(root=test_dir, transform=transform)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=64,
    shuffle=False,
)

In [10]:
# Invert the training set's class-name -> index mapping so that predicted
# indices can be translated back into class names.
classes = train_data.class_to_idx
idx_to_class = dict(zip(classes.values(), classes.keys()))

In [11]:
# Image file names for the submission's ID column.
# Take them straight from the dataset's sample list so that row i of the
# predictions (test_loader is not shuffled) is always paired with the file
# it was actually computed from, instead of fabricating "<i>.JPEG" names
# and relying on a lexicographic sort to match the on-disk order.
file_names = np.array([os.path.basename(path) for path, _ in test_data.samples])

In [12]:
# Train the classifier and, once it has trained for a while, export test-set
# predictions and a weights checkpoint at the end of every epoch.

# Make sure every parameter (including a replaced head) lives on `device`
# before the optimizer is built; this is a no-op if the model is already there.
model.to(device)

# Defining loss criterion and optimizer
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 20
# Submissions/checkpoints are written only for epochs after this one
export_after_epoch = 8

for epoch in range(epochs):
    model.train()  # Setting the model to training mode (reset every epoch)
    running_loss = 0.0
    correct_preds = 0
    total_samples = 0
    # Iterating the loader directly lets tqdm show the total batch count
    for inputs, labels in tqdm(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()  # Clearing gradients

        # Forward pass
        outputs = model(inputs)

        # Computing loss
        loss = loss_func(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Updating running loss (loss is a batch mean, so weight by batch size)
        running_loss += loss.item() * inputs.size(0)

        # Calculating accuracy
        _, predicted = torch.max(outputs, 1)
        correct_preds += torch.sum(predicted == labels).item()
        total_samples += labels.size(0)

    # Printing average loss and accuracy for the current epoch
    epoch_loss = running_loss / len(train_data)
    epoch_acc = correct_preds / total_samples
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

    if epoch + 1 > export_after_epoch:
        outputs_list = []

        # Inference must run in eval mode: otherwise BatchNorm normalises with
        # per-batch statistics and updates its running statistics from test
        # data. no_grad() skips building the autograd graph.
        model.eval()
        with torch.no_grad():
            # Process images and gather predictions
            for images, _ in tqdm(test_loader):
                images = images.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, dim=1)
                outputs_list.append(predicted.cpu())

        # Concatenate all predictions into a single array of class indices
        outputs = torch.cat(outputs_list).numpy()
        predicted_classes = np.array(
            [idx_to_class[idx] for idx in outputs.tolist()], dtype=object
        )
        # Combine file names and predicted classes
        table = np.column_stack((file_names, predicted_classes))

        # Write results to CSV file (named without shadowing the builtin `str`)
        submission_path = "submission{}.csv".format(epoch + 1)
        with open(submission_path, 'w', newline='') as file:
            wr = csv.writer(file)
            wr.writerow(['ID', 'Category'])
            wr.writerows(table)

        # Checkpoint the weights that produced this submission
        weights_path = "/kaggle/working/weights{}.pth".format(epoch + 1)
        torch.save(model.state_dict(), weights_path)
        
        
    

2032it [14:37,  2.32it/s]


Epoch [1/20], Loss: 2.8867, Accuracy: 0.2358


2032it [10:51,  3.12it/s]


Epoch [2/20], Loss: 2.0947, Accuracy: 0.4159


2032it [10:06,  3.35it/s]


Epoch [3/20], Loss: 1.6923, Accuracy: 0.5175


2032it [10:06,  3.35it/s]


Epoch [4/20], Loss: 1.4010, Accuracy: 0.5942


2032it [10:03,  3.37it/s]


Epoch [5/20], Loss: 1.1463, Accuracy: 0.6643


2032it [09:59,  3.39it/s]


Epoch [6/20], Loss: 0.9155, Accuracy: 0.7273


2032it [10:05,  3.35it/s]


Epoch [7/20], Loss: 0.7025, Accuracy: 0.7855


2032it [10:27,  3.24it/s]


Epoch [8/20], Loss: 0.4907, Accuracy: 0.8460


2032it [10:31,  3.22it/s]


Epoch [9/20], Loss: 0.3313, Accuracy: 0.8950


100%|██████████| 600/600 [04:35<00:00,  2.18it/s]
2032it [10:03,  3.36it/s]


Epoch [10/20], Loss: 0.2280, Accuracy: 0.9261


100%|██████████| 600/600 [02:47<00:00,  3.58it/s]
2032it [10:07,  3.34it/s]


Epoch [11/20], Loss: 0.1808, Accuracy: 0.9414


100%|██████████| 600/600 [02:28<00:00,  4.03it/s]
2032it [10:31,  3.22it/s]


Epoch [12/20], Loss: 0.1422, Accuracy: 0.9535


100%|██████████| 600/600 [02:32<00:00,  3.94it/s]
2032it [10:00,  3.38it/s]


Epoch [13/20], Loss: 0.1311, Accuracy: 0.9569


100%|██████████| 600/600 [02:48<00:00,  3.56it/s]
2032it [10:04,  3.36it/s]


Epoch [14/20], Loss: 0.1102, Accuracy: 0.9643


100%|██████████| 600/600 [02:26<00:00,  4.11it/s]
2032it [09:54,  3.42it/s]


Epoch [15/20], Loss: 0.1099, Accuracy: 0.9634


100%|██████████| 600/600 [02:28<00:00,  4.04it/s]
2032it [10:08,  3.34it/s]


Epoch [16/20], Loss: 0.0954, Accuracy: 0.9689


100%|██████████| 600/600 [03:11<00:00,  3.13it/s]
2032it [10:40,  3.17it/s]


Epoch [17/20], Loss: 0.0859, Accuracy: 0.9724


100%|██████████| 600/600 [02:20<00:00,  4.26it/s]
2032it [10:00,  3.38it/s]


Epoch [18/20], Loss: 0.0804, Accuracy: 0.9740


100%|██████████| 600/600 [02:27<00:00,  4.05it/s]
2032it [10:22,  3.26it/s]


Epoch [19/20], Loss: 0.0750, Accuracy: 0.9758


100%|██████████| 600/600 [02:22<00:00,  4.20it/s]
2032it [10:22,  3.27it/s]


Epoch [20/20], Loss: 0.0724, Accuracy: 0.9764


100%|██████████| 600/600 [03:24<00:00,  2.93it/s]
