In [1]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models
# import torchvision.models as models
from tqdm import tqdm

In [2]:
# Pick the compute device once: CUDA when PyTorch reports a usable GPU,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Preprocessing applied to every image (train and test use the same pipeline):
# resize to 64x64, convert to a tensor, then normalise each of the three
# channels with mean 0.5 and std 0.5.
preprocessing_steps = [
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
]
transform = transforms.Compose(preprocessing_steps)

In [4]:

# ResNet-50 backbone built without requesting any weights (weights=None).
model = models.resnet50(weights=None)

In [5]:
# print(model)

In [6]:
# Replace the final fully connected layer so the network emits 50 logits
# (2048 input features -> 50 outputs).
model.fc = nn.Linear(2048, 50)

# Move the model to the selected device explicitly. The training loop moves
# every batch to `device`, so the parameters must live there as well; do not
# rely on any later helper call (e.g. a model-summary utility) to relocate the
# model as a side effect.
model = model.to(device)

In [7]:
import torchinfo

# Layer-by-layer summary for a batch of 32 three-channel 64x64 inputs.
# Left as the last expression of the cell so the notebook renders the result.
torchinfo.summary(model, input_size=(32, 3, 64, 64))

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [32, 50]                  --
├─Conv2d: 1-1                            [32, 64, 32, 32]          9,408
├─BatchNorm2d: 1-2                       [32, 64, 32, 32]          128
├─ReLU: 1-3                              [32, 64, 32, 32]          --
├─MaxPool2d: 1-4                         [32, 64, 16, 16]          --
├─Sequential: 1-5                        [32, 256, 16, 16]         --
│    └─Bottleneck: 2-1                   [32, 256, 16, 16]         --
│    │    └─Conv2d: 3-1                  [32, 64, 16, 16]          4,096
│    │    └─BatchNorm2d: 3-2             [32, 64, 16, 16]          128
│    │    └─ReLU: 3-3                    [32, 64, 16, 16]          --
│    │    └─Conv2d: 3-4                  [32, 64, 16, 16]          36,864
│    │    └─BatchNorm2d: 3-5             [32, 64, 16, 16]          128
│    │    └─ReLU: 3-6                    [32, 64, 16, 16]          --
│ 

In [8]:
# Training set: ImageFolder reads the images under `train_dir` and exposes a
# class-name -> index mapping (used later as `train_data.class_to_idx`).
train_dir = '/kaggle/input/iith-dl-contest-2024/train/train'
train_data = datasets.ImageFolder(root=train_dir, transform=transform)

# Batches of 32, reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=32,
    shuffle=True,
)

In [9]:
# Loss criterion and optimizer
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 20
for epoch in range(epochs):
    model.train()  # Setting the model to training mode
    running_loss = 0.0
    correct_preds = 0
    total_samples = 0

    # Wrap the loader itself rather than enumerate(loader): enumerate hides the
    # length, so tqdm could only print a raw iteration counter without a
    # progress bar or ETA.
    for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}'):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()  # Clearing gradients

        # Forward pass
        outputs = model(inputs)

        # Computing loss
        loss = loss_func(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean, so weight it by the batch size
        # to accumulate a per-sample sum (the last batch may be smaller).
        running_loss += loss.item() * inputs.size(0)

        # Calculating accuracy: the predicted class is the arg-max logit
        _, predicted = torch.max(outputs, 1)
        correct_preds += torch.sum(predicted == labels).item()
        total_samples += labels.size(0)

    # Printing average loss and accuracy for the current epoch
    epoch_loss = running_loss / len(train_data)
    epoch_acc = correct_preds / total_samples
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

    # Save a checkpoint after every second epoch (2, 4, ..., 20).
    if (epoch + 1) % 2 == 0:
        # Use a dedicated name here: assigning to `str` would shadow the
        # builtin for the rest of the kernel session.
        checkpoint_path = "/kaggle/working/weights{}.pth".format(epoch + 1)
        torch.save(model.state_dict(), checkpoint_path)
        
    

2032it [13:08,  2.58it/s]


Epoch [1/20], Loss: 3.2413, Accuracy: 0.1751


2032it [08:02,  4.22it/s]


Epoch [2/20], Loss: 2.5973, Accuracy: 0.3064


2032it [08:02,  4.21it/s]


Epoch [3/20], Loss: 2.3150, Accuracy: 0.3694


2032it [08:13,  4.12it/s]


Epoch [4/20], Loss: 2.0623, Accuracy: 0.4286


2032it [07:59,  4.23it/s]


Epoch [5/20], Loss: 1.8150, Accuracy: 0.4901


2032it [07:55,  4.27it/s]


Epoch [6/20], Loss: 1.6154, Accuracy: 0.5395


2032it [07:57,  4.25it/s]


Epoch [7/20], Loss: 1.4039, Accuracy: 0.5930


2032it [07:51,  4.31it/s]


Epoch [8/20], Loss: 1.1517, Accuracy: 0.6592


2032it [07:54,  4.28it/s]


Epoch [9/20], Loss: 0.8854, Accuracy: 0.7326


2032it [08:00,  4.23it/s]


Epoch [10/20], Loss: 0.7347, Accuracy: 0.7732


2032it [07:57,  4.25it/s]


Epoch [11/20], Loss: 0.4750, Accuracy: 0.8493


2032it [07:56,  4.27it/s]


Epoch [12/20], Loss: 0.3426, Accuracy: 0.8899


2032it [07:50,  4.32it/s]


Epoch [13/20], Loss: 0.3152, Accuracy: 0.8992


2032it [07:49,  4.32it/s]


Epoch [14/20], Loss: 0.2190, Accuracy: 0.9284


2032it [07:50,  4.32it/s]


Epoch [15/20], Loss: 0.2246, Accuracy: 0.9265


2032it [07:56,  4.27it/s]


Epoch [16/20], Loss: 0.2051, Accuracy: 0.9343


2032it [08:04,  4.19it/s]


Epoch [17/20], Loss: 0.1722, Accuracy: 0.9444


2032it [07:53,  4.29it/s]


Epoch [18/20], Loss: 0.1657, Accuracy: 0.9473


2032it [07:56,  4.27it/s]


Epoch [19/20], Loss: 0.1349, Accuracy: 0.9566


2032it [07:51,  4.31it/s]


Epoch [20/20], Loss: 0.1412, Accuracy: 0.9547


In [10]:
# Test set, read with the same preprocessing as the training data.
test_dir = '/kaggle/input/iith-dl-contest-2024/test'
test_data = datasets.ImageFolder(root=test_dir, transform=transform)

# shuffle=False keeps the batches in dataset order, so predictions can be
# matched back to files by position.
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=32,
    shuffle=False,
)

In [11]:
import numpy as np
import csv

In [12]:
# The training dataset maps class name -> index; build the inverse lookup
# (index -> class name) so predicted indices can be translated into labels.
classes = train_data.class_to_idx
idx_to_class = dict((index, label) for label, index in classes.items())

In [13]:
# List to store per-batch predictions
outputs_list = []

# Switch to evaluation mode before predicting. The training loop leaves the
# model in train mode, in which BatchNorm normalises with per-batch statistics
# and keeps updating its running averages, which would happen on test data here.
model.eval()

# Process images and gather predictions. Gradients are not needed for
# inference, so disable autograd to avoid building the graph.
with torch.no_grad():
    for images, _ in tqdm(test_loader):
        images = images.to(device)
        logits = model(images)
        _, predicted = torch.max(logits, dim=1)
        # Keep only the small index tensor, already on the CPU.
        outputs_list.append(predicted.cpu())

# Concatenate all predictions into a single NumPy array of class indices
outputs = torch.cat(outputs_list).numpy()

100%|██████████| 1199/1199 [04:20<00:00,  4.60it/s]


In [14]:
import numpy as np

# Convert class indices to class names
predicted_classes = np.array([idx_to_class[idx] for idx in outputs], dtype=object)

# Take the file names from the dataset itself instead of fabricating
# "0.JPEG".."N-1.JPEG" and sorting them as strings. The test loader is not
# shuffled, so predictions are in the same order as `test_data.samples`;
# reading the real paths keeps every row aligned with its image even if the
# files are not named as a contiguous 0..N-1 range.
file_names = np.array([os.path.basename(path) for path, _ in test_data.samples])

# Guard against a partial or stale prediction array from an earlier run.
assert len(file_names) == len(predicted_classes), (
    f"{len(file_names)} test files but {len(predicted_classes)} predictions"
)

In [15]:
# Pair every file name with its predicted class: one (name, class) row per image.
table = np.stack([file_names, predicted_classes], axis=1)

# Show the table as a quick sanity check (NumPy abbreviates large arrays).
print(table)

[['0.JPEG' 'n02808440']
 ['1.JPEG' 'n02480495']
 ['10.JPEG' 'n01784675']
 ...
 ['9997.JPEG' 'n02486410']
 ['9998.JPEG' 'n02906734']
 ['9999.JPEG' 'n02056570']]


In [16]:
# Write the submission file: a header row followed by one row per image.
with open('submission1.csv', mode='w', newline='') as submission_file:
    writer = csv.writer(submission_file)
    writer.writerow(['ID', 'Category'])
    for row in table:
        writer.writerow(row)