In [1]:
import csv
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms, models
# import torchvision.models as models
from tqdm import tqdm

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise each of the three channels with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
])

In [4]:

# ConvNeXt-Tiny with no pretrained weights loaded (weights=None), placed on
# the selected device.
model = models.convnext_tiny(weights=None)
model = model.to(device)

In [5]:
# print(model)

In [6]:
# Swap the final classifier layer for a 50-way linear head.
# The input width is read from the layer being replaced instead of being
# hard-coded. `model` was moved to `device` before this new layer existed, so
# the layer is moved explicitly; otherwise its weights would stay on the CPU.
model.classifier[2] = nn.Linear(model.classifier[2].in_features, 50).to(device)

In [7]:
import torchinfo

# Layer-by-layer summary for a dummy batch of 32 three-channel 64x64 images.
# Note: the data pipeline resizes images to 224x224, so the spatial shapes
# reported here are smaller than the ones seen during training.
torchinfo.summary(model, input_size=(32, 3, 64, 64))

Layer (type:depth-idx)                        Output Shape              Param #
ConvNeXt                                      [32, 50]                  --
├─Sequential: 1-1                             [32, 768, 2, 2]           --
│    └─Conv2dNormActivation: 2-1              [32, 96, 16, 16]          --
│    │    └─Conv2d: 3-1                       [32, 96, 16, 16]          4,704
│    │    └─LayerNorm2d: 3-2                  [32, 96, 16, 16]          192
│    └─Sequential: 2-2                        [32, 96, 16, 16]          --
│    │    └─CNBlock: 3-3                      [32, 96, 16, 16]          79,296
│    │    └─CNBlock: 3-4                      [32, 96, 16, 16]          79,296
│    │    └─CNBlock: 3-5                      [32, 96, 16, 16]          79,296
│    └─Sequential: 2-3                        [32, 192, 8, 8]           --
│    │    └─LayerNorm2d: 3-6                  [32, 96, 16, 16]          192
│    │    └─Conv2d: 3-7                       [32, 192, 8, 8]           73,920

In [8]:
# Training set: an ImageFolder over the training directory, served in
# shuffled mini-batches of 32.
train_dir = '/kaggle/input/iith-dl-contest-2024/train/train'
train_data = datasets.ImageFolder(root=train_dir, transform=transform)
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=32,
    shuffle=True,
)

In [9]:
# Test set: same preprocessing as training, read in a fixed (unshuffled)
# order in mini-batches of 64 so predictions line up with the dataset order.
test_dir = '/kaggle/input/iith-dl-contest-2024/test'
test_data = datasets.ImageFolder(root=test_dir, transform=transform)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=64,
    shuffle=False,
)

In [10]:
# Invert the dataset's class-name -> index mapping so that predicted indices
# can be translated back into class names.
classes = train_data.class_to_idx
idx_to_class = dict(zip(classes.values(), classes.keys()))

In [11]:
# Build the ID column for the submission file.
#
# The names are taken from the test dataset itself, in the dataset's own
# sample order. Predictions are produced by iterating `test_loader` with
# shuffle=False, so row i of the predictions belongs to test_data.samples[i].
# Deriving the names here (rather than fabricating "<i>.JPEG" strings and
# sorting them) keeps IDs and predictions aligned whatever the files are
# actually called.
file_names = np.array([os.path.basename(path) for path, _ in test_data.samples])

In [12]:
# Train the classifier for a fixed number of epochs. After each epoch past
# `submit_after_epoch`, predict the test set, write a submission CSV and save
# the model weights, so any of the later epochs can be submitted.

# Ensure every parameter (including a replaced classifier head) is on the
# training device before the optimizer is built. No-op if already there.
model.to(device)

# Defining loss criterion and optimizer
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 20
submit_after_epoch = 8  # submissions/checkpoints are written from epoch 9 onwards

for epoch in range(epochs):
    # Re-enter training mode every epoch: the inference pass below switches
    # the model to eval mode.
    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_samples = 0

    # Wrap the loader directly (not enumerate(...)) so tqdm can read its
    # length and display a real progress bar with an ETA.
    for inputs, labels in tqdm(train_loader, desc=f'train {epoch + 1}/{epochs}'):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()  # Clearing gradients

        # Forward pass
        outputs = model(inputs)

        # Computing loss
        loss = loss_func(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # loss.item() is the batch mean; weight it by the batch size so the
        # epoch figure is a per-sample average even if the last batch is short.
        running_loss += loss.item() * inputs.size(0)

        # Calculating accuracy
        _, predicted = torch.max(outputs, 1)
        correct_preds += torch.sum(predicted == labels).item()
        total_samples += labels.size(0)

    # Printing average loss and accuracy for the current epoch
    epoch_loss = running_loss / total_samples
    epoch_acc = correct_preds / total_samples
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

    if epoch + 1 > submit_after_epoch:
        # Inference must run in eval mode: train-only behaviour (e.g. the
        # stochastic depth used by ConvNeXt blocks) would otherwise randomly
        # perturb the predictions. no_grad() avoids building autograd graphs.
        model.eval()
        batch_predictions = []
        with torch.no_grad():
            for images, _ in tqdm(test_loader, desc=f'test {epoch + 1}/{epochs}'):
                images = images.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, dim=1)
                batch_predictions.append(predicted.cpu())

        # Concatenate all predictions and map class indices to class names
        predicted_indices = torch.cat(batch_predictions).tolist()
        predicted_classes = np.array(
            [idx_to_class[idx] for idx in predicted_indices], dtype=object
        )

        # Combine file names and predicted classes (raises if lengths differ)
        table = np.column_stack((file_names, predicted_classes))

        # Write results to CSV file. Descriptive names are used instead of
        # rebinding `str`, which would shadow the builtin for later cells.
        submission_path = "submission{}.csv".format(epoch + 1)
        with open(submission_path, 'w', newline='') as file:
            wr = csv.writer(file)
            wr.writerow(['ID', 'Category'])
            wr.writerows(table)

        # Checkpoint the weights that produced this submission
        weights_path = "/kaggle/working/weights{}.pth".format(epoch + 1)
        torch.save(model.state_dict(), weights_path)
        
        
    

2032it [28:26,  1.19it/s]


Epoch [1/20], Loss: 3.3052, Accuracy: 0.1528


2032it [24:12,  1.40it/s]


Epoch [2/20], Loss: 2.7285, Accuracy: 0.2767


2032it [24:14,  1.40it/s]


Epoch [3/20], Loss: 2.3453, Accuracy: 0.3653


2032it [24:25,  1.39it/s]


Epoch [4/20], Loss: 2.0909, Accuracy: 0.4235


2032it [24:26,  1.39it/s]


Epoch [5/20], Loss: 1.8782, Accuracy: 0.4786


2032it [24:38,  1.37it/s]


Epoch [6/20], Loss: 1.6631, Accuracy: 0.5303


2032it [24:34,  1.38it/s]


Epoch [7/20], Loss: 1.4695, Accuracy: 0.5785


2032it [25:11,  1.34it/s]


Epoch [8/20], Loss: 1.2869, Accuracy: 0.6264


2032it [24:16,  1.39it/s]


Epoch [9/20], Loss: 1.1174, Accuracy: 0.6711


100%|██████████| 600/600 [04:27<00:00,  2.24it/s]
2032it [24:57,  1.36it/s]


Epoch [10/20], Loss: 0.9417, Accuracy: 0.7171


100%|██████████| 600/600 [03:15<00:00,  3.07it/s]
2032it [24:07,  1.40it/s]


Epoch [11/20], Loss: 0.7629, Accuracy: 0.7676


100%|██████████| 600/600 [02:57<00:00,  3.39it/s]
2032it [23:58,  1.41it/s]


Epoch [12/20], Loss: 0.5926, Accuracy: 0.8154


100%|██████████| 600/600 [04:09<00:00,  2.40it/s]
2032it [25:32,  1.33it/s]


Epoch [13/20], Loss: 0.4517, Accuracy: 0.8566


100%|██████████| 600/600 [03:05<00:00,  3.23it/s]
2032it [23:53,  1.42it/s]


Epoch [14/20], Loss: 0.3522, Accuracy: 0.8876


100%|██████████| 600/600 [03:02<00:00,  3.28it/s]
2032it [23:37,  1.43it/s]


Epoch [15/20], Loss: 0.2853, Accuracy: 0.9072


100%|██████████| 600/600 [03:10<00:00,  3.16it/s]
2032it [23:36,  1.43it/s]


Epoch [16/20], Loss: 0.2492, Accuracy: 0.9203


100%|██████████| 600/600 [02:59<00:00,  3.35it/s]
2032it [23:50,  1.42it/s]


Epoch [17/20], Loss: 0.2207, Accuracy: 0.9297


100%|██████████| 600/600 [03:01<00:00,  3.30it/s]
2032it [23:42,  1.43it/s]


Epoch [18/20], Loss: 0.1941, Accuracy: 0.9373


100%|██████████| 600/600 [02:59<00:00,  3.35it/s]
2032it [23:37,  1.43it/s]


Epoch [19/20], Loss: 0.1823, Accuracy: 0.9411


100%|██████████| 600/600 [02:58<00:00,  3.36it/s]
2032it [23:52,  1.42it/s]


Epoch [20/20], Loss: 0.1674, Accuracy: 0.9478


100%|██████████| 600/600 [03:58<00:00,  2.52it/s]
