In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms, models
# import torchvision.models as models
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
import numpy as np
import csv

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# Preprocessing pipeline shared by the training and test datasets:
#   1. resize every image to 224x224,
#   2. convert it to a tensor,
#   3. normalise each of the three channels with mean 0.5 and std 0.5.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
]
transform = transforms.Compose(preprocessing_steps)

In [4]:

# ConvNeXt-Tiny built without pretrained weights (weights=None), then moved to
# the selected device.
model = models.convnext_tiny(weights=None)
model = model.to(device)

In [5]:
# print(model)

In [6]:
# Replace the final classification layer with a 50-class linear head.
# The input width is read from the layer being replaced rather than hard-coded,
# and the new layer is moved to `device` explicitly: the rest of the model was
# already moved there when it was created, and a freshly constructed nn.Linear
# lives on the CPU, which would cause a device-mismatch error on a CUDA machine.
head_in_features = model.classifier[2].in_features
model.classifier[2] = nn.Linear(head_in_features, 50).to(device)

In [7]:
import torchinfo

# Layer-by-layer summary of the model for a dummy input of shape
# (32, 3, 64, 64).
# NOTE(review): the data pipeline resizes images to 224x224, so the activation
# shapes reported here are for a smaller input than the one used in training —
# confirm this is intentional.
summary_input_shape = (32, 3, 64, 64)
torchinfo.summary(model, input_size=summary_input_shape)

Layer (type:depth-idx)                        Output Shape              Param #
ConvNeXt                                      [32, 50]                  --
├─Sequential: 1-1                             [32, 768, 2, 2]           --
│    └─Conv2dNormActivation: 2-1              [32, 96, 16, 16]          --
│    │    └─Conv2d: 3-1                       [32, 96, 16, 16]          4,704
│    │    └─LayerNorm2d: 3-2                  [32, 96, 16, 16]          192
│    └─Sequential: 2-2                        [32, 96, 16, 16]          --
│    │    └─CNBlock: 3-3                      [32, 96, 16, 16]          79,296
│    │    └─CNBlock: 3-4                      [32, 96, 16, 16]          79,296
│    │    └─CNBlock: 3-5                      [32, 96, 16, 16]          79,296
│    └─Sequential: 2-3                        [32, 192, 8, 8]           --
│    │    └─LayerNorm2d: 3-6                  [32, 96, 16, 16]          192
│    │    └─Conv2d: 3-7                       [32, 192, 8, 8]           73,920

In [8]:
# model = CNN(num_classes=50).to(device)

In [9]:
# Training set: an ImageFolder dataset rooted at `train_dir`, served in
# shuffled batches of 64.
train_dir = '/kaggle/input/iith-dl-contest-2024/train/train'
train_data = datasets.ImageFolder(root=train_dir, transform=transform)
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=64,
    shuffle=True,
)

In [10]:
# Test set: same preprocessing as training, batches of 64, and no shuffling so
# that predictions come out in the dataset's own order.
test_dir = '/kaggle/input/iith-dl-contest-2024/test'
test_data = datasets.ImageFolder(root=test_dir, transform=transform)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=64,
    shuffle=False,
)

In [11]:
# Invert the dataset's {class name -> index} mapping so that predicted indices
# can be translated back into class names.
classes = train_data.class_to_idx
idx_to_class = dict(zip(classes.values(), classes.keys()))

In [12]:
# Submission IDs: one "<i>.JPEG" name per test sample, sorted lexicographically
# (so "10.JPEG" sorts before "2.JPEG").
# NOTE(review): this assumes the test files are literally named
# 0.JPEG ... (N-1).JPEG and that lexicographic order matches the order in which
# the test dataset yields its samples — confirm against the dataset.
num_test_images = len(test_data)
file_names = np.sort(np.array([str(i) + ".JPEG" for i in range(num_test_images)]))

In [13]:
# Train the model and, from epoch 9 onwards, write one submission CSV per epoch.
#
# Each epoch:
#   * runs one pass over `train_loader`, optimising cross-entropy with Adam;
#   * prints the mean training loss and training accuracy;
#   * once `epoch + 1 > 8`, predicts the whole test set and writes the result
#     to "submission<epoch>.csv" (columns: ID, Category).

# Defining loss criterion and optimizer
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 20
for epoch in range(epochs):
    # Re-enter training mode at the start of every epoch: the prediction pass
    # at the end of the previous epoch leaves the model in eval mode.
    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_samples = 0

    # Wrap the loader itself (not enumerate(...)) so tqdm can see its length
    # and draw a real progress bar with an ETA.
    for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{epochs}'):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()  # Clearing gradients

        # Forward pass
        outputs = model(inputs)

        # Computing loss
        loss = loss_func(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # loss.item() is the batch mean, so weight it by the batch size to
        # accumulate a per-sample sum.
        running_loss += loss.item() * inputs.size(0)

        # Calculating accuracy
        predicted = outputs.argmax(dim=1)
        correct_preds += (predicted == labels).sum().item()
        total_samples += labels.size(0)

    # Printing average loss and accuracy for the current epoch
    epoch_loss = running_loss / total_samples
    epoch_acc = correct_preds / total_samples
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

    if epoch + 1 > 8:
        # Switch to eval mode for inference: ConvNeXt uses stochastic depth,
        # which randomly perturbs predictions when left in training mode.
        model.eval()
        outputs_list = []

        # no_grad() skips building autograd graphs, saving memory and time.
        with torch.no_grad():
            for images, _ in tqdm(test_loader, desc=f'Predict {epoch + 1}/{epochs}'):
                images = images.to(device)
                batch_predictions = model(images).argmax(dim=1)
                outputs_list.append(batch_predictions.cpu())

        # Concatenate all predictions into a single array of class indices
        outputs = torch.cat(outputs_list).numpy()
        predicted_classes = np.array(
            [idx_to_class[idx] for idx in outputs.tolist()], dtype=object
        )

        # Combine file names and predicted classes; column_stack raises if the
        # number of file names and predictions differ.
        table = np.column_stack((file_names, predicted_classes))

        # Write results to a per-epoch CSV file so that earlier epochs'
        # submissions are not overwritten.
        submission_path = "submission{}.csv".format(epoch + 1)
        with open(submission_path, 'w', newline='') as file:
            wr = csv.writer(file)
            wr.writerow(['ID', 'Category'])
            wr.writerows(table)

        
    

1016it [27:47,  1.64s/it]


Epoch [1/20], Loss: 3.3157, Accuracy: 0.1506


1016it [23:37,  1.40s/it]


Epoch [2/20], Loss: 2.7437, Accuracy: 0.2708


1016it [23:38,  1.40s/it]


Epoch [3/20], Loss: 2.3623, Accuracy: 0.3592


1016it [23:08,  1.37s/it]


Epoch [4/20], Loss: 2.0860, Accuracy: 0.4270


1016it [22:50,  1.35s/it]


Epoch [5/20], Loss: 1.8764, Accuracy: 0.4783


1016it [22:45,  1.34s/it]


Epoch [6/20], Loss: 1.6912, Accuracy: 0.5227


1016it [22:37,  1.34s/it]


Epoch [7/20], Loss: 1.4969, Accuracy: 0.5744


1016it [22:51,  1.35s/it]


Epoch [8/20], Loss: 1.3000, Accuracy: 0.6259


1016it [22:34,  1.33s/it]


Epoch [9/20], Loss: 1.1073, Accuracy: 0.6731


100%|██████████| 600/600 [03:50<00:00,  2.61it/s]
1016it [22:36,  1.34s/it]


Epoch [10/20], Loss: 0.9155, Accuracy: 0.7243


100%|██████████| 600/600 [02:52<00:00,  3.48it/s]
1016it [22:36,  1.34s/it]


Epoch [11/20], Loss: 0.7177, Accuracy: 0.7813


100%|██████████| 600/600 [03:13<00:00,  3.11it/s]
1016it [22:38,  1.34s/it]


Epoch [12/20], Loss: 0.5367, Accuracy: 0.8304


100%|██████████| 600/600 [02:53<00:00,  3.45it/s]
1016it [22:41,  1.34s/it]


Epoch [13/20], Loss: 0.4045, Accuracy: 0.8707


100%|██████████| 600/600 [02:53<00:00,  3.45it/s]
1016it [22:45,  1.34s/it]


Epoch [14/20], Loss: 0.3067, Accuracy: 0.9009


100%|██████████| 600/600 [02:55<00:00,  3.43it/s]
1016it [22:42,  1.34s/it]


Epoch [15/20], Loss: 0.2504, Accuracy: 0.9202


100%|██████████| 600/600 [03:03<00:00,  3.28it/s]
1016it [22:46,  1.34s/it]


Epoch [16/20], Loss: 0.2153, Accuracy: 0.9314


100%|██████████| 600/600 [02:54<00:00,  3.43it/s]
1016it [22:48,  1.35s/it]


Epoch [17/20], Loss: 0.1945, Accuracy: 0.9380


100%|██████████| 600/600 [02:53<00:00,  3.45it/s]
1016it [22:46,  1.34s/it]


Epoch [18/20], Loss: 0.1784, Accuracy: 0.9433


100%|██████████| 600/600 [02:53<00:00,  3.45it/s]
1016it [22:44,  1.34s/it]


Epoch [19/20], Loss: 0.1674, Accuracy: 0.9463


100%|██████████| 600/600 [02:53<00:00,  3.46it/s]
1016it [22:44,  1.34s/it]


Epoch [20/20], Loss: 0.1508, Accuracy: 0.9516


100%|██████████| 600/600 [02:54<00:00,  3.44it/s]
