In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
# Preprocessing pipeline handed to the image datasets below.
transform = transforms.Compose([
    # Bring every image to a fixed 64x64 resolution.
    transforms.Resize(size=(64, 64)),
    # Convert the image to a tensor.
    transforms.ToTensor(),
    # Normalise each of the three channels with mean 0.5 and std 0.5.
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
])

In [4]:
class CNN(nn.Module):
    """Small convolutional classifier for 3-channel images.

    Four 3x3 convolutions (32, 64, 128 and 256 output channels), each
    followed by ReLU. The first three are also followed by a 2x2 max-pool.
    The final feature map is reduced to one value per channel by global
    average pooling, then passed through two linear layers
    (256 -> 128 -> ``num_classes``).

    Args:
        num_classes: Size of the output layer (number of logits per image).
    """

    def __init__(self, num_classes=50):
        super().__init__()
        # Every convolution uses the same 3x3 / stride 1 / padding 1 geometry,
        # which leaves the spatial size unchanged.
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)

        self.conv1 = nn.Conv2d(3, 32, **conv_kwargs)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, **conv_kwargs)
        self.conv3 = nn.Conv2d(64, 128, **conv_kwargs)
        self.conv4 = nn.Conv2d(128, 256, **conv_kwargs)
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Batch of images with 3 channels, laid out as (N, 3, H, W).

        Returns:
            Tensor of shape (N, num_classes) holding unnormalised scores.
        """
        # Stages 1-3: convolution -> ReLU -> 2x2 max-pool.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(self.relu(conv(x)))

        # Stage 4 has no max-pool; global average pooling collapses H and W.
        pooled = self.global_pool(self.relu(self.conv4(x)))

        # (N, 256, 1, 1) -> (N, 256)
        flat = pooled.view(pooled.size(0), -1)

        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)


In [5]:
# Build the 50-class network, then move it onto the selected device.
model = CNN(num_classes=50)
model = model.to(device)

In [6]:
# Training images are read from this folder and preprocessed with `transform`.
train_dir = '/kaggle/input/iith-dl-contest-2024/train/train'
train_data = datasets.ImageFolder(root=train_dir, transform=transform)

# Batches of 32 images, reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=32,
    shuffle=True,
)

In [8]:
# Loss criterion and optimizer (plain SGD, learning rate 0.1)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Training loop
epochs = 10
for epoch in range(epochs):
    model.train()  # Setting the model to training mode
    running_loss = 0.0   # sum of per-sample losses seen this epoch
    correct_preds = 0    # number of correctly classified samples this epoch
    total_samples = 0    # number of samples seen this epoch

    # Wrap the loader itself rather than enumerate(loader): tqdm can then read
    # len(train_loader) and show a percentage/ETA instead of a bare "N it"
    # counter. The batch index was never used.
    for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}'):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()  # Clearing gradients

        # Forward pass and loss
        outputs = model(inputs)
        loss = loss_func(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # loss is the batch mean, so weight it by the batch size to keep the
        # epoch average exact even when the last batch is smaller.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size

        # Accuracy bookkeeping
        predicted = outputs.argmax(dim=1)
        correct_preds += (predicted == labels).sum().item()
        total_samples += batch_size

    # Average loss and accuracy over the samples actually processed, so both
    # metrics share the same denominator.
    epoch_loss = running_loss / total_samples
    epoch_acc = correct_preds / total_samples
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

2032it [05:58,  5.66it/s]


Epoch [1/10], Loss: 2.7806, Accuracy: 0.2562


2032it [05:49,  5.81it/s]


Epoch [2/10], Loss: 2.5298, Accuracy: 0.3165


2032it [05:40,  5.97it/s]


Epoch [3/10], Loss: 2.3086, Accuracy: 0.3702


2032it [05:53,  5.75it/s]


Epoch [4/10], Loss: 2.1218, Accuracy: 0.4152


2032it [06:23,  5.29it/s]


Epoch [5/10], Loss: 1.9480, Accuracy: 0.4587


2032it [05:58,  5.67it/s]


Epoch [6/10], Loss: 1.7958, Accuracy: 0.4942


2032it [05:39,  5.99it/s]


Epoch [7/10], Loss: 1.6550, Accuracy: 0.5330


2032it [05:52,  5.76it/s]


Epoch [8/10], Loss: 1.5375, Accuracy: 0.5598


2032it [05:49,  5.82it/s]


Epoch [9/10], Loss: 1.4310, Accuracy: 0.5892


2032it [05:35,  6.05it/s]

Epoch [10/10], Loss: 1.3293, Accuracy: 0.6188





In [16]:
# Continue training the existing `model` for 5 more epochs, reusing the
# `loss_func` and `optimizer` objects created in the earlier training cell.
epochs = 5
for epoch in range(epochs):
    model.train()  # Setting the model to training mode
    running_loss = 0.0   # sum of per-sample losses seen this epoch
    correct_preds = 0    # number of correctly classified samples this epoch
    total_samples = 0    # number of samples seen this epoch

    # Wrap the loader itself rather than enumerate(loader): tqdm can then read
    # len(train_loader) and show a percentage/ETA instead of a bare "N it"
    # counter. The batch index was never used.
    for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}'):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()  # Clearing gradients

        # Forward pass and loss
        outputs = model(inputs)
        loss = loss_func(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # loss is the batch mean, so weight it by the batch size to keep the
        # epoch average exact even when the last batch is smaller.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size

        # Accuracy bookkeeping
        predicted = outputs.argmax(dim=1)
        correct_preds += (predicted == labels).sum().item()
        total_samples += batch_size

    # Average loss and accuracy over the samples actually processed, so both
    # metrics share the same denominator.
    epoch_loss = running_loss / total_samples
    epoch_acc = correct_preds / total_samples
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

2032it [05:54,  5.73it/s]


Epoch [1/5], Loss: 1.2358, Accuracy: 0.6396


2032it [05:54,  5.73it/s]


Epoch [2/5], Loss: 1.1443, Accuracy: 0.6645


2032it [05:50,  5.80it/s]


Epoch [3/5], Loss: 1.0637, Accuracy: 0.6839


2032it [05:46,  5.86it/s]


Epoch [4/5], Loss: 0.9771, Accuracy: 0.7066


2032it [05:39,  5.98it/s]

Epoch [5/5], Loss: 0.9055, Accuracy: 0.7252





In [17]:
# Test images, preprocessed with the same `transform` as the training set.
test_dir = '/kaggle/input/iith-dl-contest-2024/test'
test_data = datasets.ImageFolder(root=test_dir, transform=transform)

# No shuffling: batches come out in the dataset's own order.
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=32,
    shuffle=False,
)

In [18]:
import numpy as np
import csv

In [19]:
# The training dataset exposes a class-name -> index mapping. Build the
# inverse (index -> class name) so predicted indices can be turned into names.
classes = train_data.class_to_idx
idx_to_class = dict(zip(classes.values(), classes.keys()))

In [20]:
# Per-batch predicted class indices, kept on the CPU
outputs_list = []

# Inference only: put the model in evaluation mode and disable autograd so
# that no computation graph is built for the forward passes.
model.eval()
with torch.no_grad():
    for images, _ in tqdm(test_loader):
        images = images.to(device)
        logits = model(images)
        predicted = logits.argmax(dim=1)
        # Move each batch of predictions off the device right away instead of
        # holding all of them there until the end.
        outputs_list.append(predicted.cpu())

# Concatenate all predictions into a single NumPy array of class indices
outputs = torch.cat(outputs_list).numpy()

100%|██████████| 1199/1199 [01:26<00:00, 13.88it/s]


In [21]:
import os

import numpy as np

# Convert class indices to class names
predicted_classes = np.array([idx_to_class[idx] for idx in outputs], dtype=object)

# Take the file names from the dataset itself rather than generating
# "0.JPEG", "1.JPEG", ... and sorting them. test_loader iterates test_data in
# order (shuffle=False), so test_data.samples[i] is the image that produced
# outputs[i]. Each name is therefore paired with the right prediction whatever
# the files are actually called.
file_names = np.array([os.path.basename(path) for path, _ in test_data.samples])

# Every prediction must have exactly one file name.
assert len(file_names) == len(predicted_classes), (
    f"{len(file_names)} test files but {len(predicted_classes)} predictions"
)

In [22]:
# Two-column table: file name in the first column, predicted class name in
# the second, one row per test image.
table = np.column_stack([file_names, predicted_classes])

# Show the table for a quick visual check.
print(table)

[['0.JPEG' 'n02808440']
 ['1.JPEG' 'n02395406']
 ['10.JPEG' 'n02190166']
 ...
 ['9997.JPEG' 'n02486410']
 ['9998.JPEG' 'n02802426']
 ['9999.JPEG' 'n01784675']]


In [24]:
# Save the predictions as a CSV file: a header row, then one row per image.
with open('submission1.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    header = ['ID', 'Category']
    writer.writerow(header)
    writer.writerows(table)

In [25]:
# Persist only the model's parameters (its state dict), not the module object.
torch.save(
    model.state_dict(),
    "/kaggle/working/model_weights_40.pth",
)