In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.init as init
import csv

In [5]:
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(10),      # Randomly rotate the image by up to 10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Randomly adjust brightness, contrast, saturation, and hue
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device='cpu'
print(device)

cpu


In [6]:
# train_datapath = '/kaggle/input/iith-dl-contest-2024/train/train'
train_datapath = '../iith-dl-contest-2024/train/train'
train_data = datasets.ImageFolder(root=train_datapath, transform=transform)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
# # how to access the train data
# for i, (data, target) in enumerate(train_loader):
#     print(data.shape)

In [7]:
# create a map for the lables corrosponds to the folder names
label_map = train_data.class_to_idx
print(label_map['n01443537'])

# create a map for the folder names corrosponds to the lables
label_map = dict((v,k) for k,v in label_map.items())
print(label_map[0])

0
n01443537


In [8]:
from torchvision import models

# model = models.resnet18(weights=None).to(device)
model = models.convnext_tiny(weights=None).to(device)

print(sum(p.numel() for p in model.parameters() if p.requires_grad))


28589128


In [10]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device='cpu'
print(device)
# Check if CUDA is available
if torch.cuda.is_available():
    print("CUDA is available. GPU detected.")
else:
    print("CUDA is not available. CPU will be used.resnet34")

cpu
CUDA is available. GPU detected.


In [11]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [13]:
# calculate the test data accuracy
# test_datapath = '/kaggle/input/iith-dl-contest-2024/test' # this folder itself contains images
test_datapath = '../iith-dl-contest-2024/test'
# read all the images in test_datapath
test_data = datasets.ImageFolder(root=test_datapath, transform=transform)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

In [20]:
# store all the predictions of test data in a list
def return_predictons(model, test_loader):
    predictions = []
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            # move the images and labels to GPU
            images, labels = images.to(device), labels.to(device)
            outputs = model.forward(images)
            print(outputs.data)
            print(outputs)
            break
            _, predicted = torch.max(outputs, 1)
            predictions.extend(predicted)
    return predictions

x_out = return_predictons(model, test_loader)

tensor([[ 0.2586, -0.1420, -0.1266,  ..., -0.6071,  0.6432,  1.4896],
        [-0.6442, -0.1577,  0.1033,  ...,  0.2161, -0.0163, -0.4122],
        [ 0.2990, -0.4317,  0.0743,  ...,  0.4321,  0.9758,  1.8230],
        ...,
        [ 0.6165, -0.8063,  0.0734,  ..., -0.3117,  0.6831,  0.2239],
        [ 0.3990, -0.4963,  0.6716,  ..., -0.2005,  0.5602,  0.8484],
        [ 0.3432, -0.2859,  0.5579,  ..., -0.2510,  1.4484,  0.2894]])
tensor([[ 0.2586, -0.1420, -0.1266,  ..., -0.6071,  0.6432,  1.4896],
        [-0.6442, -0.1577,  0.1033,  ...,  0.2161, -0.0163, -0.4122],
        [ 0.2990, -0.4317,  0.0743,  ...,  0.4321,  0.9758,  1.8230],
        ...,
        [ 0.6165, -0.8063,  0.0734,  ..., -0.3117,  0.6831,  0.2239],
        [ 0.3990, -0.4963,  0.6716,  ..., -0.2005,  0.5602,  0.8484],
        [ 0.3432, -0.2859,  0.5579,  ..., -0.2510,  1.4484,  0.2894]])


In [30]:
# store all the images names in a list for eg 1.jpeg, 2.jpeg, 3.jpeg

def return_images_names(test_data):
    images_names = []
    for i in range(len(test_data)):
        images_names.append(test_data.imgs[i][0].split('/')[-1])
    return images_names


In [31]:
def generate_csv_for_each_epoch(model, test_loader, test_data, epoch):
    predictions = return_predictons(model, test_loader)
    images_names = return_images_names(test_data)
    with open('/kaggle/working/convnext_epoch'+str(epoch)+'.csv', mode='w') as file:
        writer = csv.writer(file)
        writer.writerow(['ID', 'Category'])
        for i in range(len(images_names)):
            writer.writerow([images_names[i], label_map[predictions[i].item()]])

In [32]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=2):
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        # Print the running loss and accuracy only for epochs >= 14
        if epoch >= 14:
            epoch_loss = running_loss / len(train_loader)
            epoch_accuracy = correct_predictions / total_samples
            print(f'Epoch {epoch+1}, Loss: {epoch_loss}, Accuracy: {epoch_accuracy}')
            print(f'Finished epoch {epoch}.')
            generate_csv_for_each_epoch(model, test_loader, test_data, epoch)
            print('CSV file generated for epoch', epoch)
        else:
            print(f'Completed epoch {epoch}.')



In [33]:
train_model(model, train_loader, criterion, optimizer, num_epochs=10)

KeyboardInterrupt: 