In [1]:
pip install torchsummary

Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl.metadata (296 bytes)
Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
import time
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from datetime import datetime

In [3]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import os
from sklearn.model_selection import train_test_split
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import matplotlib as plt
import numpy as np
import pandas as pd

image_paths = ['/kaggle/input/dip-lab-hackathon-2024-image-classification/data/train', '/kaggle/input/dip-lab-hackathon-2024-image-classification/data/test']

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),    
    transforms.RandomHorizontalFlip(),    
    transforms.RandomRotation(15),        
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), 
    transforms.ToTensor(),                
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),   
])

val_transform = transforms.Compose([
    transforms.Resize(256),              
    transforms.CenterCrop(224),           
    transforms.ToTensor(),               
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  
])

Ds_target = datasets.ImageFolder(root='/kaggle/input/dip-lab-hackathon-2024-image-classification/data/train', transform=train_transform)

train_size = int(0.9 * len(Ds_target))
val_size = len(Ds_target) - train_size
training_set, validation_set = torch.utils.data.random_split(Ds_target, [train_size, val_size])

validation_set.dataset.transform = val_transform

training_loader = DataLoader(training_set, batch_size=64, shuffle=True)
validation_loader = DataLoader(validation_set, batch_size=64, shuffle=False)

print(f"Training set size: {len(training_set)}")
print(f"Validation set size: {len(validation_set)}")

Training set size: 4502
Validation set size: 501


In [4]:
model_urls = {
     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
 }

In [5]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64
        # self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv1 = nn.Conv2d(3, self.in_planes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_planes)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.maxpool(F.relu(self.bn1(self.conv1(x))))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

# Model Selection
def ResNet18(n_C=10):
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=n_C)

def ResNet34(n_C=10):
    return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=n_C)

def ResNet50(n_C=10):
    return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=n_C)

def ResNet101(n_C=10):
    return ResNet(Bottleneck, [3, 4, 23, 3], num_classes=n_C)

def ResNet152(n_C=10):
    return ResNet(Bottleneck, [3, 8, 36, 3], num_classes=n_C)


!pip install wget
import wget
def download_n_load_model(model, model_name):
    if model_name in model_urls:
      if not os.path.exists(f"{model_name}.pth"):
        url = model_urls[model_name]
        wget.download(url, out=f"{model_name}.pth")
      rn_model_dict=model.state_dict()
      url = model_urls[model_name]
      pretrained_dict = torch.load(f'./{model_name}.pth')
      pretrained_dict = {k: v for k, v in pretrained_dict.items() if (k in rn_model_dict) and (rn_model_dict[k].shape == pretrained_dict[k].shape)}
      rn_model_dict.update(pretrained_dict)
      model.load_state_dict(rn_model_dict)
      print(f"load checkpoint...{model_name}")


Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25ldone
[?25h  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9656 sha256=28a33b1e0ff57e5f0ffe882ab055daa6c9a03782afefa511cb157b2ba57edb85
  Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


In [9]:
import torch.optim as optim
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = ResNet34(n_C=10)
model.to(device)
summary(model, (3, 224, 224))

download_n_load_model(model, 'resnet34')

# Loss function
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4
)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
epoch_number = 0

EPOCHS = 19
path_save_cp = './cp/'
best_vloss = 1_000_000.
training_logs = {"train_loss": [],  "train_acc": [], "validate_loss": [], "validate_acc": []}

t_0_accelerated = time.time()
for epoch in range(EPOCHS):
    train_loss, train_correct = 0, 0
    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    for i, data in enumerate(training_loader):
        # Every data instance is an input + label pair
        inputs, labels = data[0].to(device), data[1].to(device)

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        train_loss += loss.item()
        train_correct += (outputs.argmax(1) == labels).float().sum().item()
    scheduler.step()

    training_logs["train_loss"].append(train_loss / len(training_loader))
    training_logs["train_acc"].append(train_correct / len(training_loader.dataset))

    running_vloss = 0.0
    # Set the model to evaluation mode, disabling dropout and using population
    # statistics for batch normalization.
    model.eval()
    # Disable gradient computation and reduce memory consumption.
    valid_loss, valid_correct = 0, 0
    with torch.no_grad():
        for i, vdata in enumerate(validation_loader):
            vinputs, vlabels = vdata[0].to(device), vdata[1].to(device)
            voutputs = model(vinputs)
            vloss = loss_fn(voutputs, vlabels)
            valid_loss += loss_fn(voutputs, vlabels).item()
            valid_correct += (voutputs.argmax(1) == vlabels).float().sum().item()
        # save validation logs
        training_logs["validate_loss"].append(valid_loss / len(validation_loader))
        training_logs["validate_acc"].append(valid_correct / len(validation_loader.dataset))

    if epoch % 1 == 0:
        print(f"Epochs {epoch+1}".ljust(10),
            f"train loss {training_logs['train_loss'][-1]:.5f}",
            f"train acc {training_logs['train_acc'][-1]:.5f}",

            f"validate loss {training_logs['validate_loss'][-1]:.5f}",
            f"validate acc {training_logs['validate_acc'][-1]:.5f}",
            )
        print("-"*80)

    # Track best performance, and save the model's state
    if valid_loss < best_vloss:
        best_vloss = valid_loss
        # model_path = 'model_{}_{}'.format(timestamp, epoch_number)
        if not os.path.exists(path_save_cp): os.mkdir(path_save_cp)
        torch.save(model.state_dict(), path_save_cp+'best_pretrainedmodel.pth')

    epoch_number += 1

t_end_accelerated = time.time()-t_0_accelerated
print(f"Time consumption for accelerated CUDA training (device:{device}): {t_end_accelerated} sec")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
         MaxPool2d-3           [-1, 64, 56, 56]               0
            Conv2d-4           [-1, 64, 56, 56]          36,864
       BatchNorm2d-5           [-1, 64, 56, 56]             128
            Conv2d-6           [-1, 64, 56, 56]          36,864
       BatchNorm2d-7           [-1, 64, 56, 56]             128
        BasicBlock-8           [-1, 64, 56, 56]               0
            Conv2d-9           [-1, 64, 56, 56]          36,864
      BatchNorm2d-10           [-1, 64, 56, 56]             128
           Conv2d-11           [-1, 64, 56, 56]          36,864
      BatchNorm2d-12           [-1, 64, 56, 56]             128
       BasicBlock-13           [-1, 64, 56, 56]               0
           Conv2d-14           [-1, 64,

  pretrained_dict = torch.load(f'./{model_name}.pth')


load checkpoint...resnet34
Epochs 1   train loss 1.88309 train acc 0.32563 validate loss 1.16427 validate acc 0.61876
--------------------------------------------------------------------------------
Epochs 2   train loss 0.69431 train acc 0.76721 validate loss 0.88033 validate acc 0.72056
--------------------------------------------------------------------------------
Epochs 3   train loss 0.38212 train acc 0.87161 validate loss 0.70578 validate acc 0.78842
--------------------------------------------------------------------------------
Epochs 4   train loss 0.22253 train acc 0.92914 validate loss 0.72517 validate acc 0.78244
--------------------------------------------------------------------------------
Epochs 5   train loss 0.13274 train acc 0.95313 validate loss 0.67083 validate acc 0.83234
--------------------------------------------------------------------------------
Epochs 6   train loss 0.09891 train acc 0.97046 validate loss 0.59092 validate acc 0.84232
----------------------

KeyboardInterrupt: 

In [None]:
import torch

PATH = '/kaggle/working/cp/best_pretrainedmodel.pth'
model = ResNet34(n_C=10)
model.load_state_dict(torch.load(PATH), strict=False)
model.to(device).eval()

acc_test = 0
test_loss = 0

# Ensure no gradient computation is performed
with torch.no_grad():
    for tinputs, tlabels in validation_loader:
        tinputs, tlabels = tinputs.to(device), tlabels.to(device)
        toutputs = model(tinputs)
        
        # Compute loss
        loss = loss_fn(toutputs, tlabels)
        test_loss += loss.item()
        
        # Compute accuracy
        _, preds_t = torch.max(toutputs, 1)
        acc_test += (preds_t == tlabels).float().sum().item()

# Calculate average metrics
accuracy_t = round(acc_test / len(validation_loader.dataset) * 100, 2)
avg_tloss = test_loss / len(validation_loader)

print(f'[Test loss: {avg_tloss}] [Accuracy test: {accuracy_t}%]')


In [None]:
import pandas as pd

model.eval()
df = pd.read_csv("/kaggle/input/dip-lab-hackathon-2024-image-classification/sample-submission.csv")
image_dir = '/kaggle/input/dip-lab-hackathon-2024-image-classification/data/test/data'
predictions = []

for idx, row in df.iterrows():
    image_path = os.path.join(image_dir, row['ID'])
    image = Image.open(image_path)
    
    image = val_transform(image).unsqueeze(0)
    
    if torch.cuda.is_available():
        image = image.cuda()
        loaded_model = model.cuda()
    
    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)
    
    predictions.append(predicted.item())

df['predicted_class'] = predictions

df.to_csv('/kaggle/working/submission.csv', index=False)

print("CSV file updated with predictions.")

In [None]:
import os
os.environ['NCCL_DEBUG'] = 'INFO'