In [None]:
# Environment sanity check: report the PyTorch build and the CUDA / cuDNN
# versions it is linked against, each exactly once and with a label so the
# output is self-explanatory.
import torch

print("PyTorch version:", torch.__version__)
print("CUDA version:", torch.version.cuda)
print("cuDNN available:", torch.backends.cudnn.is_available())
print("cuDNN version:", torch.backends.cudnn.version())


In [None]:
import os
import re
import gc
import sys
import time
import random
import csv
import shutil
import pathlib
import pickle
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from numba import jit, cuda
from PIL import Image
import cv2

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
from torch.optim import Adadelta, SGD
from torchvision import models, transforms
from torch_optimizer import Lamb
import timm

import pytorch_lightning as pl
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.loggers import TensorBoardLogger

from torchmetrics import Metric, F1Score
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingWarmRestarts

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

from tqdm.notebook import tqdm

import json

# ---------------------------------------------------------------------------
# Global hyperparameters
# ---------------------------------------------------------------------------

# --- Task / model ----------------------------------------------------------
N_LABELS = 18            # number of outputs of the classifier head
THRESHOLD = 0.5          # score cut-off used when turning predictions into labels
DROPOUT_RATE_1 = 0.5     # dropout probability used in the classifier head

# --- Training / data split -------------------------------------------------
MAX_EPOCHS = 40
BATCH_SIZE = 64          # shared by the train, validation and test loaders
TEST_SPLIT_SIZE = 0.3    # fraction of the labelled data held out by the split

# --- Input size and normalisation ------------------------------------------
RESIZE_DIM = 224         # images are resized to RESIZE_DIM x RESIZE_DIM
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# --- Augmentation: flips ---------------------------------------------------
RANDOM_HORIZONTAL_FLIP_PROB = 0.5
RANDOM_VERTICAL_FLIP_PROB = 0.5

# --- Augmentation: colour --------------------------------------------------
COLOR_JITTER_BRIGHTNESS = 0.2
COLOR_JITTER_CONTRAST = 0.2
COLOR_JITTER_SATURATION = 0.2
COLOR_JITTER_HUE = 0.1
RANDOM_GRAYSCALE_PROB = 0.1

# --- Augmentation: geometry ------------------------------------------------
RANDOM_ROTATION_DEGREES = 30
RANDOM_AFFINE_DEGREES = 0              # the affine step adds no extra rotation
RANDOM_AFFINE_TRANSLATE = (0.1, 0.1)
RANDOM_AFFINE_SCALE = (0.9, 1.1)
RANDOM_PERSPECTIVE_DISTORTION_SCALE = 0.2
RANDOM_PERSPECTIVE_PROB = 0.5

In [None]:
# Pick the compute device: CUDA when a GPU is visible to PyTorch, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} for training")

# Path to the images and CSV files
img_dir = './data/images'
train_file = './data/train_binarized.csv'
test_file = './data/test.csv'

train_df = pd.read_csv(train_file)
# Only the image identifier column is read from the test table.
test_df = pd.read_csv(test_file, usecols=['ImageID'])

# Show the sizes and a short preview rather than rendering the whole frame.
print(f"train_df: {train_df.shape}, test_df: {test_df.shape}")
train_df.head()

In [None]:
class ImageDataset(Dataset):
    """Map-style dataset yielding ``(image_name, image, labels)`` triples.

    Column 0 of ``df`` holds the image file name (resolved against
    ``img_dir``); the following ``n_labels`` columns hold the label values.
    If ``df`` has no columns after the first one, ``labels`` is an empty
    array.

    Args:
        df: table with the file name in column 0 and label columns after it.
        img_dir: directory that the file names are joined onto.
        transform: optional callable applied to the RGB PIL image.
        n_labels: how many label columns to read starting at column 1.
            The default of 18 reproduces the previous fixed ``1:19`` slice.
    """

    def __init__(self, df, img_dir, transform=None, n_labels=18):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.n_labels = n_labels

    def __len__(self):
        """Number of rows (images) in the backing table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and its label vector.

        Returns:
            ``(img_name, image, labels)`` where ``image`` is the RGB PIL image
            (or whatever ``transform`` returns for it) and ``labels`` is a
            1-D ``float32`` NumPy array.
        """
        img_name = self.df.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        # float32 (not NumPy's default float64) so the collated targets share
        # the dtype of a float32 model's logits in the loss computation.
        labels = self.df.iloc[idx, 1:1 + self.n_labels].values.astype('float32')

        if self.transform:
            image = self.transform(image)

        return img_name, image, labels

# Transformations for the images
#
# `transform` applies random augmentation before tensor conversion and
# normalisation; `test_transform` only resizes, converts and normalises.

transform = transforms.Compose([
    # Resize to a square of RESIZE_DIM x RESIZE_DIM pixels.
    transforms.Resize((RESIZE_DIM, RESIZE_DIM)),
    # Random mirroring along each axis.
    transforms.RandomHorizontalFlip(p=RANDOM_HORIZONTAL_FLIP_PROB),
    transforms.RandomVerticalFlip(p=RANDOM_VERTICAL_FLIP_PROB),
    # Random changes of brightness, contrast, saturation and hue.
    transforms.ColorJitter(
        brightness=COLOR_JITTER_BRIGHTNESS,
        contrast=COLOR_JITTER_CONTRAST,
        saturation=COLOR_JITTER_SATURATION,
        hue=COLOR_JITTER_HUE,
    ),
    # Random rotation by up to RANDOM_ROTATION_DEGREES degrees.
    transforms.RandomRotation(RANDOM_ROTATION_DEGREES),
    # Random translation and scaling.
    transforms.RandomAffine(
        degrees=RANDOM_AFFINE_DEGREES,
        translate=RANDOM_AFFINE_TRANSLATE,
        scale=RANDOM_AFFINE_SCALE,
    ),
    # Perspective warp, applied with probability RANDOM_PERSPECTIVE_PROB.
    transforms.RandomPerspective(
        distortion_scale=RANDOM_PERSPECTIVE_DISTORTION_SCALE,
        p=RANDOM_PERSPECTIVE_PROB,
    ),
    # Grayscale conversion, applied with probability RANDOM_GRAYSCALE_PROB.
    transforms.RandomGrayscale(p=RANDOM_GRAYSCALE_PROB),
    # Convert to a tensor, then normalise with the configured mean and std.
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])

test_transform = transforms.Compose([
    transforms.Resize((RESIZE_DIM, RESIZE_DIM)),
    transforms.ToTensor(),
    # Same normalisation parameters as the training pipeline.
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])

In [None]:

# Seed for the train/validation partition so that Restart & Run All yields
# the same split every time.
SPLIT_SEED = 42
# Worker processes used by every DataLoader below.
NUM_WORKERS = 5

# Hold out TEST_SPLIT_SIZE of the labelled rows for validation.
train_set, val_set = train_test_split(
    train_df,
    test_size=TEST_SPLIT_SIZE,
    random_state=SPLIT_SEED,
)

train_dataset = ImageDataset(train_set, img_dir=img_dir, transform=transform)
# Validation uses the deterministic pipeline: evaluating on randomly augmented
# images makes the validation metrics noisy and not comparable between epochs.
val_dataset = ImageDataset(val_set, img_dir=img_dir, transform=test_transform)
test_dataset = ImageDataset(test_df, img_dir=img_dir, transform=test_transform)

# Only the training loader shuffles; validation and test keep row order.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)



In [None]:
# Aliased as TF (not F) because this notebook also imports
# torch.nn.functional as F; sharing the alias makes the loss code depend on
# which cell happened to run last.
import torchvision.transforms.functional as TF

image_name, image_tensor, label = train_dataset[0]

# Undo the Normalize step so the preview shows natural colours, and clamp to
# [0, 1], the range to_pil_image expects for float tensors.
norm_mean = torch.tensor(NORMALIZE_MEAN).view(-1, 1, 1)
norm_std = torch.tensor(NORMALIZE_STD).view(-1, 1, 1)
image = TF.to_pil_image((image_tensor * norm_std + norm_mean).clamp(0, 1))

# Display the image
plt.imshow(image)
plt.title(f'Label: {image_name} {label} ')
plt.show()

In [None]:
# Backbone definition using ResNeXt50_32x4d, created through timm with its
# pretrained weights.
backbone = timm.create_model(
    model_name='resnext50_32x4d.a1h_in1k',
    pretrained=True,
)

# Reset the classifier to 0 classes so that a custom head can be attached.
backbone.reset_classifier(num_classes=0)

class MLCNNet(nn.Module):
    """Backbone feature extractor followed by a three-layer MLP head.

    Args:
        backbone: module mapping an input batch to a 2-D feature batch.
        n_classes: number of output logits.
        in_features: width of the backbone's output. When omitted, the
            backbone's ``num_features`` attribute is used if it has one,
            otherwise 2048 (the previously hard-coded value).
        dropout: dropout probability for both hidden layers. When omitted,
            the notebook-level ``DROPOUT_RATE_1`` is used.
    """

    def __init__(self, backbone, n_classes, in_features=None, dropout=None):
        super().__init__()
        if in_features is None:
            in_features = getattr(backbone, 'num_features', 2048)
        if dropout is None:
            dropout = DROPOUT_RATE_1

        self.model = backbone
        # The order of the layers fixes the state_dict keys
        # (classifier.0 ... classifier.8), so it must not change if saved
        # weights are to keep loading.
        self.classifier = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, n_classes)
        )

    def forward(self, x):
        """Return the raw (pre-sigmoid) logits for the batch ``x``."""
        features = self.model(x)
        return self.classifier(features)

# Full network: the backbone plus a classifier head with N_LABELS outputs.
model = MLCNNet(backbone=backbone, n_classes=N_LABELS)

In [None]:
import torch.nn.functional as F
from torchmetrics.classification import MultilabelF1Score

class LitMLCNet(pl.LightningModule):
    """Lightning wrapper that trains ``model`` as a multi-label classifier.

    Every batch is an ``(img_name, x, y)`` triple. The loss is binary
    cross-entropy computed on the raw logits, and macro F1 over ``N_LABELS``
    labels is tracked with one metric object per stage.

    Args:
        model: network mapping an image batch to ``N_LABELS`` logits.
        lr: initial learning rate passed to the Lamb optimizer.
    """

    def __init__(self, model, lr=0.001):
        super().__init__()
        self.model = model
        self.lr = lr
        # Metric objects accumulate state across calls, so training and
        # validation each get their own instance instead of sharing one.
        self.f1_score = MultilabelF1Score(num_labels=N_LABELS, average='macro')
        self.val_f1_score = MultilabelF1Score(num_labels=N_LABELS, average='macro')

    def forward(self, x):
        """Return the wrapped model's logits for ``x``."""
        return self.model(x)

    def _loss_and_f1(self, batch, metric):
        """Run one batch; return ``(loss, batch_f1, batch_size)``."""
        _, x, y = batch
        logits = self.model(x)
        # Cast the targets to the logits' dtype: arrays collated from NumPy
        # may be float64 while the logits are float32.
        loss = F.binary_cross_entropy_with_logits(logits, y.to(logits.dtype))
        f1 = metric(logits.sigmoid(), y.int())
        return loss, f1, x.size(0)

    def training_step(self, batch, batch_idx):
        loss, f1, n_samples = self._loss_and_f1(batch, self.f1_score)
        # batch_size is passed explicitly because the first batch element is
        # a collection of file names, not a tensor.
        self.log('train_loss', loss, on_step=True, on_epoch=True,
                 prog_bar=True, logger=True, batch_size=n_samples)
        # Logging the metric object (rather than the batch value) makes the
        # epoch-level number the F1 over the whole epoch instead of a mean of
        # per-batch F1 scores.
        self.log('train_f1', self.f1_score, on_step=True, on_epoch=True,
                 prog_bar=True, logger=True, batch_size=n_samples)
        return {'loss': loss, 'f1': f1}

    def validation_step(self, batch, batch_idx):
        loss, f1, n_samples = self._loss_and_f1(batch, self.val_f1_score)
        self.log('val_loss', loss, on_step=False, on_epoch=True,
                 prog_bar=True, logger=True, batch_size=n_samples)
        self.log('val_f1', self.val_f1_score, on_step=False, on_epoch=True,
                 prog_bar=True, logger=True, batch_size=n_samples)
        return {'val_loss': loss, 'val_f1': f1}

    def configure_optimizers(self):
        """Lamb optimizer with a cosine warm-restart schedule stepped per epoch."""
        optimizer = Lamb(self.parameters(), lr=self.lr)
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1, eta_min=0)
        # No "monitor" entry: this scheduler does not depend on a logged metric.
        return {
            "optimizer": optimizer,
            "lr_scheduler": {"scheduler": scheduler, "interval": "epoch"},
        }

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """Return ``(logits, img_names)`` for the batch; no sigmoid is applied."""
        img_name, x, _ = batch
        preds = self(x)
        return preds, img_name


In [None]:
pl_Model = LitMLCNet(model)

torch.set_float32_matmul_precision('high')

# Set to True to train; with False the saved weights are only loaded so the
# following cells can run inference.
TRAIN_MODEL = False

weights_path = './FINAL_MODEL/FINAL_RESNEXT.pth'
# map_location='cpu' lets a checkpoint saved on a GPU load on a machine
# without one.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
state_dict = torch.load(weights_path, map_location='cpu')
load_result = pl_Model.load_state_dict(state_dict, strict=False)
# strict=False skips non-matching keys without raising, so report them:
# a silently unloaded layer would otherwise keep its initial weights.
if load_result.missing_keys or load_result.unexpected_keys:
    print(f"Missing keys: {load_result.missing_keys}")
    print(f"Unexpected keys: {load_result.unexpected_keys}")

if TRAIN_MODEL:
    trainer = pl.Trainer(
        default_root_dir='./',
        max_epochs=MAX_EPOCHS,
        log_every_n_steps=10,
        accelerator='gpu',
        devices=1,
        logger=True,
    )
    trainer.fit(pl_Model,
                train_dataloader,
                val_dataloader)
else:
    trainer = pl.Trainer()

In [None]:
# Load the TensorBoard notebook extension and point it at ./lightning_logs.
%load_ext tensorboard
%tensorboard --logdir ./lightning_logs

In [None]:
# Run inference over the test loader. The result holds one entry per batch,
# each being whatever predict_step returned for that batch.
preds_labels = trainer.predict(
    model=pl_Model,
    dataloaders=test_dataloader,
)

In [None]:
# List of possible labels, in output order: 1..11 and 13..19 (12 is absent).
possible_labels = [*range(1, 12), *range(13, 20)]

# Convert predictions to label format
def get_labels_from_logits(logits, threshold=THRESHOLD):
    """Return the labels, as strings, whose score is at least ``threshold``.

    ``logits`` is a 1-D sequence of scores; position ``i`` corresponds to
    ``possible_labels[i]``. Scores are compared to ``threshold`` as given,
    with no sigmoid applied here.
    """
    selected = []
    for position, score in enumerate(logits):
        if score >= threshold:
            selected.append(str(possible_labels[position]))
    return selected

# Prepare the data for CSV: one row per test image.
csv_data = []
for batch_logits, batch_names in preds_labels:
    batch_probs = batch_logits.sigmoid().cpu().numpy()
    # Each entry of preds_labels is a whole batch, so every (name, scores)
    # pair in it must be emitted; taking only element [0] would drop all but
    # one image per batch.
    for img_name, probs in zip(batch_names, batch_probs):
        labels_str = " ".join(get_labels_from_logits(probs))
        csv_data.append([img_name, labels_str])

# Guard against silently writing an incomplete submission.
assert len(csv_data) == len(test_df), (
    f"expected {len(test_df)} prediction rows, got {len(csv_data)}"
)

# Write to CSV
csv_file = './FINAL_MODEL/510369965-490424191-490299418-ResNext-Predictions.csv'
with open(csv_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['ImageID', 'Labels'])
    writer.writerows(csv_data)

print(f"Predictions saved to {csv_file}")

In [None]:
# Raw JSON predictions for ensemble: one list of sigmoid scores per image,
# in the order the prediction batches were produced.
predictions = []
for batch_logits, _ in preds_labels:
    # Apply the sigmoid to the whole batch at once; tolist() then yields one
    # inner list per image.
    predictions.extend(torch.sigmoid(batch_logits).tolist())

# NOTE(review): the file name says "resnet" although the backbone is a
# ResNeXt; left unchanged in case the ensemble code expects this exact path.
raw_predictions_file = './processed-data/rawPredictions-resnet.json'
# Create the output folder if needed so the dump cannot fail on a fresh checkout.
os.makedirs(os.path.dirname(raw_predictions_file), exist_ok=True)
with open(raw_predictions_file, 'w', encoding='utf-8') as f:
    json.dump(predictions, f, ensure_ascii=False, indent=4)