In [None]:
!git clone https://github.com/Kalash1106/ML_GC_2K24

In [None]:
cd /kaggle/working/ML_GC_2K24

In [None]:
import json

# Load the run configuration shipped with the repository. The context manager
# guarantees the file handle is closed once parsing is done (the previous
# version left it open for the lifetime of the kernel).
with open('config.json', encoding='utf-8') as f:
    params = json.load(f)

# Display the loaded configuration.
params

In [None]:
#### assign PARAMS ###
# Override the repository defaults with the locations used on Kaggle and the
# training batch size for this run. Keys are applied in the order listed.
params.update({
    # training images, ground-truth labels and the disease-id mapping
    'train_image_folder': "/kaggle/input/aiml-general-championship/KCDH2024_Training_Input_10K/KCDH2024_Training_Input_10K",
    'gt_file': "/kaggle/working/ML_GC_2K24/data/KCDH2024_Training_GroundTruth.csv",
    'mapping_file': "/kaggle/working/ML_GC_2K24/utility/disease_id.json",
    # evaluation images and the list of evaluation image names
    'eval_image_folder': "/kaggle/input/aiml-general-championship/KCDH2024_Test_Input/KCDH2024_Test_Input",
    'eval_labels': "/kaggle/working/ML_GC_2K24/data/eval_labels.csv",
    'train_batch_size': 64,
})

# Display the effective configuration.
params

In [None]:
import os

import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import recall_score

import torch
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

from torchvision import models
from torchvision import transforms

# from PIL import Image
# from tqdm import tqdm

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_device_name)

In [None]:
from multimodel_pipeline import Classify, CustomDataset, get_splits, train_model

In [None]:
# Binary task: "NV" is class 1, every other diagnosis label is class 0.
_DIAGNOSIS_LABELS = ("MEL", "NV", "BCC", "AKIEC", "BKL", "DF", "VASC")
mappings = {label: int(label == "NV") for label in _DIAGNOSIS_LABELS}

# Class index -> name used when turning predictions back into labels.
inv_mappings = dict([(0, "OTHER"), (1, "NV")])

# Ground-truth CSV for the training images.
gt_file = '/kaggle/working/ML_GC_2K24/data/KCDH2024_Training_GroundTruth.csv'

class_obj = Classify(mappings, inv_mappings, gt_file)

In [None]:
##### Define Model Based on NUM_CLASSES #######
NUM_CLASSES = class_obj.num_classes

# Alternative starting points, kept for reference.
#
# ResNet-34 with the default pretrained weights:
# model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
#
# ResNet-18 from scratch with a Xavier-initialised classification head:
# model = models.resnet18(weights=None)
# num_features = model.fc.in_features
# model.fc = torch.nn.Linear(num_features, NUM_CLASSES)
# torch.nn.init.xavier_uniform_(model.fc.weight)

# Resume training from a previously saved checkpoint (path of the .pth file).
# - map_location=DEVICE lets a checkpoint saved on a GPU load on a CPU-only
#   session instead of failing while deserialising CUDA tensors.
# - weights_only=False is required because the checkpoint stores the whole
#   pickled model object, not just tensors; newer PyTorch releases default to
#   weights_only=True and would refuse to load it.
# SECURITY: unpickling executes arbitrary code, so only load checkpoints that
# come from a trusted source.
checkpoint = torch.load(
    '/kaggle/input/resnet-weights/checkpoint (4).pth',
    map_location=DEVICE,
    weights_only=False,
)
model = checkpoint['model']
model.load_state_dict(checkpoint['weights']['last_weights'])  # weights of the last epoch

# Parameter summary: walk the parameters once for the total and once for the
# trainable subset; the non-trainable count is the difference.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total parameters:", total_params,
      ", Trainable parameters:", trainable_params,
      ", Non-trainable parameters:", total_params - trainable_params)
print()


##### Defining Dataset and Dataloader ######
NUM_WORKERS = 4

# Train/validation split of the cleaned ground truth, plus the list of
# evaluation images (a header-less CSV whose first column is the image name).
train_df, val_df = get_splits(class_obj.clean_df, params['test_size'], stratify=True)
test_df = pd.read_csv(params['eval_labels'], header=None)
test_df = test_df.rename(columns={0: 'image'})


def _tensor_steps():
    """Return the shared tail of both pipelines: tensor conversion + normalisation."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]


# Exactly one of these augmentations is picked per sample, weighted by `p`.
_random_augmentation = transforms.RandomChoice(
    [
        transforms.RandomPerspective(0.5, 0.5),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
        transforms.ElasticTransform(alpha=20.0, sigma=5.0),
    ],
    p=[0.3, 0.1, 0.2],
)

# Training pipeline: resize, light geometric augmentation, one random
# augmentation, then tensor conversion and normalisation.
train_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=0.2),
        transforms.RandomRotation(degrees=20),
        _random_augmentation,
    ]
    + _tensor_steps()
)

# Validation / evaluation pipeline: deterministic, no augmentation.
val_transform = transforms.Compose([transforms.Resize((224, 224))] + _tensor_steps())

_train_images = params['train_image_folder']
train_ds = CustomDataset(dataframe=train_df, root_dir=_train_images, transform=train_transform)
val_ds = CustomDataset(dataframe=val_df, root_dir=_train_images, transform=val_transform)
test_ds = CustomDataset(
    dataframe=test_df,
    root_dir=params['eval_image_folder'],
    transform=val_transform,
    is_test=True,
)

# Create the DataLoaders. Only the training loader shuffles; the train and
# validation loaders pin host memory.
train_dl = DataLoader(train_ds, batch_size=params['train_batch_size'], shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
val_dl = DataLoader(val_ds, batch_size=params['test_batch_size'], shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
test_dl = DataLoader(test_ds, batch_size=params['eval_batch_size'], shuffle=False, num_workers=NUM_WORKERS, pin_memory=False)
print()

# Sanity check: pull ONE batch from each loader and reuse it for both fields.
# Calling next(iter(dl)) separately for inputs and labels would start the
# worker processes and load a full batch twice per loader.
train_batch = next(iter(train_dl))
val_batch = next(iter(val_dl))
test_batch = next(iter(test_dl))
print(f"train_dl batch shape: input- {train_batch[0].shape}, labels- {train_batch[1].shape}")
print(f"val_dl batch shape: input- {val_batch[0].shape}, labels- {val_batch[1].shape}")
# The second element of a test batch is reported by length rather than shape.
print(f"test_dl batch shape: input- {test_batch[0].shape}, labels- {len(test_batch[1])}")

In [None]:
## define criterion, optimizer, scheduler and off we go for training #########
LEARNING_RATE = 1e-3
NUM_EPOCHS = 35

# Loss: plain cross-entropy. Other losses that were tried:
# criterion = RobustAsymmetricLoss(class_weights)
# criterion = WeightedFocalLoss(CLASS_WEIGHTS, gamma=1.5)
criterion = torch.nn.CrossEntropyLoss()

# Adam over every model parameter.
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Halve the learning rate after 7 epochs without improvement, never going
# below 1e-6.
# NOTE(review): mode='max' assumes train_model steps the scheduler with a
# higher-is-better metric (not a loss) — confirm against train_model.
_plateau_options = dict(mode='max', factor=0.5, patience=7, verbose=True, min_lr=1e-6)
scheduler = ReduceLROnPlateau(optimizer, **_plateau_options)

trained_model, weights = train_model(
    model, train_dl, val_dl, criterion, optimizer, NUM_EPOCHS, DEVICE, scheduler
)

In [None]:
## saving the model and weights dict as a checkpoint
# The checkpoint bundles the trained model object together with the weights
# returned by train_model, under the keys 'model' and 'weights'.
checkpoint = dict(model=trained_model, weights=weights)
torch.save(checkpoint, '/kaggle/working/checkpoint.pth')

In [None]:
# Reload the checkpoint written above and run inference on the evaluation set.
# - map_location=DEVICE keeps the load working when the current session has a
#   different device than the one the checkpoint was saved from.
# - weights_only=False is needed because the checkpoint contains the pickled
#   model object itself; newer PyTorch releases default to weights_only=True.
# SECURITY: only unpickle checkpoints from a trusted source.
checkpoint = torch.load(
    '/kaggle/working/checkpoint.pth',
    map_location=DEVICE,
    weights_only=False,
)
trained_model = checkpoint['model']  # load trained model from checkpoint

final_df = class_obj.get_final_df(trained_model, test_dl, device=DEVICE)
final_df

In [None]:
# Persist the predictions; the DataFrame index is written as the first column
# (pandas default).
final_df.to_csv(path_or_buf='/kaggle/working/final_NV_not_NV.csv')

In [None]:
### Checking the saved predictions against the truth labels from the CSV file.
# NOTE(review): `correct_metrices` is a project helper; what it returns in
# `report` is not visible from this notebook — confirm against utility/utils.py.
from utility.utils import correct_metrices
# Path of the prediction CSV written by the previous cell.
predction_csv_path = '/kaggle/working/final_NV_not_NV.csv'
report = correct_metrices(predction_csv_path)