In [1]:
# Clone the project repository into the current directory via a shell escape.
# NOTE(review): later cells read config.json and import multimodel_pipeline after
# cd-ing into this clone, so both presumably come from this repository - confirm.
!git clone https://github.com/Kalash1106/ML_GC_2K24

Cloning into 'ML_GC_2K24'...
remote: Enumerating objects: 151, done.
remote: Counting objects: 100% (85/85), done.
remote: Compressing objects: 100% (75/75), done.
remote: Total 151 (delta 43), reused 22 (delta 10), pack-reused 66
Receiving objects: 100% (151/151), 94.81 KiB | 7.90 MiB/s, done.
Resolving deltas: 100% (66/66), done.


In [2]:
cd /kaggle/working/ML_GC_2K24

/kaggle/working/ML_GC_2K24


In [3]:
import json

# Load the run configuration from the repository's config.json.
# The context manager closes the file handle as soon as parsing finishes,
# instead of leaving it open for the lifetime of the kernel.
with open('config.json', encoding='utf-8') as config_file:
    params = json.load(config_file)

# Display the loaded settings as the cell output.
params

{'train_image_folder': 'data/KCDH2024_Training_Input_10K',
 'gt_file': 'data/KCDH2024_Training_GroundTruth.csv',
 'mapping_file': 'utility/disease_id.json',
 'eval_image_folder': 'data/KCDH2024_Test_Input',
 'eval_labels': 'data/eval_labels.csv',
 'model_path': 'weights/ResNet18',
 'submission_csv_path': 'submission.csv',
 'img_size': 224,
 'test_size': 0.15,
 'pretrained': 1,
 'num_epochs': 10,
 'train_batch_size': 32,
 'test_batch_size': 32,
 'eval_batch_size': 32,
 'class_weights': [1.4, 1.0, 1.91, 2.36, 1.37, 5.0, 4.49]}

In [4]:
#### Override config.json values for this Kaggle session ####
# Image folders point at the Kaggle input dataset; the CSV/JSON helper files
# point into the cloned repository. The training batch size is raised to 64.
# Keys not listed here keep the value loaded from config.json.
params.update({
    'train_image_folder': "/kaggle/input/aiml-general-championship/KCDH2024_Training_Input_10K/KCDH2024_Training_Input_10K",
    'gt_file': "/kaggle/working/ML_GC_2K24/data/KCDH2024_Training_GroundTruth.csv",
    'mapping_file': "/kaggle/working/ML_GC_2K24/utility/disease_id.json",
    'eval_image_folder': "/kaggle/input/aiml-general-championship/KCDH2024_Test_Input/KCDH2024_Test_Input",
    'eval_labels': "/kaggle/working/ML_GC_2K24/data/eval_labels.csv",
    'train_batch_size': 64,
})

# Show the effective configuration.
params

{'train_image_folder': '/kaggle/input/aiml-general-championship/KCDH2024_Training_Input_10K/KCDH2024_Training_Input_10K',
 'gt_file': '/kaggle/working/ML_GC_2K24/data/KCDH2024_Training_GroundTruth.csv',
 'mapping_file': '/kaggle/working/ML_GC_2K24/utility/disease_id.json',
 'eval_image_folder': '/kaggle/input/aiml-general-championship/KCDH2024_Test_Input/KCDH2024_Test_Input',
 'eval_labels': '/kaggle/working/ML_GC_2K24/data/eval_labels.csv',
 'model_path': 'weights/ResNet18',
 'submission_csv_path': 'submission.csv',
 'img_size': 224,
 'test_size': 0.15,
 'pretrained': 1,
 'num_epochs': 10,
 'train_batch_size': 64,
 'test_batch_size': 32,
 'eval_batch_size': 32,
 'class_weights': [1.4, 1.0, 1.91, 2.36, 1.37, 5.0, 4.49]}

In [5]:
import os

import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import recall_score

import torch
from torch.utils.data import DataLoader
import torch.optim as optim
from torchvision import models
from torchvision import transforms

# from PIL import Image
# from tqdm import tqdm

# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [6]:
from multimodel_pipeline import Classify, CustomDataset, get_splits, train_model

In [7]:
# One-vs-rest label setup for this notebook: "NV" is class 1, every other
# diagnosis label is collapsed into class 0 ("OTHER").
DIAGNOSIS_LABELS = ("MEL", "NV", "BCC", "AKIEC", "BKL", "DF", "VASC")
mappings = {label: int(label == "NV") for label in DIAGNOSIS_LABELS}

# Class id -> display name, the inverse view of `mappings`.
inv_mappings = {
    0: "OTHER",
    1: "NV",
}

# Read the ground-truth path from the shared configuration rather than repeating
# the literal, so this cell cannot drift from the value set in `params`.
gt_file = params['gt_file']

# NOTE(review): Classify comes from multimodel_pipeline; later cells use its
# num_classes, clean_df and get_final_df members - its internals are not visible here.
class_obj = Classify(mappings, inv_mappings, gt_file)

In [8]:
##### Build the classifier: ResNet-34 backbone with a NUM_CLASSES-way head #####
NUM_CLASSES = class_obj.num_classes

# Start from torchvision's default pretrained ResNet-34 weights.
model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)

# Swap the final fully connected layer for one sized to this task and
# re-initialise its weight matrix with Xavier-uniform.
num_features = model.fc.in_features
model.fc = torch.nn.Linear(num_features, NUM_CLASSES)
torch.nn.init.xavier_uniform_(model.fc.weight)

# Report parameter counts; "non-trainable" is whatever does not require gradients.
total_param_count = sum(p.numel() for p in model.parameters())
trainable_param_count = sum(p.numel() for p in model.parameters() if p.requires_grad)
frozen_param_count = total_param_count - trainable_param_count
print("Total parameters:", total_param_count,
      ", Trainable parameters:", trainable_param_count,
      ", Non-trainable parameters:", frozen_param_count)
print()


##### Defining Dataset and Dataloader ######
NUM_WORKERS = 4

# Take the input resolution from the configuration instead of repeating 224 in
# every transform, so changing params['img_size'] actually takes effect.
IMG_SIZE = params['img_size']

# Per-channel normalization constants, shared by the train and validation/test
# transforms. These are the values torchvision documents for its
# ImageNet-pretrained models, which matches the pretrained backbone used here.
NORM_MEAN = (0.485, 0.456, 0.406)
NORM_STD = (0.229, 0.224, 0.225)

# Split the labelled data into train/validation parts; the unlabeled evaluation
# list is a header-less CSV whose first column is renamed to 'image'.
train_df, val_df = get_splits(class_obj.clean_df, params['test_size'], stratify=True)
test_df = pd.read_csv(params['eval_labels'], header=None).rename(columns={0: 'image'})

# Training transform: augmentations are currently disabled (kept commented out
# below), so it is effectively the same pipeline as validation.
train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
#     transforms.RandomHorizontalFlip(),
#     transforms.RandomRotation(degrees=20),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD)
#     model_preprocess,
])

# Validation/test transform: deterministic resize + normalize only.
val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD)
#     model_preprocess,
])

# Train and validation images live in the same folder; the evaluation set has
# its own folder and is built with is_test=True.
train_ds = CustomDataset(dataframe=train_df, root_dir=params['train_image_folder'], transform=train_transform)
val_ds = CustomDataset(dataframe=val_df, root_dir=params['train_image_folder'], transform=val_transform)
test_ds = CustomDataset(dataframe=test_df, root_dir=params['eval_image_folder'], transform=val_transform, is_test=True)

# Create DataLoader with prefetch and pin memory
# Only the training loader shuffles; validation and test keep file order.
train_dl = DataLoader(train_ds, batch_size=params['train_batch_size'], shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
val_dl = DataLoader(val_ds, batch_size=params['test_batch_size'], shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
test_dl = DataLoader(test_ds, batch_size=params['eval_batch_size'], shuffle=False, num_workers=NUM_WORKERS, pin_memory=False)
print()
# Sanity-check one batch from each loader.
# Each loader is iterated exactly once: every `iter(loader)` call starts a fresh
# set of worker processes and loads new batches, so fetching the batch a second
# time just to read the labels doubles the cost and, for the shuffled training
# loader, would describe two different batches.
for loader_name, loader in (("train_dl", train_dl), ("val_dl", val_dl)):
    sample_batch = next(iter(loader))
    print(f"{loader_name} batch shape: input- {sample_batch[0].shape}, labels- {sample_batch[1].shape}")

# The second element of a test batch is reported by length rather than .shape,
# as in the original check.
test_batch = next(iter(test_dl))
print(f"test_dl batch shape: input- {test_batch[0].shape}, labels- {len(test_batch[1])}")

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 167MB/s] 


Total parameters: 21285698 , Trainable parameters: 21285698 , Non-trainable parameters: 0


train_dl batch shape: input- torch.Size([64, 3, 224, 224]), labels- torch.Size([64])
val_dl batch shape: input- torch.Size([32, 3, 224, 224]), labels- torch.Size([32])
test_dl batch shape: input- torch.Size([32, 3, 224, 224]), labels- 32


In [None]:
## Training setup: hyper-parameters, loss, optimizer, then the training run ##
NUM_EPOCHS = 20
LEARNING_RATE = 1e-3

# Plain (unweighted) cross-entropy is the active loss; the alternatives tried
# earlier are kept below for reference.
# criterion = RobustAsymmetricLoss(class_weights)
# criterion = WeightedFocalLoss(CLASS_WEIGHTS, gamma=1.5)
criterion = torch.nn.CrossEntropyLoss()

# Adam over every model parameter (the whole network is fine-tuned).
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# train_model comes from multimodel_pipeline and returns a pair, unpacked here
# as the trained model and `weights`.
trained_model, weights = train_model(
    model,
    train_dl,
    val_dl,
    criterion,
    optimizer,
    NUM_EPOCHS,
    DEVICE,
)

Epoch 1/20 - Training: 100%|██████████| 128/128 [00:45<00:00,  2.84it/s]


Epoch 1/20 - Training, Loss: 0.5336, Accuracy: 0.7944


Epoch 1/20 - Validation: 100%|██████████| 45/45 [00:07<00:00,  6.08it/s]


Epoch 1/20 - Validation, Loss: 0.3652, Accuracy: 0.8354, Average Recall: 0.7953
---- Epoch 1/20 completed ---



Epoch 2/20 - Training: 100%|██████████| 128/128 [00:34<00:00,  3.67it/s]


Epoch 2/20 - Training, Loss: 0.3608, Accuracy: 0.8428


Epoch 2/20 - Validation: 100%|██████████| 45/45 [00:05<00:00,  7.95it/s]


Epoch 2/20 - Validation, Loss: 0.3169, Accuracy: 0.8604, Average Recall: 0.8286
---- Epoch 2/20 completed ---



Epoch 3/20 - Training:  20%|█▉        | 25/128 [00:07<00:35,  2.91it/s]

In [None]:
# Run the trained model over the evaluation loader on DEVICE.
# NOTE(review): the result is presumably a DataFrame of predictions (it is saved
# with .to_csv in the next cell) - confirm against Classify.get_final_df.
final_df = class_obj.get_final_df(
    trained_model,
    test_dl,
    device=DEVICE,
)

# Display the result as the cell output.
final_df

In [None]:
# Write the NV-vs-other result table to the Kaggle working directory.
# NOTE(review): to_csv is called with its defaults, so the index is written as
# the first column - confirm that is wanted downstream.
OUTPUT_CSV_PATH = '/kaggle/working/final_NV_not_NV.csv'
final_df.to_csv(OUTPUT_CSV_PATH)