In [4]:
import pandas as pd 

# Read the preprocessed training table and strip three one-hot columns that,
# judging by their names, only flag missing/unknown categories.
columns_to_drop = [
    'sex_nan',
    "tbp_lv_location_simple_Unknown",
    "anatom_site_general_nan",
]
final_df_train = pd.read_csv(
    "E:/Mine/Skin_cancer_detection/isic-2024-challenge/final_train_data.csv",
    low_memory=False,
).drop(columns=columns_to_drop)
# Preview the first rows as the cell output.
final_df_train.head()


Unnamed: 0,isic_id,target,patient_id,age_approx,clin_size_long_diam_mm,tbp_lv_A,tbp_lv_Aext,tbp_lv_B,tbp_lv_Bext,tbp_lv_C,...,tbp_lv_location_simple_Left Leg,tbp_lv_location_simple_Right Arm,tbp_lv_location_simple_Right Leg,tbp_lv_location_simple_Torso Back,tbp_lv_location_simple_Torso Front,anatom_site_general_anterior torso,anatom_site_general_head/neck,anatom_site_general_lower extremity,anatom_site_general_posterior torso,anatom_site_general_upper extremity
0,ISIC_0015670,0,IP_1235828,60.0,3.04,20.244422,16.261975,26.922447,23.954773,33.684638,...,0,0,1,0,0,0,0,1,0,0
1,ISIC_0015845,0,IP_8170065,60.0,1.1,31.71257,25.36474,26.331,24.54929,41.21903,...,0,0,0,0,0,0,1,0,0,0
2,ISIC_0015864,0,IP_6724798,60.0,3.4,22.57583,17.12817,37.97046,33.48541,44.17492,...,0,0,0,1,0,0,0,0,1,0
3,ISIC_0015902,0,IP_4111386,65.0,3.22,14.242329,12.164757,21.448144,21.121356,25.7462,...,0,0,0,0,1,1,0,0,0,0
4,ISIC_0024200,0,IP_8313778,55.0,2.73,24.72552,20.05747,26.4649,25.71046,36.21798,...,0,0,0,0,1,1,0,0,0,0


In [5]:
import h5py
from sklearn.model_selection import train_test_split
# Load the metadata
# metadata_df = pd.read_csv('train-metadata.csv')
train_hd5 = "E:/Mine/Skin_cancer_detection/isic-2024-challenge/train-image.hdf5"

# Only the top-level keys (the isic_id of every stored image) are needed from
# the HDF5 file here, so open it in a context manager: the handle is released
# as soon as the keys are copied instead of staying open for the whole kernel
# session. NOTE: `hdf5_file` is closed after this block; reopen the file from
# `train_hd5` wherever pixel data is required.
with h5py.File(train_hd5, 'r') as hdf5_file:
    image_ids = list(hdf5_file.keys())

# Ensure the metadata is consistent with the image IDs: keep only rows whose
# isic_id has a matching entry in the HDF5 file.
final_df_train = final_df_train[final_df_train['isic_id'].isin(image_ids)]

# Split the dataset into 80% for training and 20% for validation.
# Stratifying on `target` keeps the class ratio the same in both parts, and
# the fixed random_state makes the split reproducible. Despite their names,
# `train_ids` and `val_ids` are full DataFrames (all metadata columns).
train_ids, val_ids = train_test_split(
    final_df_train,
    test_size=0.2,
    random_state=42,
    stratify=final_df_train['target'],
)


In [6]:
# Class balance of the training split (rows per `target` value).
train_ids["target"].value_counts()

target
0    320533
1       314
Name: count, dtype: int64

In [7]:
# Class balance of the validation split (rows per `target` value).
val_ids["target"].value_counts()

target
0    80133
1       79
Name: count, dtype: int64

In [3]:
# Report the (rows, columns) shape of each split.
print(
    f'train size: {train_ids.shape}, val size: {val_ids.shape}'
)

train size: (79, 98), val size: (20, 98)


In [4]:
import sys
# Extend the module search path with two directories relative to the
# notebook's working directory so the bare-name imports below can resolve.
sys.path.append('../src')
sys.path.append('../Models')
# Presumably provided by one of the directories added above — confirm location.
from CNN_model import EfficientNetTrainer
# from src import dataloader
from dataloader import ISICDataset
from torch.utils.data import Dataset, DataLoader
from augmentation import augmentation_pipeline


In [5]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Side length (pixels) every image is resized to before reaching the model.
image_size = 224

# Training-time augmentation pipeline.
# Geometric and photometric augmentations run at the image's native size, the
# image is then resized, a random patch is blanked out, and finally the pixels
# are normalized and converted to a tensor.
transforms_train = A.Compose([
    # Random flips / transposition.
    A.Transpose(p=0.5),
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, p=0.75),
    # Exactly one blur/noise variant, applied 70% of the time.
    A.OneOf([
        A.MotionBlur(blur_limit=5),
        A.MedianBlur(blur_limit=5),
        A.GaussianBlur(blur_limit=5),
        A.GaussNoise(var_limit=(5.0, 30.0)),
    ], p=0.7),
    # Exactly one spatial distortion, applied 70% of the time.
    A.OneOf([
        A.OpticalDistortion(distort_limit=1.0),
        A.GridDistortion(num_steps=5, distort_limit=1.),
        A.ElasticTransform(alpha=3),
    ], p=0.7),
    A.CLAHE(clip_limit=4.0, p=0.7),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, border_mode=0, p=0.85),
    A.Resize(image_size, image_size),
    # Blank out a single rectangle of up to 37.5% of the image side.
    A.CoarseDropout(max_holes=1, max_height=int(image_size * 0.375), max_width=int(image_size * 0.375), fill_value=0, p=0.7),
    # Apply the same normalization as the validation pipeline. ToTensorV2 only
    # changes the array layout/type and does not rescale values, so without
    # this step training images would reach the model on a different value
    # scale than validation images.
    # NOTE(review): assumes ISICDataset does no scaling of its own — confirm.
    A.Normalize(
        mean=[0.4815, 0.4578, 0.4082],
        std=[0.2686, 0.2613, 0.2758],
        max_pixel_value=255.0,
        p=1.0,
    ),
    ToTensorV2(),
])
# Deterministic validation pipeline: resize, normalize each channel with the
# fixed mean/std below, then convert to a tensor. No random augmentation.
transformations_valid = A.Compose(
    [
        A.Resize(height=image_size, width=image_size),
        A.Normalize(
            mean=[0.4815, 0.4578, 0.4082],
            std=[0.2686, 0.2613, 0.2758],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(),
    ],
    p=1.,
)

def create_dataloader(hd5file, csvfile, transform, batch_size=32, shuffle=True):
    """Wrap an ``ISICDataset`` in a ``DataLoader``.

    Args:
        hd5file: First positional argument forwarded to ``ISICDataset``.
        csvfile: Second positional argument forwarded to ``ISICDataset``.
        transform: Third positional argument forwarded to ``ISICDataset``
            (the augmentation / preprocessing pipeline).
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles samples every epoch. Defaults
            to ``True`` (the previous hard-coded behaviour); pass ``False``
            for validation/inference so batch order follows dataset order.

    Returns:
        A ``torch.utils.data.DataLoader`` over the constructed dataset.

    NOTE(review): the call sites in this notebook pass the metadata DataFrame
    first and the HDF5 path second, so the parameter names look swapped
    relative to actual usage. The arguments are forwarded positionally and
    unchanged; confirm the expected order against ``ISICDataset``.
    """
    dataset = ISICDataset(hd5file, csvfile, transform)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

train_hd5 = "E:/Mine/Skin_cancer_detection/isic-2024-challenge/train-image.hdf5"

# Training loader: create_dataloader shuffles by default, so batches are
# reshuffled every epoch.
train_loader = create_dataloader(train_ids, train_hd5, transforms_train)

# Validation loader: built directly so shuffling can be switched off. A fixed
# order keeps batches aligned with the rows of `val_ids`, which makes
# per-sample predictions traceable and evaluation runs repeatable.
val_loader = DataLoader(
    ISICDataset(val_ids, train_hd5, transformations_valid),
    batch_size=32,
    shuffle=False,
)


  validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)


In [6]:
# Sanity check: dtype of the `targets` attribute exposed by the validation dataset.
val_loader.dataset.targets.dtype

dtype('int64')

In [8]:
# Instantiate the project trainer for a two-class problem with a 1e-4 learning rate.
trainer = EfficientNetTrainer(num_classes=2, learning_rate=1e-4)
# Training and checkpoint saving are currently disabled; uncomment to fit the model.
# trainer.train(train_loader, val_loader, epochs=5)
# trainer.save_model('efficientnet_best.pth')
# Show the trainer's `model` attribute as the cell output.
trainer.model



EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [None]:

# trainer.save_model('efficientnet_best.pth')


In [2]:
# import torch
# import numpy as np

# tensor = torch.randn(3, 224, 224)
# array = tensor.numpy()
# tensor_back = torch.from_numpy(array)

# # print(np.__version__,":", torch.__version__)
