In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

2024-08-08 07:52:54.212519: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-08 07:52:54.212657: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-08 07:52:54.355427: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


**Load Metadata**

In [2]:
# Load metadata
metadata_path = '/kaggle/input/isic-2024-challenge/train-metadata.csv'
metadata = pd.read_csv(metadata_path)

# Print the first few rows of the DataFrame to check if data is loaded correctly
metadata.head()



  metadata = pd.read_csv(metadata_path)


Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,lesion_id,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence
0,ISIC_0015670,0,IP_1235828,60.0,male,lower extremity,3.04,TBP tile: close-up,3D: white,20.244422,...,,Benign,Benign,,,,,,,97.517282
1,ISIC_0015845,0,IP_8170065,60.0,male,head/neck,1.1,TBP tile: close-up,3D: white,31.71257,...,IL_6727506,Benign,Benign,,,,,,,3.141455
2,ISIC_0015864,0,IP_6724798,60.0,male,posterior torso,3.4,TBP tile: close-up,3D: XP,22.57583,...,,Benign,Benign,,,,,,,99.80404
3,ISIC_0015902,0,IP_4111386,65.0,male,anterior torso,3.22,TBP tile: close-up,3D: XP,14.242329,...,,Benign,Benign,,,,,,,99.989998
4,ISIC_0024200,0,IP_8313778,55.0,male,anterior torso,2.73,TBP tile: close-up,3D: white,24.72552,...,,Benign,Benign,,,,,,,70.44251


**Image Batch Generate**

In [3]:
# Print column names to verify the presence of 'isic_id'
print(metadata.columns)


Index(['isic_id', 'target', 'patient_id', 'age_approx', 'sex',
       'anatom_site_general', 'clin_size_long_diam_mm', 'image_type',
       'tbp_tile_type', 'tbp_lv_A', 'tbp_lv_Aext', 'tbp_lv_B', 'tbp_lv_Bext',
       'tbp_lv_C', 'tbp_lv_Cext', 'tbp_lv_H', 'tbp_lv_Hext', 'tbp_lv_L',
       'tbp_lv_Lext', 'tbp_lv_areaMM2', 'tbp_lv_area_perim_ratio',
       'tbp_lv_color_std_mean', 'tbp_lv_deltaA', 'tbp_lv_deltaB',
       'tbp_lv_deltaL', 'tbp_lv_deltaLB', 'tbp_lv_deltaLBnorm',
       'tbp_lv_eccentricity', 'tbp_lv_location', 'tbp_lv_location_simple',
       'tbp_lv_minorAxisMM', 'tbp_lv_nevi_confidence', 'tbp_lv_norm_border',
       'tbp_lv_norm_color', 'tbp_lv_perimeterMM',
       'tbp_lv_radial_color_std_max', 'tbp_lv_stdL', 'tbp_lv_stdLExt',
       'tbp_lv_symm_2axis', 'tbp_lv_symm_2axis_angle', 'tbp_lv_x', 'tbp_lv_y',
       'tbp_lv_z', 'attribution', 'copyright_license', 'lesion_id',
       'iddx_full', 'iddx_1', 'iddx_2', 'iddx_3', 'iddx_4', 'iddx_5',
       'mel_mitotic_index', '

**Define the Model**

In [4]:
# Check for missing or empty values in 'isic_id' column
missing_ids = metadata['isic_id'].isnull().sum()
empty_ids = (metadata['isic_id'].str.strip() == '').sum()
print(f"Missing 'isic_id' values: {missing_ids}")
print(f"Empty 'isic_id' values: {empty_ids}")

Missing 'isic_id' values: 0
Empty 'isic_id' values: 0


In [5]:
import os
# Append .jpg to each filename in the 'isic_id' column
metadata['isic_id'] = metadata['isic_id'].apply(lambda x: f"{x}.jpg")

# Check if the image files exist
metadata['file_exists'] = metadata['isic_id'].apply(lambda x: os.path.isfile(os.path.join('/kaggle/input/isic-2024-challenge/train-image/image', x)))
print(metadata['file_exists'].value_counts())

# Filter out rows where the image file does not exist
metadata = metadata[metadata['file_exists']]

# If no files exist, raise an error
if metadata.empty:
    raise ValueError("No valid image files found. Please check the filenames and directory.")

file_exists
True    401059
Name: count, dtype: int64


In [6]:
# Create the 'label' column
if 'tbp_lv_nevi_confidence' in metadata.columns:
    metadata['label'] = metadata['tbp_lv_nevi_confidence'].apply(lambda x: 1 if x > 0.5 else 0)
else:
    raise ValueError("The necessary column for labels is missing from the metadata.")

In [7]:
# Create train and validation splits
train_df, val_df = train_test_split(metadata, test_size=0.2, random_state=0)


In [8]:
train_df['label'] = train_df['label'].astype(str)
val_df['label'] = val_df['label'].astype(str)

In [9]:
# Convert labels to strings
train_df['label'] = train_df['label'].astype(str)
val_df['label'] = val_df['label'].astype(str)

# Image data generators with augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)

# Create generators
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    directory='/kaggle/input/isic-2024-challenge/train-image/image',
    x_col='isic_id',  # Adjust if your image filenames are in a different column
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)

val_generator = val_datagen.flow_from_dataframe(
    val_df,
    directory='/kaggle/input/isic-2024-challenge/train-image/image',
    x_col='isic_id',  # Adjust if your image filenames are in a different column
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)

Found 320847 validated image filenames belonging to 2 classes.
Found 80212 validated image filenames belonging to 2 classes.


In [10]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [11]:
# Load the EfficientNetB0 model with pre-trained weights, without the top layer
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [12]:
# Add custom layers on top of the base model
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

In [13]:
# Define the model
model = Model(inputs=base_model.input, outputs=predictions)

In [14]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

In [15]:
# Set up callbacks
checkpoint = ModelCheckpoint('best_model.keras', monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, mode='max', verbose=1)

In [16]:
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_data=val_generator,
    validation_steps=val_generator.samples // val_generator.batch_size,
    epochs=5,  # Reduced for testing
    verbose=2
)


Epoch 1/5


  self._warn_if_super_not_called()
I0000 00:00:1723104243.615547      65 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


10026/10026 - 5144s - 513ms/step - accuracy: 0.8746 - loss: 0.2777 - val_accuracy: 0.8842 - val_loss: 0.2547
Epoch 2/5


  self.gen.throw(typ, value, traceback)


10026/10026 - 7s - 678us/step - accuracy: 0.9688 - loss: 0.1534 - val_accuracy: 0.9000 - val_loss: 0.1727
Epoch 3/5
10026/10026 - 4056s - 405ms/step - accuracy: 0.8876 - loss: 0.2498 - val_accuracy: 0.8876 - val_loss: 0.2500
Epoch 4/5
10026/10026 - 0s - 18us/step - accuracy: 0.9062 - loss: 0.2463 - val_accuracy: 0.9000 - val_loss: 0.1972
Epoch 5/5
10026/10026 - 3944s - 393ms/step - accuracy: 0.8918 - loss: 0.2405 - val_accuracy: 0.8946 - val_loss: 0.2323
