<a href="https://colab.research.google.com/github/BBotond03/SkinCancerDetection/blob/main/main_transfer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Script for Downloading the Data
1. We download the data  through the Kaggle API (for this we will have to provide out personal API key, this can be found at Kaggle profile settings).
2. Extract the downloaded zip file

In [9]:
#to be able to download the dataset to our current runtime
!pip install kaggle



In [10]:
from google.colab import files

# Upload the file
uploaded = files.upload()

# Get the actual file name from the dictionary
file_name = list(uploaded.keys())[0]
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Save the API key securely without displaying it
with open('/root/.kaggle/kaggle.json', 'wb') as f:
    f.write(uploaded[file_name])

# Set permissions
!chmod 600 /root/.kaggle/kaggle.json


Saving kaggle (2).json to kaggle (2) (1).json
mkdir: cannot create directory ‘/root/.kaggle’: File exists
cp: cannot stat 'kaggle.json': No such file or directory


In [11]:
!kaggle competitions download -c isic-2024-challenge

isic-2024-challenge.zip: Skipping, found more recently modified local copy (use --force to force download)


In [12]:
import zipfile
import os

# Path to your zip file and extract location
zip_file_path = 'isic-2024-challenge.zip'
extract_to_path = 'data'

# Extracting the zip file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to_path)
    print("Extraction completed.")

Extraction completed.


# Data Preproccessing

In [13]:
!pip install tensorflow



In [14]:
#neccesary imports
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
import tensorflow_datasets as tfds
import shutil
import pandas as pd
import matplotlib.pyplot as plt


In [15]:
# Load CSV
csv_path = './data/train-metadata.csv'
df = pd.read_csv(csv_path, low_memory=False)

# Append '.jpg' to each isic_id to match image filenames
df['isic_id'] = df['isic_id'].astype(str) + '.jpg'

# Directory where images are stored
img_dir = './data/train-image/image'  # Ensure this path is correct

# Filter for files that actually exist
df['filepath'] = df['isic_id'].apply(lambda x: os.path.join(img_dir, x))
df = df[df['filepath'].apply(os.path.exists)]

# Print out a summary
print(f"Number of valid images after filtering: {len(df)}")
print(df[['isic_id', 'filepath']].head())  # Optional: to verify paths

# Convert `target` to string for binary classification
df['target'] = df['target'].astype(str)

# Define ImageDataGenerator
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Create generator
train_gen = datagen.flow_from_dataframe(
    dataframe=df,
    directory=img_dir,
    x_col='isic_id',       # filename column
    y_col='target',         # target column
    target_size=(150, 150), # adjust as needed
    class_mode='binary',    # for binary classification
    subset='training',
    shuffle=True
)

val_gen = datagen.flow_from_dataframe(
    dataframe=df,
    directory=img_dir,
    x_col='isic_id',
    y_col='target',
    target_size=(150, 150),
    class_mode='binary',
    subset='validation'
)


Number of valid images after filtering: 401059
            isic_id                                   filepath
0  ISIC_0015670.jpg  ./data/train-image/image/ISIC_0015670.jpg
1  ISIC_0015845.jpg  ./data/train-image/image/ISIC_0015845.jpg
2  ISIC_0015864.jpg  ./data/train-image/image/ISIC_0015864.jpg
3  ISIC_0015902.jpg  ./data/train-image/image/ISIC_0015902.jpg
4  ISIC_0024200.jpg  ./data/train-image/image/ISIC_0024200.jpg
Found 320848 validated image filenames belonging to 2 classes.
Found 80211 validated image filenames belonging to 2 classes.


In [16]:
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.utils import class_weight
import numpy as np
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import Dropout

In [None]:
# Define image size and batch size
IMG_SIZE = (150, 150)
BATCH_SIZE = 32
EPOCHS = 5

# Initialize the InceptionV3 model
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
base_model.trainable = False  # Freeze base model

# Add custom layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    verbose=1,
    min_lr=1e-7
)

# Define the model
model = Model(inputs=base_model.input, outputs=output)
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# Summary
model.summary()

# Calculate class weights
labels = df['target'].astype(int).values  # Convert target to integer values (0 or 1)
class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(labels), y=labels)
class_weights_dict = {0: class_weights[0], 1: class_weights[1]}

print(f"Class Weights: {class_weights_dict}")

# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model_checkpoint = ModelCheckpoint(
    filepath='best_melanoma_inceptionv3.keras',
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    verbose=1
)

# Train the model with class weights and callbacks

steps_per_epoch = len(train_gen)
validation_steps = len(val_gen)

history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=[early_stopping, model_checkpoint, lr_scheduler],
    class_weight=class_weights_dict
)

# Unfreeze some layers for fine-tuning
base_model.trainable = True
fine_tune_at = 249

for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

# Recompile with a lower learning rate
model.compile(optimizer=Adam(learning_rate=0.00001), loss='binary_crossentropy', metrics=['accuracy'])

# Fine-tune the model with class weights and callbacks
steps_per_epoch = len(train_gen)
validation_steps = len(val_gen)

history_fine = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=EPOCHS // 2,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=[early_stopping, model_checkpoint, lr_scheduler],
    class_weight=class_weights_dict
)

# Load the best saved model
best_model = tf.keras.models.load_model('best_melanoma_inceptionv3.keras')

# Evaluate the best model
loss, accuracy = best_model.evaluate(val_gen)
print(f"Best Model Validation Accuracy: {accuracy:.2f}")

Class Weights: {0: 0.5004904334283418, 1: 510.25318066157763}
Epoch 1/5


  self._warn_if_super_not_called()


[1m10026/10027[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 45ms/step - accuracy: 0.9056 - loss: 4.9131
Epoch 1: val_loss improved from inf to 0.22110, saving model to best_melanoma_inceptionv3.keras
[1m10027/10027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m592s[0m 57ms/step - accuracy: 0.9056 - loss: 4.9128 - val_accuracy: 0.9990 - val_loss: 0.2211 - learning_rate: 1.0000e-04
Epoch 2/5
[1m10027/10027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 3/5


  self.gen.throw(typ, value, traceback)
  current = self.get_monitor_value(logs)
  self._save_model(epoch=epoch, batch=None, logs=logs)
  callback.on_epoch_end(epoch, logs)


[1m10027/10027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.7710 - loss: 2.4664
Epoch 3: val_loss improved from 0.22110 to 0.10390, saving model to best_melanoma_inceptionv3.keras
[1m10027/10027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m498s[0m 50ms/step - accuracy: 0.7710 - loss: 2.4664 - val_accuracy: 0.9982 - val_loss: 0.1039 - learning_rate: 1.0000e-04
Epoch 4/5
[1m10027/10027[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 5/5
[1m 9769/10027[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m9s[0m 37ms/step - accuracy: 0.7802 - loss: 3.0832