In [1]:
import os
import pandas as pd
import numpy as np
import shutil
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [4]:
# URLs for data and metadata
image_data_url = "https://isic-challenge-data.s3.amazonaws.com/2020/ISIC_2020_Training_JPEG.zip"
metadata_url = "https://isic-challenge-data.s3.amazonaws.com/2020/ISIC_2020_Training_GroundTruth.csv"

In [3]:
# Directory for data
data_dir = './data'

In [5]:
# Download and unzip data if it doesn't exist
if not os.path.exists(os.path.join(data_dir, 'ISIC_2020_Training_JPEG')):
    os.system(f"wget {image_data_url} -P {data_dir}")
    os.system(f"unzip {os.path.join(data_dir, 'ISIC_2020_Training_JPEG.zip')} -d {data_dir}")

In [6]:
# Read metadata
metadata = pd.read_csv(metadata_url)

In [7]:
# Map 'nevus' as 'nevus' and 'melanoma' as 'melanoma'
metadata['diagnosis'] = metadata['diagnosis'].apply(lambda x: x.lower())

In [8]:
# Filter out 'nevus' and 'melanoma' classes only
valid_classes = ['nevus', 'melanoma']
metadata_filtered = metadata[metadata['diagnosis'].isin(valid_classes)]
metadata_filtered = shuffle(metadata_filtered, random_state=42)

In [9]:
# Split the data into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    metadata_filtered['image_name'], (metadata_filtered['diagnosis'] == 'melanoma').astype(int),
    test_size=0.2, random_state=42, stratify=metadata_filtered['diagnosis']
)

In [10]:
# Directories for training and validation data
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'validation')

In [11]:
# Create directories for classes
for class_name in valid_classes:
    os.makedirs(os.path.join(train_dir, class_name), exist_ok=True)
    os.makedirs(os.path.join(val_dir, class_name), exist_ok=True)

In [12]:
# Copy images to class directories
for image_name, label in zip(X_train, y_train):
    class_name = 'melanoma' if label == 1 else 'nevus'
    source_path = os.path.join(data_dir, 'train', f'{image_name}.jpg')
    destination_path = os.path.join(train_dir, class_name, f'{image_name}.jpg')
    if os.path.exists(source_path):
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        shutil.copy(source_path, destination_path)
    else:
        print(f"Image not found: {source_path}")

for image_name, label in zip(X_val, y_val):
    class_name = 'melanoma' if label == 1 else 'nevus'
    source_path = os.path.join(data_dir, 'train', f'{image_name}.jpg')
    destination_path = os.path.join(val_dir, class_name, f'{image_name}.jpg')
    if os.path.exists(source_path):
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        shutil.copy(source_path, destination_path)
    else:
        print(f"Image not found: {source_path}")

In [13]:
# Class distribution in the training and validation sets
class_distribution_train = y_train.value_counts()
class_distribution_val = y_val.value_counts()
print("Class distribution in the training set:\n", class_distribution_train)
print("\nClass distribution in the validation set:\n", class_distribution_val)

Class distribution in the training set:
 0    4154
1     467
Name: diagnosis, dtype: int64

Class distribution in the validation set:
 0    1039
1     117
Name: diagnosis, dtype: int64


In [14]:
# Image dimensions and batch size
img_width, img_height = 224, 224
input_shape = (img_width, img_height, 3)
batch_size = 32

In [15]:
# Data augmentation and generators
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary',
    classes=valid_classes
)

validation_datagen = ImageDataGenerator(rescale=1./255)

validation_generator = validation_datagen.flow_from_directory(
    val_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary',
    classes=valid_classes
)


Found 4621 images belonging to 2 classes.
Found 1156 images belonging to 2 classes.


In [16]:
# Model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [17]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

In [20]:
# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // batch_size,
    epochs=10,
    callbacks=[EarlyStopping(patience=3)]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [21]:
# Evaluate the model
evaluation = model.evaluate(validation_generator)
print("Validation Loss:", evaluation[0])
print("Validation Accuracy:", evaluation[1])

Validation Loss: 0.18450379371643066
Validation Accuracy: 0.9429065585136414


In [27]:
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input

image_path = '/content/data/train/melanoma/ISIC_0250839.jpg'

# Load and preprocess the image
img = image.load_img(image_path, target_size=(img_width, img_height))
img = image.img_to_array(img)
img = preprocess_input(img)
img = np.expand_dims(img, axis=0)  # Add batch dimension

# Predict the class
prediction = model.predict(img)

if prediction > 0.5:
    print("Melanoma")
else:
    print("Nevus")


Melanoma
