In [16]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model


In [14]:
import os
import cv2
import numpy as np
from tqdm import tqdm

IMG_SIZE = 224  # side length, in pixels, that every image is resized to

# Both HAM10000 image folders are scanned for .jpg files
IMG_DIRS = ['Images/HAM10000_images_part_1', 'Images/HAM10000_images_part_2']

# Diagnosis codes treated as malignant (melanoma, BCC, AKIEC); all others are benign
MALIGNANT_DX = ['mel', 'bcc', 'akiec']

import pandas as pd

# Load metadata
df = pd.read_csv('Images/HAM10000_metadata.csv')

# Create binary labels: 'malignant' for the codes in MALIGNANT_DX, else 'benign'
df['label'] = df['dx'].isin(MALIGNANT_DX).map({True: 'malignant', False: 'benign'})

# Build an image_id -> file path dictionary for quick lookup.
# The loop variable is named img_dir so the builtin dir() is not shadowed.
image_paths = {}
for img_dir in IMG_DIRS:
    for img_name in os.listdir(img_dir):
        if img_name.endswith('.jpg'):
            # splitext strips only the final extension, so a file name that
            # contains additional dots still yields the full image id
            image_id = os.path.splitext(img_name)[0]
            image_paths[image_id] = os.path.join(img_dir, img_name)

def load_images_from_metadata(df):
    """Load and resize the images referenced by a metadata frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'image_id' column (looked up in the module-level
        ``image_paths`` dict) and a 'label' column.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The images, each resized to (IMG_SIZE, IMG_SIZE), stacked into one
        array, and the matching labels in the same order. Rows whose image id
        is absent from ``image_paths`` or whose file cannot be decoded are
        skipped; a single warning summarises how many were dropped.
    """
    import warnings

    images = []
    labels = []
    skipped = []
    # Iterate over just the two columns that are used; this avoids building a
    # Series for every row as iterrows() does.
    for img_id, label in tqdm(zip(df['image_id'], df['label']), total=len(df)):
        img_path = image_paths.get(img_id)
        if img_path is None:
            skipped.append(img_id)
            continue
        # cv2.imread decodes to BGR channel order.
        # NOTE(review): ImageNet-pretrained backbones are usually fed RGB -
        # confirm whether a BGR->RGB conversion is wanted here.
        img = cv2.imread(img_path)
        if img is None:
            # imread signals an unreadable/corrupt file by returning None
            # instead of raising; resizing None would crash the whole load.
            skipped.append(img_id)
            continue
        images.append(cv2.resize(img, (IMG_SIZE, IMG_SIZE)))
        labels.append(label)
    if skipped:
        warnings.warn(
            f"Skipped {len(skipped)} of {len(df)} rows with a missing or "
            f"unreadable image (first few ids: {skipped[:5]})"
        )
    return np.array(images), np.array(labels)

# Load every image listed in the metadata (requires the 'label' column on df)
X, y = load_images_from_metadata(df)

# Normalize pixel values to [0, 1] as float32.
# A plain `X / 255.0` promotes the uint8 image array to float64 (8 bytes per
# value): roughly 12 GB for the 10015 images of size 224x224x3 loaded here,
# versus about 6 GB as float32. Dividing in place avoids a second full copy.
X = X.astype(np.float32)
X /= 255.0


100%|████████████████████████████████████████████████████████████████████████████| 10015/10015 [02:57<00:00, 56.48it/s]


In [15]:
# Fixed seed so the train/validation split (and therefore the reported
# validation metrics) is the same on every run
SEED = 42

# Convert labels to 0 (benign) and 1 (malignant)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Train-validation split: 80/20, stratified so both parts keep the class ratio
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    stratify=y_encoded,
    random_state=SEED,
)

print(f"Train shape: {X_train.shape}, Val shape: {X_val.shape}")


Train shape: (8012, 224, 224, 3), Val shape: (2003, 224, 224, 3)


In [17]:
# Pretrained MobileNetV2 feature extractor without its ImageNet classifier top
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
# Keep the pretrained weights fixed; only the new head below is trainable
base_model.trainable = False

# Classification head: pool the feature map, apply dropout, then a single
# sigmoid unit for the binary output
pool_layer = GlobalAveragePooling2D()
dropout_layer = Dropout(0.3)
classifier_layer = Dense(1, activation='sigmoid')

x = dropout_layer(pool_layer(base_model.output))
output = classifier_layer(x)

model = Model(inputs=base_model.input, outputs=output)

# Binary cross-entropy matches the single sigmoid output; accuracy is the
# only metric tracked
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [18]:
# Fit the model for 5 epochs in mini-batches of 32, scoring the held-out
# validation split at the end of each epoch
fit_kwargs = {
    'validation_data': (X_val, y_val),
    'epochs': 5,
    'batch_size': 32,
}
history = model.fit(X_train, y_train, **fit_kwargs)


Epoch 1/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m359s[0m 1s/step - accuracy: 0.7774 - loss: 0.4970 - val_accuracy: 0.8218 - val_loss: 0.3825
Epoch 2/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m316s[0m 1s/step - accuracy: 0.8165 - loss: 0.3855 - val_accuracy: 0.8258 - val_loss: 0.3734
Epoch 3/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m308s[0m 1s/step - accuracy: 0.8336 - loss: 0.3696 - val_accuracy: 0.8198 - val_loss: 0.3766
Epoch 4/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m327s[0m 1s/step - accuracy: 0.8302 - loss: 0.3719 - val_accuracy: 0.8333 - val_loss: 0.3577
Epoch 5/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 1s/step - accuracy: 0.8411 - loss: 0.3610 - val_accuracy: 0.8357 - val_loss: 0.3617


In [19]:
# evaluate() yields the loss followed by the one compiled metric (accuracy)
val_results = model.evaluate(X_val, y_val)
loss, acc = val_results
print(f"Validation Accuracy: {acc:.2f}")



[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 1s/step - accuracy: 0.8305 - loss: 0.3737
Validation Accuracy: 0.84


In [20]:
# Write the trained model to an .h5 file in the current working directory
model.save(filepath='skin_cancer_model.h5')


