In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
from keras.utils import to_categorical
import cv2
import os

In [3]:
base_dir = r"C:\BeCode\computervisionData\HAM10000_skin_mnist"
metadata = base_dir + "/HAM10000_metadata.csv"
img_dir = base_dir + "/HAM10000_images"

In [4]:
df = pd.read_csv(metadata)

In [5]:
# Split the data into training and testing sets from metadata
x_train, x_test, y_train, y_test = train_test_split(df["image_id"], df["dx"], test_size=0.2)

Load images from img_dir given the name from metadata image_id, resized

In [6]:
images = []
img_ids = df['image_id'].to_list()
for id in img_ids:
    image = plt.imread(os.path.join(img_dir, id + ".jpg"))
    image = cv2.resize(image, (128, 128))
    images.append(image)
images = np.array(images)

Encoding labels as catagory using sklearn labelEncoder

In [7]:
le = LabelEncoder()

In [None]:
df["dx"] = le.fit_transform(df["dx"])
num_labels = len(le.classes_)


In [9]:
print(num_labels)

7


In [10]:
le.classes_

array(['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc'], dtype=object)

In [11]:
y_train = to_categorical(y_train, num_labels)
y_test = to_categorical(y_test, num_labels)

ValueError: invalid literal for int() with base 10: 'nv'

Augmented image generator for train datasets to sove the imbalanced label problem

In [None]:
train_datagen = ImageDataGenerator(
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')


Building and compile CNN model using keras

In [2]:
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_labels, activation='softmax'))


model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Train the model

In [3]:
# Train the model
history = model.fit_generator(
    train_datagen.flow(images[x_train.index], y_train, batch_size=32),
    steps_per_epoch=len(x_train) / 32,
    epochs=10)

# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(images[x_test.index], y_test, verbose=0)
print("Test accuracy:", test_acc)



  history = model.fit_generator(


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.7094358205795288


Generate the model's performance as a classification report

In [None]:
y_pred = model.predict(images[x_test.index])
y_pred = np.argmax(y_pred, axis=1)
y_test = np.argmax(y_test, axis=1)
print(classification_report(y_test, y_pred, target_names=le.classes_))

Finally save the model in the current directory

In [6]:
model.save('model_keras.h5')

Test the model from external input image by loading the model from the file

In [None]:
img_test_ex = cv2.imread("")
img_test_ex = cv2.resize(img_test_ex, 128,128)
img_test_ex = np.array(img_test_ex)

In [None]:
result = model.predict(img_test_ex)

concern = result.apply(lambda x : 0 if ((x == 'nv') | (x == 'bkl') | (x == 'df') | (x == 'vasc')) else 1)
if not concern:
    print("the images looks of no concern, not cancerous")
else:
    print("the image is classified as cancerous mole and needs medical attention")

