In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import load_img
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D

In [None]:
# Read the metadata CSV into a DataFrame; later cells use its
# 'image_id' and 'dx' columns.
metadata_csv = "HAM10000_metadata.csv"
df = pd.read_csv(metadata_csv)

In [None]:
# Preprocess the image data
img_width, img_height = 96, 96

# Folders searched, in order, for "<image_id>.jpg".
# NOTE(review): the public HAM10000 release is commonly split across a
# "part_1" and a "part_2" folder -- confirm against the local layout.
# A folder that does not exist is simply skipped.
IMAGE_DIRS = ['HAM10000_images_part_1', 'HAM10000_images_part_2']


def load_image(image_id):
    """Load one image, resized to the network input size.

    Parameters
    ----------
    image_id : str
        File name without the ".jpg" extension, as found in df['image_id'].

    Returns
    -------
    numpy.ndarray
        uint8 array of shape (img_height, img_width, 3).

    Raises
    ------
    FileNotFoundError
        If no folder in IMAGE_DIRS contains "<image_id>.jpg".
    """
    for image_dir in IMAGE_DIRS:
        path = os.path.join(image_dir, image_id + '.jpg')
        if os.path.isfile(path):
            # The original cell called cv2.resize without importing cv2.
            # load_img (Keras is already a dependency) decodes to RGB and
            # resizes in one step; note target_size is (height, width).
            img = load_img(
                path,
                target_size=(img_height, img_width),
                interpolation='bilinear',
            )
            return np.asarray(img, dtype=np.uint8)
    raise FileNotFoundError(
        "Image '%s.jpg' not found in any of: %s" % (image_id, ', '.join(IMAGE_DIRS))
    )


# Load the image pixels into a numpy array
images = [load_image(image_id) for image_id in df['image_id']]

X = np.array(images)

In [None]:
# Turn the diagnosis column ('dx') into one-hot target vectors.
# `le` is kept so integer class indices can be mapped back to the
# original label strings later (le.classes_ / le.inverse_transform).
le = LabelEncoder()
class_indices = le.fit_transform(df['dx'])

# One column per class index produced by the encoder
y = np_utils.to_categorical(class_indices)

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state makes
# the shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

In [None]:
# Both generators multiply pixel values by 1/255.
pixel_scale = 1./255

# Training generator: scaling plus random shear, zoom and horizontal
# flips, used as augmentation to reduce overfitting.
augmentation_options = dict(
    rescale=pixel_scale,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Test generator: scaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=pixel_scale)

In [None]:
# Model architecture: three Conv2D(3x3, relu) + 2x2 max-pool stages with
# 32, 64 and 128 filters, followed by a dense classifier head.
model = Sequential([
    # Convolutional feature extractor
    Conv2D(32, (3, 3), input_shape=(img_width, img_height, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Classifier head: 50% dropout, then a softmax over 7 output units
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(7, activation='softmax'),
])

In [None]:
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Build the batch iterators once so their lengths can be reused below.
batch_size = 32
train_flow = train_datagen.flow(X_train, y_train, batch_size=batch_size)
val_flow = test_datagen.flow(X_test, y_test, batch_size=batch_size)

# Train the model
# The original call had an unbalanced parenthesis (`len(X_train/ 32, ...`)
# and passed fractional step counts. len() of a flow iterator is the
# integer number of batches per pass, including the final partial batch.
history = model.fit_generator(
    train_flow,
    steps_per_epoch=len(train_flow),
    epochs=3,
    validation_data=val_flow,
    validation_steps=len(val_flow))

In [None]:
# Evaluate the model on the test set
# During training and validation the generators multiply every pixel by
# 1/255 (their `rescale` argument). The raw X_test array has not been
# through a generator, so apply the same scaling here; otherwise the
# model is scored on inputs in a different range from what it was fit on.
X_test_scaled = X_test.astype('float32') / 255.0
test_loss, test_acc = model.evaluate(X_test_scaled, y_test)
print('Test accuracy:', test_acc)
