# HAM10000 MNIST Skin Cancer Classification

In [9]:
import numpy as np
import pandas as pd
import PIL.Image as Image
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

## Data Preprocessing (Image Files)

In [13]:
def get_image_data():
    import os
    img_path_root = "data/all_images"
    image_arrays = []
    image_files = os.listdir(img_path_root)
    filename = np.random.choice(image_files)
    path = os.path.join(img_path_root, filename)
    return Image.open(path)

img = get_image_data()
np.array(img).shape

(450, 600, 3)

## Data Preprocessing (CSV)

In [21]:
df = pd.read_csv("data/csvs/hmnist_28_28_RGB.csv")
data = np.array(df)
np.random.shuffle(data)
X = data[:, :-1]
X = X.reshape(X.shape[0], 28, 28, 3) / 255 # Scaling seems to improve accuracy
y = data[:, -1]
y = y.reshape(y.shape[0], 1)
onehot = OneHotEncoder()
y = onehot.fit_transform(y).toarray()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
tensor_train = tf.convert_to_tensor(X_train, dtype=tf.float32)
tensor_test = tf.convert_to_tensor(X_test, dtype=tf.float32)


## Model Architecture and Training

In [22]:
model = Sequential()
#conv2d = tf.keras.layers.Conv2D(10, 3, activation="relu", input_shape=(10, 28, 28, 3))
model.add(layers.Conv2D(80, (5, 5), activation="relu", input_shape=(28, 28, 3)))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
#model.add(layers.BatchNormalization(axis=-1))
model.add(layers.Conv2D(64, (5, 5), activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Conv2D(64, (4, 4), activation="relu"))
model.add(layers.Flatten())
model.add(layers.Dense(7, activation="softmax", use_bias=True))
model.summary()
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=".",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True
)
history = model.fit(
    x=tensor_train,
    y=y_train,
    epochs=100,
    batch_size=50,
    validation_split=0.1,
    callbacks=[checkpoint_callback],
)
print(model.evaluate(X_test, y_test))

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_20 (Conv2D)          (None, 24, 24, 80)        6080      
                                                                 
 max_pooling2d_14 (MaxPoolin  (None, 12, 12, 80)       0         
 g2D)                                                            
                                                                 
 conv2d_21 (Conv2D)          (None, 8, 8, 64)          128064    
                                                                 
 max_pooling2d_15 (MaxPoolin  (None, 4, 4, 64)         0         
 g2D)                                                            
                                                                 
 conv2d_22 (Conv2D)          (None, 1, 1, 64)          65600     
                                                                 
 flatten_7 (Flatten)         (None, 64)               



INFO:tensorflow:Assets written to: ./assets


INFO:tensorflow:Assets written to: ./assets


Epoch 2/100



INFO:tensorflow:Assets written to: ./assets


INFO:tensorflow:Assets written to: ./assets


Epoch 3/100



INFO:tensorflow:Assets written to: ./assets


INFO:tensorflow:Assets written to: ./assets


Epoch 4/100
Epoch 5/100
Epoch 6/100



INFO:tensorflow:Assets written to: ./assets


INFO:tensorflow:Assets written to: ./assets


Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100



INFO:tensorflow:Assets written to: ./assets


INFO:tensorflow:Assets written to: ./assets


Epoch 13/100
Epoch 14/100



INFO:tensorflow:Assets written to: ./assets


INFO:tensorflow:Assets written to: ./assets


Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100



INFO:tensorflow:Assets written to: ./assets


INFO:tensorflow:Assets written to: ./assets


Epoch 20/100
Epoch 21/100
Epoch 22/100



INFO:tensorflow:Assets written to: ./assets


INFO:tensorflow:Assets written to: ./assets


Epoch 23/100
Epoch 24/100

KeyboardInterrupt: 