## Skin Cancer Classification

### Import required libraries

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
# Read CSV file
df = pd.read_csv("data/hmnist_28_28_RGB.csv")
df.head()

Unnamed: 0,pixel0000,pixel0001,pixel0002,pixel0003,pixel0004,pixel0005,pixel0006,pixel0007,pixel0008,pixel0009,...,pixel2343,pixel2344,pixel2345,pixel2346,pixel2347,pixel2348,pixel2349,pixel2350,pixel2351,label
0,192,153,193,195,155,192,197,154,185,202,...,173,124,138,183,147,166,185,154,177,2
1,25,14,30,68,48,75,123,93,126,158,...,60,39,55,25,14,28,25,14,27,2
2,192,138,153,200,145,163,201,142,160,206,...,167,129,143,159,124,142,136,104,117,2
3,38,19,30,95,59,72,143,103,119,171,...,44,26,36,25,12,17,25,12,15,2
4,158,113,139,194,144,174,215,162,191,225,...,209,166,185,172,135,149,109,78,92,2


In [27]:
# Seperate features and labels
X = df.drop(['label'], axis=1)
y = df["label"]

### Synthetic Sampling

In [12]:
# Use SMOTE + ENN to generate synthetic examples for minority classes
from imblearn.combine import SMOTEENN

smote_enn = SMOTEENN(random_state=42)
X_resampled, y_resampled_df = smote_enn.fit_resample(X, y)

In [13]:
# Reshape features
X_resampled = np.array(X_resampled).reshape(-1, 28, 28, 3)
X_resampled.shape

(43473, 28, 28, 3)

In [16]:
# One-hot-encode labels
from tensorflow.keras.utils import to_categorical

y_resampled = np.array(y_resampled_df)
y_resampled = to_categorical(y_resampled)
y_resampled.shape

(43473, 7)

### Model Definition and Training

In [18]:
from sklearn.model_selection import train_test_split

X_train , X_test , y_train , y_test = train_test_split(X_resampled , y_resampled , test_size = 0.2 , random_state = 42)

In [19]:
# Import libraries for data augmentation and creating the model
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, BatchNormalization, MaxPooling2D, Conv2D, Flatten

In [20]:
model = tf.keras.Sequential([
    Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(28, 28, 3)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    
    Conv2D(256, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    
    Dense(128, activation='relu'),
    BatchNormalization(),
    
    Dense(64, activation='relu'),
    BatchNormalization(),
    
    Dense(64, activation='relu'),
    BatchNormalization(),
    
    Dense(7, activation='softmax')
])


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [22]:
# Train the model for 20 epochs
history = model.fit(
    X_train, y_train, 
    epochs=20, 
    validation_data=(X_test, y_test)
)

Epoch 1/20
[1m1087/1087[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 38ms/step - accuracy: 0.9101 - loss: 0.2578 - val_accuracy: 0.5075 - val_loss: 1.7741
Epoch 2/20
[1m1087/1087[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 38ms/step - accuracy: 0.9303 - loss: 0.1996 - val_accuracy: 0.7090 - val_loss: 0.9359
Epoch 3/20
[1m1087/1087[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 38ms/step - accuracy: 0.9507 - loss: 0.1443 - val_accuracy: 0.8638 - val_loss: 0.3870
Epoch 4/20
[1m1087/1087[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 37ms/step - accuracy: 0.9619 - loss: 0.1161 - val_accuracy: 0.8615 - val_loss: 0.4327
Epoch 5/20
[1m1087/1087[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 37ms/step - accuracy: 0.9692 - loss: 0.0885 - val_accuracy: 0.8043 - val_loss: 0.5930
Epoch 6/20
[1m1087/1087[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 38ms/step - accuracy: 0.9747 - loss: 0.0794 - val_accuracy: 0.8928 - val_loss: 0.3126
Epoc

### Model Evaluation (Regular and F1)

In [23]:
test_score = model.evaluate(X_test, y_test, verbose=1)
print("Test Accuracy: ", test_score[1])

[1m272/272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.9842 - loss: 0.0566
Test Accuracy:  0.9833237528800964


In [24]:
from sklearn.metrics import f1_score

# Get predictions
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)

# Calculate F1 score
score = f1_score(y_test, y_pred, average='weighted')
print("F1 Score:", score)

[1m272/272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
F1 Score: 0.9829880156561995


### Save Final Model

In [25]:
model.save(f"./saved_model/model_acc_{round(score, 3)}.h5", save_format='h5')

