**Helpful resources**

Sklearn train_test_split - https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
Implementing a CNN in TensorFlow & Keras - https://learnopencv.com/implementing-cnn-tensorflow-keras/

**SET UP STEPS - Windows**
# Recommeded create virtual environment
Go to Jupyter Notebook kernels (top right)
Select another kernel
Python Environments
+ Create Python Environment
Venv
Select Python interpreter and create

# Prepare venv using terminal
activate environment: .\.venv\Scripts\activate

# Installations on venv
pip install jupyter ipykernel
pip install matplotlib 
pip install pandas
pip install scikit-learn
pip install tensorflow



In [23]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt
print(tf.__version__)

2.16.2


In [24]:
def load_process_images(image_path):
    image_path += ".jpg"
    img = load_img(image_path)
    img_array = img_to_array(img)

    img_array = img_array / 255.0
    return img_array


def get_all_data(all_images, labels, path):
    images = []
    all_labels = []

    for img, label in zip(all_images, labels):
        img_path = os.path.join(path, img).replace("\\", "/")
        img_pixels = load_process_images(img_path)

        images.append(img_pixels)
        all_labels.append(label)
    
    return np.array(images), np.array(all_labels)


def build_model():

    # random bs
    model = models.Sequential([
        
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        
        
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        
        
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        
        
        layers.Conv2D(256, (3, 3), activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        
        
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid')
    ])
    
    return model


In [25]:
# File paths
labels_path = "../preprocessed_images/ISIC_2019_Training_GroundTruth_preprocessed.csv"
images_path = "../preprocessed_images"

In [None]:
''' 
Create 80 - 20 train test (validation set) split

stratify - Ensures class distribution is similar in both sets based on the labels

'''
df = pd.read_csv(labels_path)
image_names = df['image'].values # X - images
image_labels = df['MEL'].values # y - labels

X_train_names, X_test_names, y_train, y_test = train_test_split(image_names, image_labels, 
                                                    test_size=0.2, random_state=88, stratify=image_labels)

# print("Train set size: ", len(X_train))
# print("Test set size: ", len(X_test))

X_train, y_train = get_all_data(X_train_names, y_train, images_path)

X_testing, y_testing = get_all_data(X_test_names, y_test, images_path)

model = build_model()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
#CONSTANTS

BATCH_SIZE = 32
EPOCHS = 50


model = build_model()

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC()]
)

# Callbacks
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)


# Train the model
print("Starting training...")
history = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=EPOCHS,
    validation_data=(X_testing, y_testing),
    callbacks=[early_stopping]
)

# Plot training history
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()

Starting training...
Epoch 1/50
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m265s[0m 1s/step - accuracy: 0.6058 - auc_1: 0.6471 - loss: 0.9412 - val_accuracy: 0.5003 - val_auc_1: 0.5637 - val_loss: 1.0364
Epoch 2/50
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m237s[0m 1s/step - accuracy: 0.6598 - auc_1: 0.7136 - loss: 0.6585 - val_accuracy: 0.5832 - val_auc_1: 0.7360 - val_loss: 0.7525
Epoch 3/50
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 1s/step - accuracy: 0.6792 - auc_1: 0.7485 - loss: 0.6090 - val_accuracy: 0.6175 - val_auc_1: 0.7505 - val_loss: 0.7260
Epoch 4/50
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m220s[0m 968ms/step - accuracy: 0.6969 - auc_1: 0.7609 - loss: 0.5888 - val_accuracy: 0.6733 - val_auc_1: 0.7572 - val_loss: 0.6084
Epoch 5/50
[1m227/227[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 882ms/step - accuracy: 0.7207 - auc_1: 0.7925 - loss: 0.5541 - val_accuracy: 0.5064 - val_auc_1: 0.65