In [1]:
import os
import cv2
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


np.random.seed(42)
tf.random.set_seed(42)


l = os.listdir 
j = os.path.join 

## helper function

In [2]:
# (helper) load images and labels into variables
def load_imgs_from_dir(dir, assign_label, re_size):

    imgs = []
    labels = []
    for filename in l(dir):

        #get img pth
        img_pth = j(dir, filename)

        #read img
        img = cv2.imread(img_pth, cv2.IMREAD_GRAYSCALE)

        #resize
        img = cv2.resize(img, (re_size, re_size)) 

        #load
        imgs.append(img)
        labels.append(assign_label)

    return imgs, labels

In [3]:
# (main) load data and combine them
def load_data(dpth, split=0.2, rs=42, re_size=128):

    b_pth = dpth + '/bengin'
    m_pth = dpth + '/malignant'
    n_pth = dpth + '/normal'

    b_imgs, b_labels = load_imgs_from_dir(b_pth, assign_label=0, re_size = re_size)  
    m_imgs, m_labels = load_imgs_from_dir(m_pth, assign_label=1, re_size = re_size)  
    n_imgs, n_labels = load_imgs_from_dir(n_pth, assign_label=2, re_size = re_size)

    images = np.array(b_imgs   + m_imgs   + n_imgs)
    labels = np.array(b_labels + m_labels + n_labels)

    return images, labels

In [4]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, confusion_matrix
from sklearn.model_selection import KFold, train_test_split
import numpy as np

# one hot encoding
def OHE(A):
    EA = np.zeros((A.size, A.max() + 1), dtype=int)
    EA[np.arange(A.size), A] = 1
    return EA

# Function to calculate specificity
def specificity_score(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    TN = cm[0, 0]  # True Negative
    FP = cm[0, 1]  # False Positive
    specificity = TN / (TN + FP)
    return specificity

def cross_validate_model(model, X_train, y_train, k=5, epochs=10, batch_size=32):
    # Initialize KFold
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    
    # Store the metrics for each fold
    fold_metrics = {
        'train_accuracy': [], 'val_accuracy': [],
        'train_f1': [], 'val_f1': [],
        'train_specificity': [], 'val_specificity': [],
        'train_sensitivity': [], 'val_sensitivity': []
    }
    
    # K-fold cross-validation loop
    for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
        print(f"\nStarting fold {fold + 1} / {k}")
        
        # Split the data into training and validation sets for the current fold
        X_fold_train, X_fold_val = X_train[train_index], X_train[val_index]
        y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]
        
        # Rebuild the model for each fold
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        
        # Train the model with feedback
        history = model.fit(
            X_fold_train,
            y_fold_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_fold_val, y_fold_val),
            verbose=1
        )
        
        # Initialize lists to store metrics for each epoch
        train_accuracies, val_accuracies = [], []
        train_f1s, val_f1s = [], []
        train_specificities, val_specificities = [], []
        train_sensitivities, val_sensitivities = [], []

        # Evaluate metrics for each epoch
        for epoch in range(epochs):
            # Predictions on training data
            y_train_pred = np.argmax(model.predict(X_fold_train), axis=1)
            y_train_true = np.argmax(y_fold_train, axis=1)
            
            # Predictions on validation data
            y_val_pred = np.argmax(model.predict(X_fold_val), axis=1)
            y_val_true = np.argmax(y_fold_val, axis=1)
            
            # Calculate metrics for training data
            train_acc = accuracy_score(y_train_true, y_train_pred)
            train_f1 = f1_score(y_train_true, y_train_pred, average='weighted')
            train_specificity = specificity_score(y_train_true, y_train_pred)
            train_sensitivity = recall_score(y_train_true, y_train_pred, average='weighted')
            
            # Calculate metrics for validation data
            val_acc = accuracy_score(y_val_true, y_val_pred)
            val_f1 = f1_score(y_val_true, y_val_pred, average='weighted')
            val_specificity = specificity_score(y_val_true, y_val_pred)
            val_sensitivity = recall_score(y_val_true, y_val_pred, average='weighted')
            
            # Append metrics for the epoch
            train_accuracies.append(train_acc)
            train_f1s.append(train_f1)
            train_specificities.append(train_specificity)
            train_sensitivities.append(train_sensitivity)

            val_accuracies.append(val_acc)
            val_f1s.append(val_f1)
            val_specificities.append(val_specificity)
            val_sensitivities.append(val_sensitivity)
        
        # Calculate average metrics for the current fold
        fold_metrics['train_accuracy'].append(np.mean(train_accuracies))
        fold_metrics['val_accuracy'].append(np.mean(val_accuracies))
        fold_metrics['train_f1'].append(np.mean(train_f1s))
        fold_metrics['val_f1'].append(np.mean(val_f1s))
        fold_metrics['train_specificity'].append(np.mean(train_specificities))
        fold_metrics['val_specificity'].append(np.mean(val_specificities))
        fold_metrics['train_sensitivity'].append(np.mean(train_sensitivities))
        fold_metrics['val_sensitivity'].append(np.mean(val_sensitivities))
        
        print(f"Fold {fold + 1} - Train Accuracy: {np.mean(train_accuracies)}, Validation Accuracy: {np.mean(val_accuracies)}")
    
    # Compute the overall average of all metrics across all folds
    avg_train_accuracy = np.mean(fold_metrics['train_accuracy'])
    avg_val_accuracy = np.mean(fold_metrics['val_accuracy'])
    avg_train_f1 = np.mean(fold_metrics['train_f1'])
    avg_val_f1 = np.mean(fold_metrics['val_f1'])
    avg_train_specificity = np.mean(fold_metrics['train_specificity'])
    avg_val_specificity = np.mean(fold_metrics['val_specificity'])
    avg_train_sensitivity = np.mean(fold_metrics['train_sensitivity'])
    avg_val_sensitivity = np.mean(fold_metrics['val_sensitivity'])
    
    print(f"\nAverage Metrics Across {k} Folds:")
    print(f"Train Accuracy: {avg_train_accuracy}, Validation Accuracy: {avg_val_accuracy}")
    print(f"Train F1-Score: {avg_train_f1}, Validation F1-Score: {avg_val_f1}")
    print(f"Train Specificity: {avg_train_specificity}, Validation Specificity: {avg_val_specificity}")
    print(f"Train Sensitivity: {avg_train_sensitivity}, Validation Sensitivity: {avg_val_sensitivity}")
    
    return fold_metrics



# You can later evaluate on test data like this:
# model.evaluate(X_test, OHE(y_test))


In [5]:
# model architecture
model = Sequential([

    Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 1)),
    MaxPooling2D((2, 2)),
    

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    Flatten(),
    Dense(512, activation='relu'),
    
    Dense(3, activation='softmax')  
])

## Beginning

In [6]:
# Load and split the data
images, labels = load_data('./data', re_size=256)
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Convert labels to one-hot encoding for training data
y_train_ohe = OHE(y_train)

# Perform cross-validation on the training data (test data is excluded here)
cross_validate_model(model, X_train, y_train_ohe, k=5, epochs=10, batch_size=32)



Starting fold 1 / 5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold 1 - Train Accuracy: 0.9871611982881598, Validation Accuracy: 0.9545454545454547

Starting fold 2 / 5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold 2 - Train Accuracy: 0.9885877318116977, Validation Accuracy: 0.9772727272727272

Starting fold 3 / 5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold 3 - Train Accuracy: 0.9928774928774929, Validation Accuracy: 1.0

Starting fold 4 / 5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold 4 - Train Accuracy: 0.9501424501424502, Validation Accuracy: 0.9257142857142856

Starting fold 5 / 5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold 5 - Train 

{'train_accuracy': [0.9871611982881598,
  0.9885877318116977,
  0.9928774928774929,
  0.9501424501424502,
  0.9729344729344728],
 'val_accuracy': [0.9545454545454547,
  0.9772727272727272,
  1.0,
  0.9257142857142856,
  0.9485714285714286],
 'train_f1': [0.987432017852376,
  0.9886740492027293,
  0.992815344591819,
  0.9528120579516998,
  0.9721051576854156],
 'val_f1': [0.9554817796201679,
  0.9773039968161917,
  1.0,
  0.9315620457248579,
  0.9479398834009112],
 'train_specificity': [1.0, 1.0, 0.9736842105263157, 1.0, 1.0],
 'val_specificity': [1.0, 1.0, 1.0, 1.0, 1.0],
 'train_sensitivity': [0.9871611982881598,
  0.9885877318116977,
  0.9928774928774929,
  0.9501424501424502,
  0.9729344729344728],
 'val_sensitivity': [0.9545454545454547,
  0.9772727272727272,
  1.0,
  0.9257142857142856,
  0.9485714285714286]}

In [7]:
# model evaluate
model.evaluate(X_test, OHE(y_test))



[0.3824957013130188, 0.9045454263687134]

In [8]:
fold_metrics = {'train_accuracy': [0.9971469329529243,
  0.9957203994293865,
  0.9487179487179487,
  0.9985754985754985,
  1.0],
 'val_accuracy': [0.9943181818181819,
  0.9772727272727272,
  0.9142857142857143,
  0.9885714285714287,
  0.9885714285714287],
 'train_f1': [0.9971612681813132,
  0.9956862738967006,
  0.9508895242222677,
  0.9985718744693102,
  1.0],
 'val_f1': [0.99430832140223,
  0.9767625435609626,
  0.9203669346526488,
  0.9885111146649608,
  0.9883673469387755],
 'train_specificity': [1.0, 1.0, 1.0, 1.0, 1.0],
 'val_specificity': [1.0, 1.0, 1.0, 1.0, 1.0],
 'train_sensitivity': [0.9971469329529243,
  0.9957203994293865,
  0.9487179487179487,
  0.9985754985754985,
  1.0],
 'val_sensitivity': [0.9943181818181819,
  0.9772727272727272,
  0.9142857142857143,
  0.9885714285714287,
  0.9885714285714287]}

In [9]:
print("train accuracy", np.mean(fold_metrics['train_accuracy']))
print("train f1 score", np.mean(fold_metrics['train_f1']))
print("train specificity", np.mean(fold_metrics['train_specificity']))
print("train sensitivity", np.mean(fold_metrics['train_sensitivity']))

train accuracy 0.9880321559351515
train f1 score 0.9884617881539184
train specificity 1.0
train sensitivity 0.9880321559351515


In [10]:
print(np.mean(fold_metrics['val_accuracy']))
print(np.mean(fold_metrics['val_f1']))
print(np.mean(fold_metrics['val_specificity']))
print(np.mean(fold_metrics['val_sensitivity']))

0.9726038961038961
0.9736632522439157
1.0
0.9726038961038961
