In [1]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import sklearn as sk
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import json
import time

In [2]:
import keras
from keras.applications import VGG16
from keras.models import Model
from keras.layers import Input, Conv2D, Dense, Flatten
from keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

2025-03-11 12:59:29.275934: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
dataset_path = "./dataset"

class_names = os.listdir(dataset_path)
class_names = sorted(class_names)
class_names.remove('.DS_Store')
nb_classes = len(class_names)
print(f'The dataset contains {nb_classes} classes: {class_names}')

The dataset contains 4 classes: ['glioma', 'meningioma', 'notumor', 'pituitary']


In [4]:
image_size = (224, 224)
batch_size = 32

X = []
y = []

for class_name in class_names:
    class_path = os.path.join(dataset_path, class_name)
    for filename in os.listdir(class_path):
        img_path = os.path.join(class_path, filename)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Grayscale
        img = cv2.resize(img, image_size)  # Resize to 224x224 (image size of VGG16)
        img = img / 255.0  # Normalize pixel value to [0, 1]
        X.append(img)
        y.append(class_name)

In [5]:
# Convert to numpy arrays
X = np.array(X, dtype=np.float32).reshape(-1, 224, 224, 1)  # Reshape for CNN
y_label = np.array(y).reshape(-1, 1)

In [6]:
# Encode class labels
encoder = OneHotEncoder(sparse_output=False)
y = encoder.fit_transform(y_label)  # Convert class names to binary vectors

In [7]:
print(X.shape)
print(y_label.shape)
print(y.shape)

(5712, 224, 224, 1)
(5712, 1)
(5712, 4)


In [8]:
# First split: Train (80%) and Temp (20%)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True, stratify=y)

In [9]:
# Second split: Validation (10%) and Test (10%)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [10]:
# Quick tests to ensure the data is consistent and correctly split

assert len(X) == len(y), "X and y should be of same length" # If not, some images will be missing a label
assert len(X_train) == len(y_train), "X_train and y_train must be of same length"
assert len(X_val) == len(y_val), "X_val and y_val must be of same length"
assert len(X_test) == len(y_test), "X_test and y_test must be of same length"
assert len(X_train) + len(X_test) + len(X_val) == len(X), "X must be on length len(X_train) + len(X_test)"

In [11]:
optimizer = Adam(learning_rate=0.001)

early_stopping = EarlyStopping(
    monitor="val_loss",  # Monitor validation loss
    patience=3,          # Stop if no improvement for 3 consecutive epochs
    restore_best_weights=True  # Restore the best weights before stopping
)

checkpoint = ModelCheckpoint(
    "model_checkpoint.keras", monitor="val_loss",
    save_best_only=True, verbose=1
)


In [12]:
base_model = VGG16(include_top=False, input_shape=(224, 224, 3), weights="imagenet", classes=nb_classes)

inputs = Input(shape=(224, 224, 1))

x = Conv2D(3, (3, 3), activation='relu', padding="same")(inputs)
x = base_model(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dense(128, activation='relu')(x)
outputs = Dense(nb_classes, activation='softmax')(x)

VGG16 = Model(inputs, outputs)
VGG16.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy', 'precision', 'recall', 'f1_score', 'categorical_crossentropy', 'AUC'])

VGG16.summary()

In [13]:
history = VGG16.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=15,
    batch_size=16,
    verbose=1,
    callbacks=[early_stopping, checkpoint]
)

Epoch 1/15
[1m 12/286[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:00:04[0m 13s/step - AUC: 0.5247 - accuracy: 0.2444 - categorical_crossentropy: 3.1648 - f1_score: 0.2024 - loss: 3.1648 - precision: 0.2773 - recall: 0.1542

KeyboardInterrupt: 