In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Model
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.regularizers import l2

In [5]:
from sklearn.model_selection import train_test_split
import numpy as np

def load_data(dataset_dir, img_size=(224, 224), batch_size=16, test_split=0.5):
    """Load the retinal image folder as a training iterator plus validation/test arrays.

    The directory is split 80/20 by Keras' ``validation_split`` mechanism. The
    80% part is returned as an augmenting iterator; the 20% hold-out part is
    read fully into memory *without augmentation* and then divided into a
    validation set and a test set with a stratified ``train_test_split``.

    Args:
        dataset_dir: Root directory containing one sub-folder per class named
            in ``class_order`` below.
        img_size: ``(height, width)`` every image is resized to.
        batch_size: Batch size used by both directory iterators.
        test_split: Fraction of the hold-out part that goes to the test set;
            the remainder becomes the validation set.

    Returns:
        ``(train_data, (val_images, val_labels), (test_images, test_labels))``
        where ``train_data`` is the directory iterator for the training subset
        and the other four values are NumPy arrays. Labels are one-hot encoded
        with one column per entry of ``class_order``.

    Raises:
        ValueError: If the hold-out subset contains no images.
    """
    # Both generators must use the same fraction so that they agree on which
    # files belong to the training subset and which to the hold-out subset.
    holdout_fraction = 0.2

    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=30, width_shift_range=0.2, height_shift_range=0.2,
        shear_range=0.2, zoom_range=0.3, brightness_range=[0.8, 1.2],
        horizontal_flip=True, fill_mode='nearest', validation_split=holdout_fraction
    )

    # Rescale only: evaluation images must not be randomly augmented.
    # validation_split is still required here, otherwise subset='validation'
    # cannot be requested from this generator.
    val_test_datagen = ImageDataGenerator(rescale=1./255, validation_split=holdout_fraction)

    class_order = ['normal', 'cataract', 'diabetic_retinopathy', 'glaucoma']  # Ensure all classes exist

    # Load training data (80%)
    train_data = train_datagen.flow_from_directory(
        dataset_dir, target_size=img_size, batch_size=batch_size,
        class_mode='categorical', subset='training', classes=class_order)

    # Load validation + test data (20%) through the non-augmenting generator
    val_test_data = val_test_datagen.flow_from_directory(
        dataset_dir, target_size=img_size, batch_size=batch_size,
        class_mode='categorical', subset='validation', shuffle=False, classes=class_order)

    # Extract all images and labels from validation+test dataset
    val_test_images = []
    val_test_labels = []

    for _ in range(len(val_test_data)):  # One pass over every batch
        batch_images, batch_labels = next(val_test_data)
        val_test_images.append(batch_images)
        val_test_labels.append(batch_labels)

    if not val_test_images:
        raise ValueError(
            f"No validation/test images found under {dataset_dir!r}; "
            "check the directory layout and class folder names."
        )

    val_test_images = np.concatenate(val_test_images, axis=0)
    val_test_labels = np.concatenate(val_test_labels, axis=0)

    # Convert one-hot labels to class indices (needed for stratification)
    val_test_labels_indices = np.argmax(val_test_labels, axis=1)

    # Split into validation & test (stratified by class index)
    val_images, test_images, val_labels_indices, test_labels_indices = train_test_split(
        val_test_images, val_test_labels_indices, test_size=test_split, stratify=val_test_labels_indices, random_state=42
    )

    # Convert labels back to one-hot encoding
    num_classes = len(class_order)
    val_labels = np.eye(num_classes)[val_labels_indices]
    test_labels = np.eye(num_classes)[test_labels_indices]

    return train_data, (val_images, val_labels), (test_images, test_labels)

# Load the retinal dataset: a training iterator plus validation/test arrays
(
    retinal_train,
    (retinal_val_images, retinal_val_labels),
    (retinal_test_images, retinal_test_labels),
) = load_data("/Users/blessygrace/DS-Projects/data/raw/seg_dataset")

def load_brain_data(train_dir, test_dir, img_size=(224, 224), batch_size=16):
    """Create directory iterators for the brain MRI training and test folders.

    Args:
        train_dir: Directory with one sub-folder per class for training.
        test_dir: Directory with one sub-folder per class for testing.
        img_size: ``(height, width)`` every image is resized to.
        batch_size: Batch size used by both iterators.

    Returns:
        ``(train_data, test_data)``: the training iterator applies random
        augmentation; the test iterator only rescales and is not shuffled.
    """
    class_order = ['notumor', 'glioma', 'meningioma', 'pituitary']  # Explicit class order

    # Options common to both flow_from_directory calls.
    shared_flow_args = dict(
        target_size=img_size,
        batch_size=batch_size,
        class_mode='categorical',
        classes=class_order,
    )

    augmenting_gen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=30,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.3,
        brightness_range=[0.8, 1.2],
        horizontal_flip=True,
        fill_mode='nearest',
        # NOTE(review): no `subset` is requested below, so this split is not
        # applied and every training image is used; kept as in the original.
        validation_split=0.2,
    )
    rescale_only_gen = ImageDataGenerator(rescale=1./255)

    train_data = augmenting_gen.flow_from_directory(train_dir, **shared_flow_args)
    test_data = rescale_only_gen.flow_from_directory(test_dir, shuffle=False, **shared_flow_args)
    return train_data, test_data

# Load the brain MRI dataset from its separate Training and Testing folders
brain_train, brain_test = load_brain_data(
    "/Users/blessygrace/DS-Projects/data/raw/brainmri/Training",
    "/Users/blessygrace/DS-Projects/data/raw/brainmri/Testing",
)


Found 3376 images belonging to 4 classes.
Found 841 images belonging to 4 classes.
Found 5712 images belonging to 4 classes.
Found 1311 images belonging to 4 classes.


In [7]:
from collections import Counter

# --- Retinal dataset: class counts per split ---
train_counts = Counter(retinal_train.classes)
print("Training set class distribution:", train_counts)

# Validation/test labels are one-hot arrays, so reduce them to class indices first
val_counts = Counter(retinal_val_labels.argmax(axis=1))
print("Validation set class distribution:", val_counts)

test_counts = Counter(retinal_test_labels.argmax(axis=1))
print("Test set class distribution:", test_counts)


# --- Brain MRI dataset: class counts per split ---
# NOTE: train_counts / test_counts are rebound here and hold the brain MRI
# counts from this point on.
train_counts = Counter(brain_train.classes)
print("Training set class distribution:", train_counts)

test_counts = Counter(brain_test.classes)
print("Test set class distribution:", test_counts)

Training set class distribution: Counter({2: 879, 0: 860, 1: 831, 3: 806})
Validation set class distribution: Counter({2: 109, 0: 107, 1: 103, 3: 101})
Test set class distribution: Counter({2: 110, 0: 107, 1: 104, 3: 100})
Training set class distribution: Counter({0: 1595, 3: 1457, 2: 1339, 1: 1321})
Test set class distribution: Counter({0: 405, 2: 306, 1: 300, 3: 300})


In [9]:
# Show the class-name -> index mapping of each training iterator (retinal first, then brain MRI)
for dataset_iterator in (retinal_train, brain_train):
    print("Class Indices:", dataset_iterator.class_indices)

Class Indices: {'normal': 0, 'cataract': 1, 'diabetic_retinopathy': 2, 'glaucoma': 3}
Class Indices: {'notumor': 0, 'glioma': 1, 'meningioma': 2, 'pituitary': 3}
