In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from PIL import Image
import os
import shutil
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
# Root folder that contains one subfolder per class
data_path = 'data/Unified'

# Every entry found directly under the data folder
subfolders = os.listdir(data_path)

# Map each subfolder to the number of entries inside it; anything under
# data_path that is not a directory is skipped
image_counts_data = {
    subfolder: len(os.listdir(os.path.join(data_path, subfolder)))
    for subfolder in subfolders
    if os.path.isdir(os.path.join(data_path, subfolder))
}

# Report the per-class totals
print("Number of images in each class (Training data):")
for subfolder in image_counts_data:
    count = image_counts_data[subfolder]
    print(f"{subfolder}: {count}")

Number of images in each class (Training data):
glioma_tumor: 926
meningioma_tumor: 937
no_tumor: 500
pituitary_tumor: 901


In [4]:
# Folder holding the original images and folder receiving the balanced copy
original_training_data_path = 'data/Unified'
augmented_training_data_path = 'data/Augmented'

# Random transformations used when synthesising additional images
augmentation_settings = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_settings)

# Function to augment images of a specific class
def augment_images(class_name, target_count, target_size=(128, 128)):
    """Copy a class into the augmented folder and pad it with synthetic images.

    Every file of ``class_name`` is copied from ``original_training_data_path``
    to ``augmented_training_data_path`` (files already present are left
    untouched). Randomly transformed variants produced by the module-level
    ``datagen`` are then saved until the destination folder holds
    ``target_count`` files.

    The number of images to generate is derived from what the destination
    folder already contains, so calling the function again after an
    interrupted run tops the folder up instead of overshooting
    ``target_count``.

    Args:
        class_name: Name of the class subfolder to process.
        target_count: Desired number of files in the augmented class folder.
        target_size: Size passed to ``image.load_img`` when a source image is
            loaded for augmentation.

    Raises:
        ValueError: If images have to be generated but the source folder
            contains no files to draw from.
    """
    class_path = os.path.join(original_training_data_path, class_name)
    augmented_class_path = os.path.join(augmented_training_data_path, class_name)

    # Create the destination folder (and missing parents) if needed
    os.makedirs(augmented_class_path, exist_ok=True)

    # List existing images
    existing_images = os.listdir(class_path)

    # Copy originals that are not yet present in the destination
    for image_name in existing_images:
        dst_path = os.path.join(augmented_class_path, image_name)
        if not os.path.exists(dst_path):
            shutil.copy(os.path.join(class_path, image_name), dst_path)

    # Measure the shortfall against the destination folder itself, so files
    # left behind by an earlier (possibly interrupted) run are accounted for
    present_names = set(os.listdir(augmented_class_path))
    missing = target_count - len(present_names)
    if missing > 0 and not existing_images:
        raise ValueError(f"No source images found in {class_path!r} to augment")

    serial = 0
    while missing > 0:
        # Randomly select an image to augment
        image_name = np.random.choice(existing_images)
        save_name = f"aug_{serial}_{image_name}"
        serial += 1
        if save_name in present_names:
            # Name already taken by an earlier run: draw again rather than
            # overwrite a file (which would leave the folder short)
            continue

        img = image.load_img(os.path.join(class_path, image_name), target_size=target_size)

        # Convert to numpy array and add batch dimension
        x = np.expand_dims(image.img_to_array(img), axis=0)

        # Generate one augmented image
        augmented_images = next(datagen.flow(x, batch_size=1))
        augmented_image = image.array_to_img(augmented_images[0])

        # Save augmented image
        augmented_image.save(os.path.join(augmented_class_path, save_name))
        present_names.add(save_name)
        missing -= 1

# Function to copy images of a specific class
def copy_images(class_name):
    """Copy all files of one class from the original to the augmented folder.

    The destination subfolder is created when it does not exist yet. Files
    whose name is already present in the destination are not copied again.

    Args:
        class_name: Name of the class subfolder to copy.
    """
    source_dir = os.path.join(original_training_data_path, class_name)
    destination_dir = os.path.join(augmented_training_data_path, class_name)

    destination_missing = not os.path.exists(destination_dir)
    if destination_missing:
        os.makedirs(destination_dir)

    for file_name in os.listdir(source_dir):
        target_file = os.path.join(destination_dir, file_name)
        if os.path.exists(target_file):
            # Already copied earlier; leave the existing file alone
            continue
        shutil.copy(os.path.join(source_dir, file_name), target_file)

# Per-class entry counts of the original data (directories only)
image_counts = {}
for class_name in os.listdir(original_training_data_path):
    class_dir = os.path.join(original_training_data_path, class_name)
    if os.path.isdir(class_dir):
        image_counts[class_name] = len(os.listdir(class_dir))

# Size of the largest class; 'no_tumor' is topped up to this number
max_count = max(image_counts.values())

# Augment 'no_tumor' only when its augmented folder is missing or still too small
augmented_no_tumor_path = os.path.join(augmented_training_data_path, 'no_tumor')
needs_augmentation = (
    not os.path.exists(augmented_no_tumor_path)
    or len(os.listdir(augmented_no_tumor_path)) < max_count
)
if needs_augmentation:
    # Make sure the root of the augmented data exists first
    if not os.path.exists(augmented_training_data_path):
        os.makedirs(augmented_training_data_path)

    augment_images('no_tumor', max_count)
else:
    print("Augmented images already exist. Skipping augmentation.")

# The remaining classes are copied over unchanged
for class_name in image_counts:
    if class_name == 'no_tumor':
        # Handled by the branch above
        continue
    copy_images(class_name)

## Calculating class weights

In [5]:
# Classes, sorted so that their order is the same as the order of the
# `classes` array handed to compute_class_weight below. The returned weights
# follow that array, so zipping them with an unsorted directory listing could
# attach a weight to the wrong class.
classes = sorted(image_counts.keys())

# Number of samples in each class
samples_per_class = [image_counts[cls] for cls in classes]

# One label per image, built from the per-class counts
labels_per_image = np.repeat(classes, samples_per_class)

# Calculate class weights
class_weights = compute_class_weight('balanced', classes=np.array(classes), y=labels_per_image)
class_weight_dict = dict(zip(classes, class_weights))

print("Class Weights:", class_weight_dict)

Class Weights: {'glioma_tumor': 0.8812095032397408, 'meningioma_tumor': 0.8708644610458911, 'no_tumor': 1.632, 'pituitary_tumor': 0.9056603773584906}


## Neural Networks

In [6]:
# Image dimensions (pixels) and number of images per batch
img_height = 128
img_width = 128
batch_size = 32

In [7]:
# Generator settings: pixel rescaling, random augmentation and a
# training/validation split of the files
generator_options = {
    'rescale': 1./255,
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
    'validation_split': 0.2,  # set the validation split
}
datagen = ImageDataGenerator(**generator_options)

In [8]:
# NOTE(review): both generators read `data_path` (the original images); the
# balanced copy written to 'data/Augmented' is not used here - confirm that
# this is intended.

# Training subset, read through the augmenting generator
train_generator = datagen.flow_from_directory(
    data_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'  # set as training data
)

# Validation images must not be randomly distorted, otherwise the validation
# metrics are measured on augmented data. Use a generator that only rescales;
# the split fraction has to be the same as the one configured on `datagen`
# so that both generators partition the files identically.
validation_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

validation_generator = validation_datagen.flow_from_directory(
    data_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'  # set as validation data
)

Found 2612 images belonging to 4 classes.
Found 652 images belonging to 4 classes.


In [26]:
# Length of the training generator, i.e. the number of batches it provides
num_train_batches = len(train_generator)
print(num_train_batches)

104


In [9]:
# Fetch the first batch from the training generator
first_batch = train_generator[0]
images, labels = first_batch

# First sample of that batch together with its label
first_image, first_label = images[0], labels[0]

# Scale the pixel values by 255, cast to 8-bit and wrap them in a Pillow image
pixels_8bit = (first_image * 255).astype('uint8')
first_image = Image.fromarray(pixels_8bit)

# Show the image
#first_image.show()

# Print the label
print(f'Label: {first_label}')


Label: [0. 0. 0. 1.]


In [10]:
# Read the basic properties of the training generator
image_shape = train_generator.image_shape
num_batches = len(train_generator)
num_samples = train_generator.n
batch_size = train_generator.batch_size

# Report them
print(f'Image Shape: {image_shape}')
print(f'Number of Batches per Epoch: {num_batches}')
print(f'Total Number of Samples: {num_samples}')
print(f'Batch Size: {batch_size}')

Image Shape: (128, 128, 3)
Number of Batches per Epoch: 82
Total Number of Samples: 2612
Batch Size: 32


In [60]:
# Take the image at position 10 of the previously fetched batch and print its pixel array
sample_index = 10
sample_image = images[sample_index]
print(sample_image)

[[[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 ...

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  ...
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]]


## Convolutional Neural Network (CNN)

In [11]:
# Class name -> integer index, as assigned by the training generator
class_indices = train_generator.class_indices

# Re-key the class weights by that integer index
class_weight_indices = {}
for class_name, weight in class_weight_dict.items():
    class_weight_indices[class_indices[class_name]] = weight

In [14]:
# Seed TensorFlow's global random generator
tf.random.set_seed(42)

# Baseline network: one convolution + pooling stage feeding a 4-way softmax
model_1 = keras.Sequential()
model_1.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model_1.add(layers.MaxPooling2D((2, 2)))
model_1.add(layers.Flatten())
model_1.add(layers.Dense(4, activation='softmax'))

# Compile the model
model_1.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train with the per-class weights and evaluate on the validation generator
history_1 = model_1.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator,
    class_weight=class_weight_indices,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


The training accuracy is much higher than the validation accuracy, which indicates overfitting, so we will try other configurations.

In [15]:
# Seed TensorFlow's global random generator
tf.random.set_seed(42)

# Two convolution + pooling blocks followed by a small dense classifier
model_2_layers = [
    # First convolutional block
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    layers.MaxPooling2D((2, 2)),
    # Second convolutional block
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(4, activation='softmax'),
]
model_2 = keras.Sequential(model_2_layers)

model_2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history_2 = model_2.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator,
    class_weight=class_weight_indices,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [34]:
# Set random seed
tf.random.set_seed(42)

# Data augmentation applied inside the model
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
])

# Build the model. The input shape is declared up front: an `input_shape`
# argument on a layer that is not the first one in a Sequential model is not
# used to define the model input.
model_3 = keras.Sequential([
    keras.Input(shape=(128, 128, 3)),
    data_augmentation,
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(4, activation='softmax')
])

# Compile the model
model_3.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

# Early stopping: stop after 3 epochs without val_loss improvement and roll
# the model back to the best epoch instead of keeping the last (worse) weights
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model
history_3 = model_3.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator,
    class_weight=class_weight_indices,
    callbacks=[early_stopping]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


In [35]:
# Set random seed
tf.random.set_seed(42)

# Data augmentation applied inside the model
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
])

# Build the model. The input shape is declared up front: an `input_shape`
# argument on a layer that is not the first one in a Sequential model is not
# used to define the model input.
model_4 = keras.Sequential([
    keras.Input(shape=(128, 128, 3)),
    data_augmentation,
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(4, activation='softmax')
])

# Compile the model with a reduced learning rate
model_4.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                loss='categorical_crossentropy',
                metrics=['accuracy'])

# Early stopping: stop after 3 epochs without val_loss improvement and roll
# the model back to the best epoch
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model. The callback was previously created but never handed to
# fit(), so it had no effect.
history_4 = model_4.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator,
    class_weight=class_weight_indices,
    callbacks=[early_stopping]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
# Set random seed
tf.random.set_seed(42)

# Load MobileNetV2 without its classification head and freeze the ImageNet weights
base_model = tf.keras.applications.MobileNetV2(input_shape=(128,128,3), 
                                               include_top=False, 
                                               weights="imagenet")
base_model.trainable = False

# Create a new model with custom layers
model_5 = tf.keras.Sequential([
    tf.keras.Input(shape=(128, 128, 3)),
    # The generators rescale pixels by 1/255, i.e. to [0, 1], whereas the
    # pretrained MobileNetV2 weights expect inputs in [-1, 1]; x * 2 - 1
    # converts between the two ranges.
    tf.keras.layers.Rescaling(scale=2.0, offset=-1.0),
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(4, activation='softmax')
])

# Compile the model with a smaller learning rate
model_5.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), 
                loss='categorical_crossentropy', 
                metrics=['mae', 'accuracy'])

# Train the model with class weights
history_5 = model_5.fit(train_generator, 
                        epochs=20, 
                        validation_data=validation_generator, 
                        class_weight=class_weight_indices)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [33]:
# Set random seed
tf.random.set_seed(42)

# Load a fresh MobileNetV2 with ImageNet weights (this does not reuse model_5)
base_model = tf.keras.applications.MobileNetV2(input_shape=(128,128,3), 
                                               include_top=False, 
                                               weights="imagenet")

# Unfreeze the base model
base_model.trainable = True

# Freeze all layers except for the last 10
# NOTE(review): the unfrozen tail may contain BatchNormalization layers whose
# statistics will be updated during training - confirm this is wanted.
for layer in base_model.layers[:-10]:
    layer.trainable = False

# Create a new model with custom layers
model_6 = tf.keras.Sequential([
    tf.keras.Input(shape=(128, 128, 3)),
    # The generators rescale pixels by 1/255, i.e. to [0, 1], whereas the
    # pretrained MobileNetV2 weights expect inputs in [-1, 1]; x * 2 - 1
    # converts between the two ranges.
    tf.keras.layers.Rescaling(scale=2.0, offset=-1.0),
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(4, activation='softmax')
])

# Compile the model with a smaller learning rate
model_6.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001), 
                loss='categorical_crossentropy', 
                metrics=[
                    'mae', 
                    'accuracy', 
                    tf.keras.metrics.Precision(), 
                    tf.keras.metrics.Recall()
                ])

# Train the partially unfrozen network together with its newly initialised head
history_6 = model_6.fit(train_generator, 
                             epochs=15, 
                             validation_data=validation_generator, 
                             class_weight=class_weight_indices)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
