In [1]:
import numpy as np
import pandas as pd
import os
import zipfile
import tensorflow as tf
from google.colab import drive
import glob
from keras.preprocessing.image import load_img, img_to_array
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
# Mount Google Drive at /content/drive (Colab only); the dataset archive opened in the
# next cell is read from a path under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Link to dataset https://www.kaggle.com/datasets/thomasdubail/brain-tumors-256x256/code
# Location of the downloaded archive on the mounted Drive.
archive_path = '/content/drive/MyDrive/archive (2).zip'
# Absolute extraction target: the loading cell reads '/content/data/Data', so the
# destination must not depend on the kernel's current working directory.
extract_dir = '/content/data'

# Unpack the whole archive; the context manager closes the zip file afterwards.
with zipfile.ZipFile(archive_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)


In [3]:
# Root directory of the extracted dataset: one sub-directory per class.
base_dir = '/content/data/Data'

# Create lists to hold image file paths and corresponding labels
image_paths = []
labels = []

# Define image size (every image is resized to this when loaded)
image_size = (128, 128)

# os.listdir and glob.glob return entries in arbitrary order, so both are sorted to
# make the sample order identical on every run.
for folder_name in sorted(os.listdir(base_dir)):
    folder_path = os.path.join(base_dir, folder_name)
    if not os.path.isdir(folder_path):
        continue  # only sub-directories are treated as classes

    file_list = sorted(glob.glob(os.path.join(folder_path, '*.jpg')))
    print(len(file_list))

    # The folder name is the label of every image it contains.
    image_paths.extend(file_list)
    labels.extend([folder_name] * len(file_list))

# Fail here with a clear message instead of producing an empty array that only
# breaks later during shuffling / splitting.
if not image_paths:
    raise FileNotFoundError(f"No .jpg images found in sub-directories of {base_dir!r}")

# Convert the lists to numpy arrays
image_paths = np.array(image_paths)
labels = np.array(labels)

# Load images and convert them to arrays (all images are held in memory at once)
images = np.array([img_to_array(load_img(img, target_size=image_size)) for img in image_paths])

438
913
901
844


In [4]:
# Seed shared by the shuffle and the split so the train/validation partition is
# reproducible from a fresh kernel.
SEED = 42

# Encode the labels as integers
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
num_classes = len(label_encoder.classes_)

# Shuffle the dataset with a seeded permutation applied to images and labels alike.
# An unseeded shuffle here would make the random_state passed to train_test_split
# meaningless, because the input order would differ on every run.
# NOTE: images / labels_encoded are rebound, so re-running this cell without
# re-running the loading cell permutes the already-shuffled arrays again.
rng = np.random.default_rng(SEED)
shuffled_indices = rng.permutation(images.shape[0])
images = images[shuffled_indices]
labels_encoded = labels_encoded[shuffled_indices]

# Split the data into a training set and a validation set (80% / 20%)
X_train, X_val, y_train, y_val = train_test_split(
    images, labels_encoded, test_size=0.2, random_state=SEED
)

# Convert the integer labels to one-hot vectors. num_classes is passed explicitly so
# both matrices have the same width even if a split happens to miss the highest class.
y_train_one_hot = to_categorical(y_train, num_classes=num_classes)
y_val_one_hot = to_categorical(y_val, num_classes=num_classes)

# Check the distribution of labels to ensure there are four classes
unique_labels, counts = np.unique(labels_encoded, return_counts=True)
label_distribution = dict(zip(label_encoder.inverse_transform(unique_labels), counts))
print(f"Label distribution: {label_distribution}")

Label distribution: {'glioma_tumor': 901, 'meningioma_tumor': 913, 'normal': 438, 'pituitary_tumor': 844}


In [5]:
from keras.preprocessing.image import ImageDataGenerator

# Mini-batch size used by both generators.
batch_size = 32

# Define the image data generator with only rescaling (pixel values scaled by 1/255)
train_datagen = ImageDataGenerator(
    rescale=1./255
)

# Create the training data generator (flow() shuffles by default, which is what we
# want for training)
train_generator = train_datagen.flow(
    X_train,
    y_train_one_hot,
    batch_size=batch_size
)

val_datagen = ImageDataGenerator(rescale=1./255)

# Create the validation data generator. Shuffling is disabled so batches come out in
# the same order as X_val / y_val_one_hot on every pass; this keeps evaluation
# deterministic and lets predictions be compared against y_val by position.
val_generator = val_datagen.flow(
    X_val,
    y_val_one_hot,
    batch_size=batch_size,
    shuffle=False
)

In [6]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Derive the input shape and the number of output units from the prepared arrays
# instead of hard-coding them, so the model follows the data if the image size or
# the set of classes changes.
input_shape = X_train.shape[1:]          # (height, width, channels) of one image
n_classes = y_train_one_hot.shape[1]     # width of the one-hot label matrix

# Model architecture: three Conv2D + MaxPooling2D stages followed by a dense
# classifier head with dropout.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(n_classes, activation='softmax')  # one probability per class
])

# Compile the model (categorical cross-entropy matches the one-hot labels)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model. len(generator) is the number of batches needed to cover the whole
# array, including the final partial batch; flooring len(X) // batch_size would drop
# that batch and leave part of the validation set out of the reported metrics.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=30,
    validation_data=val_generator,
    validation_steps=len(val_generator)
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
