In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, SequentialSampler, SubsetRandomSampler, Subset
from torchvision import transforms, datasets
from sklearn.model_selection import KFold

In [21]:
# Data augmentation and normalization for training
# Just normalization for validation and testing
train_transforms = transforms.Compose([
    transforms.Resize((64, 64)),  # Resize all images to a standard size
    transforms.RandomHorizontalFlip(),  # Randomly flip images horizontally
    transforms.RandomRotation(10),  # Randomly rotate images by up to 10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Randomly change brightness, contrast, saturation, and hue
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # Random translation
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_transforms = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the datasets with ImageFolder
train_dataset = datasets.ImageFolder(root='archive/Training', transform=train_transforms)
test_dataset = datasets.ImageFolder(root='archive/Testing', transform=test_transforms)

# Create the dataloaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=8)

In [22]:
# the size of the dataset and the shape of images
num_images = len(train_dataset)
image_shape = next(iter(train_loader))[0].shape[1:]  # CxHxW

image_shape

torch.Size([3, 64, 64])

In [23]:
# Initialize numpy arrays for storing data and labels
data_all = np.zeros((num_images,) + image_shape, dtype=np.float32)
labels_all = np.zeros(num_images, dtype=np.int64)

# Iterate over the DataLoader and fill the arrays
start_idx = 0
for images, labels in train_loader:
    end_idx = start_idx + images.size(0)
    data_all[start_idx:end_idx] = images.numpy()
    labels_all[start_idx:end_idx] = labels.numpy()
    start_idx = end_idx
    
# Now data_all is a NumPy array containing all images
# and labels_all contains all corresponding labels
# {'glioma': 0, 'meningioma': 1, 'notumor': 2, 'pituitary': 3}

In [25]:
class BrainTumorClassifier(nn.Module):
    def __init__(self):
        super(BrainTumorClassifier, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        
        # Pooling layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Calculate the size of the flattened output after conv and pooling layers
        self.flattened_size = (64 // (2**3)) * (64 // (2**3)) * 128  # Update for 64x64 images

        # Fully connected layers
        self.fc1 = nn.Linear(self.flattened_size, 512)
        self.fc2 = nn.Linear(512, 4)
        
        # Activation and dropout
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        
        x = x.view(x.size(0), -1)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.softmax(self.fc2(x))
        
        return x

# Convert labels to one-hot encoding
def to_one_hot(labels, num_classes):
    return torch.eye(num_classes)[labels]

In [26]:
y_train_one_hot = to_one_hot(y_train, num_classes=4)
y_test_one_hot = to_one_hot(y_test, num_classes=4)

y_train_one_hot

tensor([[0., 0., 0., 1.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        ...,
        [0., 0., 0., 1.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.]])

In [47]:
#Subset sampling
k_folds = 10
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)

device = torch.device("mps")
num_epochs = 8
subset_size = 80

for fold, (train_ids, test_ids) in enumerate(kfold.split(train_dataset)):
    print(f'Fold {fold+1}/{k_folds}')

    # Randomly sample indices for training and testing
    train_indices = np.random.choice(len(train_dataset), subset_size, replace=False)
    test_indices = np.random.choice(len(test_dataset), subset_size, replace=False)

    # Create samplers
    train_sampler = SubsetRandomSampler(train_indices)
    test_sampler = SubsetRandomSampler(test_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=64, sampler=train_sampler, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=64, sampler=test_sampler, num_workers=0)

    # Instantiate a new model for this fold
    model = BrainTumorClassifier()
    model.to(device)

    # Define the loss function and optimizer for this fold
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Start the training loop for this fold
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        # Training loop
        for data in train_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        train_accuracy = 100 * correct_train / total_train
        train_loss = running_loss / len(train_loader)

        # Validation loop
        model.eval()  # Set the model to evaluation mode
        running_loss_val = 0.0
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for data in test_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_loss_val += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_accuracy = 100 * correct_val / total_val
        val_loss = running_loss_val / len(test_loader)

        # Print statistics for this epoch
        print(f'Epoch {epoch+1}/{num_epochs}, '
              f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, '
              f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')

    print(f'Finished Training for Fold {fold+1}')


Fold 1/10
Epoch 1/8, Train Loss: 1.4082, Train Accuracy: 26.25%, Val Loss: 1.3677, Val Accuracy: 30.00%
Epoch 2/8, Train Loss: 1.3489, Train Accuracy: 26.25%, Val Loss: 1.3426, Val Accuracy: 35.00%
Epoch 3/8, Train Loss: 1.3650, Train Accuracy: 46.25%, Val Loss: 1.3532, Val Accuracy: 33.75%
Epoch 4/8, Train Loss: 1.2763, Train Accuracy: 50.00%, Val Loss: 1.3467, Val Accuracy: 35.00%
Epoch 5/8, Train Loss: 1.2813, Train Accuracy: 51.25%, Val Loss: 1.3419, Val Accuracy: 35.00%
Epoch 6/8, Train Loss: 1.2642, Train Accuracy: 46.25%, Val Loss: 1.2958, Val Accuracy: 35.00%
Epoch 7/8, Train Loss: 1.1870, Train Accuracy: 48.75%, Val Loss: 1.3453, Val Accuracy: 35.00%
Epoch 8/8, Train Loss: 1.1917, Train Accuracy: 52.50%, Val Loss: 1.3232, Val Accuracy: 43.75%
Finished Training for Fold 1
Fold 2/10
Epoch 1/8, Train Loss: 1.3753, Train Accuracy: 21.25%, Val Loss: 1.4034, Val Accuracy: 25.00%
Epoch 2/8, Train Loss: 1.2870, Train Accuracy: 38.75%, Val Loss: 1.3179, Val Accuracy: 46.25%
Epoch 3/8, 

In [48]:
#Full training / Validation
k_folds = 5
num_epochs = 8

for fold in range(k_folds):
    print(f'Fold {fold+1}/{k_folds}')

    # Create data loaders for the entire datasets
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

    # Instantiate a new model for this fold
    model = BrainTumorClassifier()
    model.to(device)

    # Define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Training and validation loop
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        # Training loop
        for data in train_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        train_accuracy = 100 * correct_train / total_train
        train_loss = running_loss / len(train_loader)

        # Validation loop
        model.eval()  # Set the model to evaluation mode
        running_loss_val = 0.0
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for data in test_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_loss_val += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_accuracy = 100 * correct_val / total_val
        val_loss = running_loss_val / len(test_loader)

        # Print statistics for this epoch
        print(f'Epoch {epoch+1}/{num_epochs}, '
              f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, '
              f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')

    print(f'Finished Training for Fold {fold+1}')


Fold 1/5
Epoch 1/8, Train Loss: 1.1819, Train Accuracy: 54.81%, Val Loss: 1.1059, Val Accuracy: 62.47%
Epoch 2/8, Train Loss: 1.1136, Train Accuracy: 62.24%, Val Loss: 1.0701, Val Accuracy: 66.74%
Epoch 3/8, Train Loss: 1.0570, Train Accuracy: 67.91%, Val Loss: 1.0498, Val Accuracy: 69.49%
Epoch 4/8, Train Loss: 1.0370, Train Accuracy: 70.22%, Val Loss: 1.0828, Val Accuracy: 64.15%
Epoch 5/8, Train Loss: 1.0096, Train Accuracy: 73.02%, Val Loss: 0.9894, Val Accuracy: 74.45%
Epoch 6/8, Train Loss: 1.0025, Train Accuracy: 73.55%, Val Loss: 1.0163, Val Accuracy: 71.93%
Epoch 7/8, Train Loss: 0.9878, Train Accuracy: 75.47%, Val Loss: 0.9863, Val Accuracy: 74.83%
Epoch 8/8, Train Loss: 0.9673, Train Accuracy: 77.17%, Val Loss: 1.0395, Val Accuracy: 69.26%
Finished Training for Fold 1
Fold 2/5
Epoch 1/8, Train Loss: 1.1879, Train Accuracy: 54.31%, Val Loss: 1.0767, Val Accuracy: 65.68%
Epoch 2/8, Train Loss: 1.1142, Train Accuracy: 62.48%, Val Loss: 1.0906, Val Accuracy: 63.69%
Epoch 3/8, Tr