In [9]:
%pip install --upgrade pip
%pip install pillow tensorflow keras opencv-python

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [10]:
# imports for classification using torch and torchvision
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

In [11]:
data_dir = 'data'
train_dir = os.path.join(data_dir, 'Training')
test_dir = os.path.join(data_dir, 'Testing')

In [12]:
# get subdirectories from train and test
train_classes = [d.name for d in os.scandir(train_dir) if d.is_dir()]

In [13]:
train_image_paths = {cls: [] for cls in train_classes}
test_image_paths = []

In [14]:
# Load image paths for training
for class_name in train_classes:
    class_dir = os.path.join(train_dir, class_name)
    class_image_paths = [os.path.join(class_dir, img.name) for img in os.scandir(class_dir) if img.is_file()]
    train_image_paths[class_name] = class_image_paths

# Load image paths for testing
for class_name in train_classes:
    class_dir = os.path.join(test_dir, class_name)
    class_image_paths = [os.path.join(class_dir, img.name) for img in os.scandir(class_dir) if img.is_file()]
    test_image_paths.extend(class_image_paths)

In [15]:
for class_name, paths in train_image_paths.items():
    print(f"Number of images in class '{class_name}': {len(paths)}")
print(f"Number of testing images: {len(test_image_paths)}")

Number of images in class 'pituitary': 1457
Number of images in class 'notumor': 1595
Number of images in class 'glioma': 1321
Number of images in class 'meningioma': 1339
Number of testing images: 1311


In [16]:
input_shape = (224, 224, 3)
num_classes = len(train_classes)
batch_size = 32
epochs = 10

In [17]:
def load_and_preprocess_image(image_path, target_size=(224, 224)):
    img = Image.open(image_path)
    img = img.resize(target_size)
    img = np.array(img) / 255.0
    return img

In [18]:
#  data transformations
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]),
}

In [21]:
# datasets
image_datasets = {
    'train': datasets.ImageFolder(train_dir, data_transforms['train']),
    'test': datasets.ImageFolder(test_dir, data_transforms['test']),
}

# data loaders
batch_size = 32
dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=batch_size, shuffle=True),
    'test': DataLoader(image_datasets['test'], batch_size=batch_size, shuffle=False),
}

# Get the number of classes
num_classes = len(image_datasets['train'].classes)

class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.fc_layers = nn.Sequential(
            nn.Linear(32 * 56 * 56, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        return x

# Create the model
model = SimpleCNN(num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 5
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in dataloaders['train']:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(image_datasets['train'])
    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {epoch_loss:.4f}")

model_save_path = 'model.pth'
torch.save(model.state_dict(), model_save_path)
print(f"Trained model saved to '{model_save_path}'")

model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in dataloaders['test']:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))

train_loss_history = []
train_accuracy_history = []

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for inputs, labels in dataloaders['train']:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)

        # Calculate training accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    # store training loss and accuracy for the epoch
    epoch_loss = running_loss / len(image_datasets['train'])
    epoch_accuracy = correct_train / total_train
    train_loss_history.append(epoch_loss)
    train_accuracy_history.append(epoch_accuracy)

    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {epoch_loss:.4f} Train Accuracy: {epoch_accuracy:.2f}")

# Plot the training loss curve
plt.figure(figsize=(8, 4))
plt.plot(train_loss_history, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss Curve')
plt.legend()
plt.grid(True)
plt.show()

# a few sample image predictions from the test set
num_samples_to_show = 5
selected_indices = np.random.choice(len(image_datasets['test']), num_samples_to_show, replace=False)

for index in selected_indices:
    input_image, true_label = image_datasets['test'][index]
    input_image = input_image.unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(input_image)
        _, predicted_label = torch.max(output.data, 1)
    
    # the image along with its true and predicted labels
    plt.imshow(input_image.squeeze().cpu().permute(1, 2, 0))
    plt.title(f'True Label: {image_datasets["test"].classes[true_label]}, Predicted Label: {image_datasets["test"].classes[predicted_label]}')
    plt.show()

Epoch [1/5] Loss: 0.6086
Epoch [2/5] Loss: 0.2578
Epoch [3/5] Loss: 0.1253
Epoch [4/5] Loss: 0.0714
Epoch [5/5] Loss: 0.0319
Trained model saved to 'model.pth'
Test Accuracy: 94.20%
Epoch [1/5] Loss: 0.0259 Train Accuracy: 0.99
Epoch [2/5] Loss: 0.0253 Train Accuracy: 0.99


KeyboardInterrupt: 