In [2]:
from google.colab import drive

# Mount Google Drive into the Colab runtime; the dataset and the saved-model
# paths used later in this notebook live under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
# Libraries for data processing
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import random_split, DataLoader, WeightedRandomSampler, TensorDataset, ConcatDataset, Subset, Dataset
from torchvision import datasets, transforms, models
from sklearn.metrics import precision_score, classification_report, recall_score, f1_score
from sklearn.model_selection import train_test_split
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image

# Libraries for metrics and visualization
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import confusion_matrix

# Library for file handling
import os
from google.colab import drive

  check_for_updates()


In [4]:
# Define dataset path
dataset_path = '/content/drive/My Drive/DS3'

# Preprocessing applied to every image (defined once; the pipeline used to be
# declared a second time further down without ever being used).
transform = transforms.Compose([
    # Resize all images to 224x224
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # Map pixel values from [0, 1] to [-1, 1]; the single mean/std value is
    # applied to every channel.
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# Load the full dataset (one sub-folder per class)
full_dataset = datasets.ImageFolder(root=dataset_path, transform=transform)
print(f"Loaded {len(full_dataset)} images from dataset.")

# Split the dataset into train and validation sets (no separate test set is
# created in this notebook).
train_size = int(0.7 * len(full_dataset))  # 70% training data
val_size = len(full_dataset) - train_size  # remaining ~30% validation data

generator = torch.Generator().manual_seed(42)  # Seed for a reproducible split
train_dataset, val_dataset = random_split(
    full_dataset, [train_size, val_size], generator=generator
)

# Labels of the training subset only: `train_dataset.indices` holds positions
# into `full_dataset`, so index the full label array with it.
train_targets = np.asarray(full_dataset.targets)[train_dataset.indices]

# Positions (within the training subset) of the 'Dementia' samples
dementia_class_idx = full_dataset.class_to_idx['Dementia']
dementia_indices = np.where(train_targets == dementia_class_idx)[0]

current_dementia_count = len(dementia_indices)

print(f"Current Dementia count: {current_dementia_count}")
print(f"Train size (original): {len(train_dataset)}")
print(f"Validation size: {len(val_dataset)}")
print()




Loaded 15435 images from dataset.
Current Dementia count: 3835
Train size (original): 10804
Validation size: 4631



In [5]:
# Loader settings shared by the training and validation loaders.
# - Worker count is capped by the CPUs actually available, so small runtimes
#   are not oversubscribed.
# - Pinned memory only helps host-to-GPU copies, so it is enabled only when
#   CUDA is present.
use_cuda = torch.cuda.is_available()
num_workers = min(4, os.cpu_count() or 1)
batch_size = 256

# Training loader: reshuffled every epoch
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=use_cuda,
)

# Validation loader: fixed order so metrics are comparable across epochs
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=use_cuda,
)

In [6]:
# Class-name -> integer-label mapping built by ImageFolder for `full_dataset`
class_indices = full_dataset.class_to_idx

# Integer label assigned to the 'Dementia' class
demented_class_idx = class_indices['Dementia']

# Show the whole mapping, then the 'Dementia' label on its own line
for caption, value in (
    ("Class Indices Mapping:", class_indices),
    ("Index for 'Dementia':", demented_class_idx),
):
    print(caption, value)


Class Indices Mapping: {'Dementia': 0, 'Non Demnted': 1}
Index for 'Dementia': 0


Removing augmentation


Added batch data augmentation to bring the Dementia/demented class to about 4000 samples.

In [8]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cpu


In [9]:
# Load ResNet50 with ImageNet weights. The `pretrained=True` flag is
# deprecated in torchvision; IMAGENET1K_V1 selects the same weights that flag
# used to load.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Replace the final fully connected layer with a 2-way head
# (Dementia vs. Non Demented). The input width is read from the existing
# layer instead of being hard-coded.
num_classes = 2
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)

# Move the model to the device
model = model.to(device)

# Define loss function (plain, unweighted cross-entropy)
criterion = nn.CrossEntropyLoss()

# Define optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Learning-rate scheduler: cuts the LR by 10x once the monitored metric has
# not improved for more than `patience` epochs. It only acts when
# `scheduler.step(<validation loss>)` is called once per epoch.
# The deprecated `verbose` argument is no longer passed.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2)



In [10]:
# Define checkpoint path
checkpoint_path = '/content/drive/My Drive/saved-model/model_checkpoint.pth'

# Resume from a checkpoint if one exists; otherwise start from scratch.
# Expected keys: 'model_state_dict', 'optimizer_state_dict', 'epoch', 'loss'
# (and optionally 'scheduler_state_dict').
if os.path.exists(checkpoint_path):
    # NOTE(security): torch.load unpickles the file - only load checkpoints
    # from a trusted source.
    # map_location remaps stored tensors onto the current device, so a
    # checkpoint written on a GPU runtime still loads on a CPU-only one.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # Older checkpoints may not carry scheduler state; restore it when present
    if 'scheduler_state_dict' in checkpoint:
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    start_epoch = checkpoint['epoch']
    best_loss = checkpoint['loss']
    print(f"Resumed training from epoch {start_epoch}, validation loss: {best_loss:.4f}")
else:
    print("No checkpoint found, starting fresh.")
    start_epoch = 0
    best_loss = float('inf')

No checkpoint found, starting fresh.


In [None]:
# Early stopping setup: stop after `patience` consecutive epochs without a new
# best validation loss.
patience = 3
epochs_since_improvement = 0

# Per-epoch history, re-plotted after every epoch
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []
epoch_times = []

# Number of epochs
num_epochs = 5
checkpoint_interval = 5  # Save a resumable checkpoint every 5 epochs

# Plain (unweighted) cross-entropy loss
criterion = nn.CrossEntropyLoss()

# Report labels in ImageFolder index order: the mapping printed earlier is
# {'Dementia': 0, 'Non Demnted': 1}, so label 0 is Dementia.
class_labels = [0, 1]
class_names = ['Dementia', 'Non Demented']

# Where the final weights are written once training ends
final_model_path = '/content/drive/My Drive/saved-model/final_model.pth'

# NOTE: `start_epoch` and `best_loss` come from the checkpoint cell.
# `best_loss` is intentionally NOT reset here, otherwise a resumed run would
# forget the best validation loss it had already reached.

# Training Loop
for epoch in range(start_epoch, num_epochs):
    print(f"\nEpoch {epoch + 1}/{num_epochs}")

    epoch_start_time = time.time()

    # ------------------------------------------------------------------
    # Training phase
    # ------------------------------------------------------------------
    model.train()
    running_loss, all_preds, all_labels = 0.0, [], []

    for i, (inputs, labels) in enumerate(train_loader, 1):  # Start batch index from 1
        batch_start_time = time.time()
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(labels.cpu().tolist())

        # Print every 10 iterations. Batch time covers transfer, forward,
        # backward and optimizer step (not the wait for the data loader).
        if i % 10 == 0:
            batch_time = time.time() - batch_start_time
            print(f"Iteration {i}/{len(train_loader)} - Loss: {loss.item():.4f}, Batch Time: {batch_time:.4f} seconds")

    # Training metrics: mean of per-batch losses, accuracy over all samples
    avg_train_loss = running_loss / len(train_loader)
    train_accuracy = accuracy_score(all_labels, all_preds)
    train_losses.append(avg_train_loss)
    train_accuracies.append(train_accuracy)

    # ------------------------------------------------------------------
    # Validation phase
    # ------------------------------------------------------------------
    model.eval()
    val_loss, val_preds, val_labels = 0.0, [], []

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            preds = outputs.argmax(dim=1)
            val_preds.extend(preds.cpu().tolist())
            val_labels.extend(labels.cpu().tolist())

    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = accuracy_score(val_labels, val_preds)
    val_losses.append(avg_val_loss)
    val_accuracies.append(val_accuracy)

    # Elapsed time for the whole epoch (training + validation), so the
    # "this epoch" figure printed below is accurate.
    epoch_time = time.time() - epoch_start_time
    epoch_times.append(epoch_time)

    # The plateau scheduler only lowers the LR when it is fed the monitored
    # metric once per epoch.
    scheduler.step(avg_val_loss)

    print(f"\nEpoch Summary [{epoch + 1}/{num_epochs}]")
    print(f"Training -> Loss: {avg_train_loss:.4f}, Accuracy: {train_accuracy * 100:.2f}%")
    print(f"Validation -> Loss: {avg_val_loss:.4f}, Accuracy: {val_accuracy * 100:.2f}%")
    print(f"Time taken for this epoch: {epoch_time:.2f} seconds")
    print(f"Learning rate: {optimizer.param_groups[0]['lr']:.2e}")

    # Track the best model and count epochs without improvement
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        epochs_since_improvement = 0
        torch.save(model.state_dict(), 'best_model.pth')
        print("Saved best model!")
    else:
        epochs_since_improvement += 1

    # Decide now, but break only at the end of the iteration so the plots and
    # reports for the stopping epoch are still produced.
    stop_early = epochs_since_improvement >= patience

    # Periodic resumable checkpoint, using the keys the checkpoint-loading
    # cell reads. 'epoch' stores the number of completed epochs, so a resumed
    # run continues with the next one.
    if (epoch + 1) % checkpoint_interval == 0:
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
        torch.save(
            {
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'loss': best_loss,
            },
            checkpoint_path,
        )
        print(f"Saved checkpoint after epoch {epoch + 1}.")

    # -------------------------
    # Plot Metrics After Each Epoch
    # -------------------------
    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(15, 5))

    # Loss Plot
    ax_loss.plot(train_losses, label="Training Loss")
    ax_loss.plot(val_losses, label="Validation Loss")
    ax_loss.set_xlabel("Epoch")
    ax_loss.set_ylabel("Loss")
    ax_loss.set_title("Loss Over Epochs")
    ax_loss.legend()

    # Accuracy Plot
    ax_acc.plot(train_accuracies, label="Training Accuracy")
    ax_acc.plot(val_accuracies, label="Validation Accuracy")
    ax_acc.set_xlabel("Epoch")
    ax_acc.set_ylabel("Accuracy")
    ax_acc.set_title("Accuracy Over Epochs")
    ax_acc.legend()

    fig.suptitle(f"Metrics After Epoch {epoch + 1}")
    fig.tight_layout()
    plt.show()
    plt.close(fig)  # Avoid accumulating open figures across epochs

    # Confusion matrix: rows are true labels, columns predictions, both in
    # `class_labels` order.
    print("\nConfusion Matrix (Validation):")
    print(confusion_matrix(val_labels, val_preds, labels=class_labels))

    # Passing `labels` keeps names aligned with indices and keeps the report
    # well-formed even if one class is absent from this epoch's predictions.
    print("\nClassification Report (Validation):")
    print(classification_report(
        val_labels, val_preds,
        labels=class_labels, target_names=class_names, zero_division=0,
    ))

    # Class-wise Precision, Recall, F1-score (index 0 = Dementia, 1 = Non Demented)
    precision = precision_score(val_labels, val_preds, labels=class_labels, average=None, zero_division=0)
    recall = recall_score(val_labels, val_preds, labels=class_labels, average=None, zero_division=0)
    f1 = f1_score(val_labels, val_preds, labels=class_labels, average=None, zero_division=0)

    # Print these metrics
    print(f"Precision (Non Demented): {precision[1]:.4f}, Precision (Dementia): {precision[0]:.4f}")
    print(f"Recall (Non Demented): {recall[1]:.4f}, Recall (Dementia): {recall[0]:.4f}")
    print(f"F1-Score (Non Demented): {f1[1]:.4f}, F1-Score (Dementia): {f1[0]:.4f}")

    if stop_early:
        print("Early stopping triggered.")
        break

# Persist the weights from the last completed epoch. The best-by-validation
# weights are in 'best_model.pth'.
os.makedirs(os.path.dirname(final_model_path), exist_ok=True)
torch.save(model.state_dict(), final_model_path)
print("Training complete. Final model saved as 'final_model.pth'.")

