<h2> Importing Necessary Libraries </h2>

In [5]:
# Utilities
import os
import time  # Performance tracking

# Data Manipulation
import numpy as np

# Visualization
import matplotlib.pyplot as plt

# Machine Learning
from sklearn.manifold import TSNE
from tqdm import tqdm  #Progress bars

# PyTorch Modules
import torch
from torch import nn, optim  
from torch.utils.data import DataLoader, Dataset  
from torchvision import models, transforms  
from torchvision.datasets import ImageFolder


<h2> Data Pre-Processing and Loading </h2>

In [None]:
# Data Transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize every image to 224x224, the standard ResNet input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet normalization values
])

# Load Data (ImageFolder assigns one class per sub-directory of data_dir)
data_dir = '../Data/Sample Test Dataset (Prostate Cancer)/Prostate Cancer'
dataset = ImageFolder(root=data_dir, transform=transform)

# Train/Test Split (70% train / 30% test)
train_size = int(0.7 * len(dataset))
test_size = len(dataset) - train_size
# Seed the split so that Restart Kernel -> Run All always yields the same
# train/test partition; without a generator the split differs on every run
# and the reported accuracies are not comparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# DataLoaders (batch size 32 was found to be the most optimal batch size)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

<h2> Model Initialization and Setup </h2>

In [16]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# ResNet-50 without pretrained weights (trained from scratch).
model = models.resnet50(weights=None)

# Replace the classification head so that its output width equals the
# number of classes found in the dataset.
num_classes = len(dataset.classes)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)
model.to(device)

# Hyperparameters (previously tested)
num_epochs, lr = 10, 0.001

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr)

# Learning-rate scheduler: scale the learning rate by 0.1 every 5 scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.1)

<h2> Training and Testing the Model </h2>

In [None]:
# Per-epoch metric histories; one entry is appended to each list per epoch.
train_acc, test_acc, train_loss, test_loss = [], [], [], []

# `time` is imported as a module, so the clock has to be read through one of
# its functions; perf_counter() is a monotonic clock intended for measuring
# elapsed durations.
training_start_time = time.perf_counter()

for epoch in range(num_epochs):
    epoch_start_time = time.perf_counter()

    # Training phase
    model.train()
    running_loss, running_corrects = 0.0, 0
    for inputs, labels in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}", leave=False):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Update training metrics
        _, preds = torch.max(outputs, 1)
        # The criterion returns the batch-mean loss, so weight it by the batch
        # size to obtain a correct dataset-level average below.
        running_loss += loss.item() * inputs.size(0)
        # .item() keeps the counter a plain Python int instead of a tensor.
        running_corrects += (preds == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects / len(train_dataset)
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)

    # Testing phase (no gradient tracking, layers switched to eval mode)
    model.eval()
    running_loss, running_corrects = 0.0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Update testing metrics
            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += (preds == labels).sum().item()

    epoch_loss = running_loss / len(test_dataset)
    epoch_acc = running_corrects / len(test_dataset)
    test_loss.append(epoch_loss)
    test_acc.append(epoch_acc)

    # Step the scheduler once per epoch
    scheduler.step()

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Train Loss: {train_loss[-1]:.4f}, Train Acc: {train_acc[-1]:.4f}, "
          f"Test Loss: {test_loss[-1]:.4f}, Test Acc: {test_acc[-1]:.4f}")
    print(f'Elapsed time for epoch {epoch+1}: {(time.perf_counter() - epoch_start_time):.2f} seconds')

print(f'Elapsed time for training model {(time.perf_counter() - training_start_time):.2f} seconds')

<h2> T-SNE Visualization </h2>

In [None]:
features, labels_list = [], []

# Every layer of the network except the last child (the `fc` classification
# head). Calling model(inputs) would return the class logits, not the
# features that feed the final layer.
feature_extractor = nn.Sequential(*list(model.children())[:-1])

model.eval()
with torch.no_grad():
    for inputs, labels in train_loader:
        inputs = inputs.to(device)

        # Extract features before the final layer and flatten them to one
        # vector per image, as t-SNE expects a 2-D (samples, features) array.
        embeddings = torch.flatten(feature_extractor(inputs), 1)
        features.append(embeddings.cpu())
        labels_list.append(labels)

features = torch.cat(features).numpy()
labels_list = torch.cat(labels_list).numpy()

# Apply t-SNE
tsne = TSNE(n_components=2, random_state=42)
features_2d = tsne.fit_transform(features)

# Plot t-SNE Results
# matplotlib has no `scatterplot` function (that is the seaborn API), so draw
# one plt.scatter call per class; this also gives a legend entry per class.
plt.figure(figsize=(10, 8))
class_ids = np.unique(labels_list)
class_colors = plt.cm.viridis(np.linspace(0, 1, len(class_ids)))
for class_id, class_color in zip(class_ids, class_colors):
    class_mask = labels_list == class_id
    plt.scatter(
        features_2d[class_mask, 0],
        features_2d[class_mask, 1],
        color=tuple(class_color),
        label=dataset.classes[class_id],
    )
plt.legend(title="Class")
plt.title("t-SNE Visualization of ResNet-50 Features")
plt.xlabel("t-SNE Dimension 1")
plt.ylabel("t-SNE Dimension 2")
plt.show()

<h2> Saving The Model </h2>
<p> Commented out since we already saved it </p>

In [None]:
# Uncomment to save the model
# Writes only the model's state_dict (not the full module) to
# task_1/resnet50_colorectal.pth, creating the task_1 directory if needed.
# NOTE(review): the file name says "colorectal" while the dataset path used
# above refers to prostate cancer - confirm the intended checkpoint name.
# os.makedirs('task_1', exist_ok=True)
# model_save_path = 'task_1/resnet50_colorectal.pth'
# torch.save(model.state_dict(), model_save_path)
# print(f"Model saved to {model_save_path}")