In [None]:
# imports
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report

In [None]:
def report(y_test, y_pred_labels):
    """Print a scikit-learn classification report for the given predictions.

    Args:
        y_test: Ground-truth class labels.
        y_pred_labels: Predicted class labels, aligned with ``y_test``.
    """
    # Header goes out first so it is visible even if building the report fails.
    print("Classification Report:")
    report_text = classification_report(y_test, y_pred_labels)
    print(report_text)

In [None]:
# model analysis function
def model_analysis(model_path, data_root='./data', batch_size=64):
    """Evaluate a saved PyTorch model on the MNIST test set and visualize the results.

    Loads the pickled model at ``model_path``, prints its architecture and
    number of trainable parameters, runs it over the MNIST test split and then:

    * prints the overall accuracy,
    * shows a scatter plot of predictions vs. true labels for the first 100
      test samples,
    * shows up to ten test images titled with their true and predicted labels,
    * prints a classification report via ``report``.

    Args:
        model_path: Path to the saved model file. It is joined onto the current
            working directory, so a relative path is resolved against it and an
            absolute path is used unchanged.
        data_root: Directory the MNIST data is read from / downloaded to.
        batch_size: Number of images per evaluation batch.

    Raises:
        TypeError: If the loaded object is not a ``torch.nn.Module`` (for
            example when the file only contains a ``state_dict``).
    """
    num_preview = 10  # number of images shown in the 2x5 preview grid

    # The rest of this function calls ``.eval()`` and ``model(images)`` on the
    # loaded object, i.e. it expects a fully pickled module. Recent PyTorch
    # releases default to ``weights_only=True`` and refuse such files, so the
    # flag is passed explicitly.
    # SECURITY: unpickling can execute arbitrary code -- only load checkpoints
    # that come from a trusted source.
    # ``map_location='cpu'`` keeps the model on the same device as the batches
    # below (which are never moved off the CPU), even for GPU-saved checkpoints.
    model = torch.load(
        os.path.join(os.getcwd(), model_path),
        map_location='cpu',
        weights_only=False,
    )
    if not isinstance(model, torch.nn.Module):
        raise TypeError(
            f"Expected a pickled torch.nn.Module in {model_path!r}, "
            f"got {type(model).__name__}"
        )
    model.eval()  # Ensure the model is in evaluation mode

    # Display the model summary
    print(model)

    # Count only the parameters that would be updated by training
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f'Number of trainable parameters: {num_params}')

    # Define image transformations
    transform = transforms.Compose([
        transforms.ToTensor(),  # Convert images to tensors
        transforms.Normalize((0.5,), (0.5,))  # Normalize with mean 0.5, std 0.5
    ])

    # Load the MNIST test dataset; shuffle=False keeps a fixed sample order
    test_dataset = datasets.MNIST(root=data_root, train=False, download=True, transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Model evaluation on the test dataset
    label_batches = []
    pred_batches = []
    images_sample = []

    with torch.no_grad():  # No need to compute gradients during evaluation
        for images, labels in test_loader:
            predicted = model(images).argmax(dim=1).cpu()  # Class with the highest score
            label_batches.append(labels.cpu())
            pred_batches.append(predicted)

            # Keep the first few images for visualization, taking only as many
            # as are still missing so the preview never exceeds the grid size.
            missing = num_preview - len(images_sample)
            if missing > 0:
                images_sample.extend(images[:missing].cpu())

    all_labels = torch.cat(label_batches).numpy()
    all_preds = torch.cat(pred_batches).numpy()

    # Calculate accuracy
    total = len(all_labels)
    correct = int((all_preds == all_labels).sum())
    accuracy = correct / total
    print(f'Model accuracy on the MNIST test set: {accuracy * 100:.2f}%')

    # Accuracy plot
    scatter_fig, scatter_ax = plt.subplots(1, 1, figsize=(8, 6))
    scatter_ax.plot(all_labels[:100], all_preds[:100], 'bo', label='Predictions vs True Labels')
    scatter_ax.set_xlabel('True Labels')
    scatter_ax.set_ylabel('Predictions')
    scatter_ax.set_title('Comparison of Predictions with True Labels')
    scatter_ax.legend()
    plt.show()

    # Image predictions visualization
    grid_fig, grid_axes = plt.subplots(2, 5, figsize=(10, 5))
    grid_axes = grid_axes.ravel()
    for cell in grid_axes:
        cell.axis('off')
    # The preview images are the first samples in loader order, so their labels
    # and predictions are simply the leading entries of the full arrays.
    for cell, image, true_label, pred_label in zip(grid_axes, images_sample, all_labels, all_preds):
        cell.imshow(image.squeeze(), cmap='gray')
        cell.set_title(f"True: {true_label} Pred: {pred_label}")
    plt.show()

    # Classification report
    report(all_labels, all_preds)

In [None]:
# Checkpoints to analyze: participant 1 after rounds 0, 4 and 9 of one run.
# All three live in the same run directory, so it is spelled out only once.
_run_dir = "robust/models/18_02_2025_13_26_47"
model0_path = f"{_run_dir}/participant_1_round_0_model.pth"
model4_path = f"{_run_dir}/participant_1_round_4_model.pth"
model9_path = f"{_run_dir}/participant_1_round_9_model.pth"

In [None]:
# Analyze the first model: participant 1's round-0 checkpoint
model_analysis(model0_path)

In [None]:
# Analyze the mid-run model: participant 1's round-4 checkpoint
model_analysis(model4_path)

In [None]:
# Analyze the last model: participant 1's round-9 checkpoint
model_analysis(model9_path)