In [1]:
import torch
import torch.nn as nn
import numpy as np
from torchvision import datasets, transforms
import time
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import pandas as pd
import os
import json
import importlib

# Import the MLPClassifier class
from MLP_model import MLPClassifier

# --------------------------- Configuration --------------------------- #

# Locations of the trained checkpoint, the dataset, and the generated reports
MODEL_PATH = 'MLP_best_model.pth'           # Checkpoint of the best MLP model
DATA_ROOT = '../data'                        # Directory holding the KMNIST data
CONFUSION_MATRIX_CSV = 'confusion_matrix.csv'
CONFUSION_MATRIX_PNG = 'confusion_matrix.png'
STATS_CSV = 'test_evaluation_stats.csv'

# --------------------------- Set Device --------------------------- #

# Run on the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# --------------------------- Data Transformations --------------------------- #


def _flatten_image(image_tensor):
    """Collapse an image tensor into a 1-D vector."""
    return image_tensor.view(-1)


# Preprocessing pipeline; intended to mirror the one used during training
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),  # Shift/scale to [-1, 1]
    transforms.Lambda(_flatten_image),
])

# --------------------------- Load Test Dataset --------------------------- #

# KMNIST test split (train=False), downloaded into DATA_ROOT if needed
test_dataset = datasets.KMNIST(
    root=DATA_ROOT,
    train=False,
    transform=transform,
    download=True,
)

# One image per batch, in dataset order, so inference can be timed per image
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
)

# --------------------------- Load the Saved MLP Model --------------------------- #

if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"Model file '{MODEL_PATH}' not found. Please ensure the path is correct.")

# NOTE(review): depending on the PyTorch version, torch.load may unpickle
# arbitrary objects from the file. Only load checkpoints from a trusted source;
# consider `weights_only=True` if the installed version supports it and the
# checkpoint holds only tensors and plain containers -- TODO confirm.
checkpoint = torch.load(MODEL_PATH, map_location=device)

# Extract model configuration
model_config = checkpoint.get('model_config', {})
if not model_config:
    raise KeyError("Model configuration 'model_config' not found in the checkpoint.")

input_size = model_config.get('input_size')
output_size = model_config.get('output_size')
layer_sizes = model_config.get('layer_sizes')
activation_functions = model_config.get('activation_functions')
dropout_rates = model_config.get('dropout_rates')
batch_norm = model_config.get('batch_norm')

# A key counts as missing only when it is absent or None. Testing truthiness
# would wrongly reject valid falsy values such as an empty list or 0.
required_config_keys = (
    'input_size',
    'output_size',
    'layer_sizes',
    'activation_functions',
    'dropout_rates',
    'batch_norm',
)
missing_config_keys = [key for key in required_config_keys if model_config.get(key) is None]
if missing_config_keys:
    raise ValueError(
        "Incomplete model configuration found in the checkpoint. "
        f"Missing or null keys: {', '.join(missing_config_keys)}"
    )

# ------------------- Helper Function to Reconstruct Activation Functions ------------------- #

def get_activation_class(act_str):
    """
    Resolve the string form of a class back into the class object itself.

    Accepts either the ``repr`` of a class (``"<class 'pkg.mod.Name'>"``) or a
    bare dotted path (``"pkg.mod.Name"``).
    Example: "<class 'torch.nn.modules.activation.LeakyReLU'>" -> nn.LeakyReLU

    Args:
        act_str: String identifying the class, as described above.

    Returns:
        The class object found at the dotted path.

    Raises:
        ValueError: If the string has no module part, the module cannot be
            imported, or the module has no attribute with the class name.
    """
    prefix = "<class '"
    suffix = "'>"
    class_path = act_str.strip()
    # Remove the exact wrapper text. str.strip(chars) treats its argument as a
    # set of characters, so it would also eat leading/trailing letters of the
    # path itself (e.g. the "c" of "collections" or the "s" of "Softplus").
    if class_path.startswith(prefix) and class_path.endswith(suffix):
        class_path = class_path[len(prefix):-len(suffix)]
    try:
        module_path, class_name = class_path.rsplit('.', 1)
        module = importlib.import_module(module_path)
        return getattr(module, class_name)
    except (ValueError, ImportError, AttributeError) as e:
        raise ValueError(f"Error reconstructing activation function from string '{act_str}': {e}") from e

# Turn every stored class string into an instantiated activation module.
# LeakyReLU is built with an explicit slope of 0.01 (the slope is assumed not
# to be stored in the checkpoint); every other class is built with no arguments.
reconstructed_activations = [
    nn.LeakyReLU(negative_slope=0.01) if activation_cls is nn.LeakyReLU else activation_cls()
    for activation_cls in map(get_activation_class, activation_functions)
]

# Assemble the constructor arguments from the checkpoint configuration
mlp_kwargs = dict(
    input_size=input_size,
    layer_sizes=layer_sizes,
    output_size=output_size,
    activation_functions=reconstructed_activations,
    dropout_rates=dropout_rates,
    batch_norm=batch_norm,
    weight_init=None,  # Weights come from the checkpoint, so no init scheme is passed
)

# Build the MLP and place it on the selected device
model = MLPClassifier(**mlp_kwargs).to(device)

# Restore the trained parameters
model.load_state_dict(checkpoint['model_state_dict'])

# Switch to evaluation mode before running inference
model.eval()
print("Model loaded and set to evaluation mode.")

# --------------------------- Evaluation on Test Data --------------------------- #

# Number of untimed forward passes run before measuring, so that one-time
# start-up costs of the first inference calls do not skew the timing statistics.
NUM_WARMUP_BATCHES = 10

all_labels = []          # Ground-truth class index for every test image
all_preds = []           # Predicted class index for every test image
computation_times = []   # Forward-pass duration per batch, in seconds

use_cuda = device.type == 'cuda'
num_batches = len(test_loader)

with torch.no_grad():
    # Warm-up: results are discarded and nothing is timed
    for warmup_idx, (inputs, _) in enumerate(test_loader):
        if warmup_idx >= NUM_WARMUP_BATCHES:
            break
        model(inputs.to(device))

    for idx, (inputs, labels) in enumerate(test_loader, 1):
        inputs = inputs.to(device)

        # Synchronize CUDA so queued GPU work does not leak into the timed region
        if use_cuda:
            torch.cuda.synchronize()
        # perf_counter is a monotonic, high-resolution clock intended for
        # measuring short durations; time.time() is a wall clock
        start_time = time.perf_counter()

        outputs = model(inputs)

        # Wait for the forward pass to actually finish before stopping the clock
        if use_cuda:
            torch.cuda.synchronize()
        end_time = time.perf_counter()

        computation_times.append(end_time - start_time)

        # Index of the largest output along dim 1 is the predicted class
        _, predicted = torch.max(outputs, 1)
        # Labels are only used for bookkeeping, so they never need to be
        # moved to the device
        all_labels.extend(labels.tolist())
        all_preds.extend(predicted.tolist())

        if idx % 10000 == 0 or idx == num_batches:
            print(f"Processed {idx}/{num_batches} images.")

print("Evaluation on test data completed.")

# --------------------------- Compute Confusion Matrix --------------------------- #

class_names = [str(i) for i in range(10)]  # KMNIST has 10 classes labeled 0-9

# Pin the label set so the matrix always has one row/column per class. Without
# it, a class absent from both lists would shrink the matrix and break the
# DataFrame construction below (shape mismatch with class_names).
cm = confusion_matrix(all_labels, all_preds, labels=list(range(len(class_names))))

# Save confusion matrix as CSV (rows: true labels, columns: predicted labels)
cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)
cm_csv_path = CONFUSION_MATRIX_CSV
cm_df.to_csv(cm_csv_path)
print(f"Confusion matrix saved as CSV to '{cm_csv_path}'.")

# Save confusion matrix as PNG
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.ylabel('True Labels')
plt.xlabel('Predicted Labels')
plt.title('Confusion Matrix - MLP Classifier on KMNIST Test Set')
plt.tight_layout()
cm_png_path = CONFUSION_MATRIX_PNG
plt.savefig(cm_png_path)
# Close the figure so it is released once the file has been written
plt.close()
print(f"Confusion matrix saved as PNG to '{cm_png_path}'.")

# --------------------------- Compute Statistics --------------------------- #

# Timing summary over all per-image measurements
computation_times_np = np.array(computation_times)
mean_time = computation_times_np.mean()
variance_time = computation_times_np.var()

# Accuracy: share of predictions that match the true label, as a percentage
predictions_array = np.array(all_preds)
labels_array = np.array(all_labels)
correct = (predictions_array == labels_array).sum()
total = len(all_labels)
accuracy = 100 * correct / total

# --------------------------- Save Evaluation Statistics --------------------------- #

# One row of results for this evaluation run
stats = {
    'mean_inference_time_per_image_sec': mean_time,
    'variance_inference_time_per_image_sec2': variance_time,
    'accuracy_percentage': accuracy,
}
stats_df = pd.DataFrame([stats])
stats_csv_path = STATS_CSV

# Rows are appended across runs; the header is written only when the file
# does not exist yet
write_header = not os.path.exists(stats_csv_path)
stats_df.to_csv(
    stats_csv_path,
    mode='a',
    header=write_header,
    index=False,
)

print(f"Test evaluation statistics saved to '{stats_csv_path}'.")
print(f"Accuracy on test set: {accuracy:.2f}%")
print(f"Mean inference time per image: {mean_time:.6f} seconds")
print(f"Variance of inference time per image: {variance_time:.6f} seconds²")


Using device: cuda


  checkpoint = torch.load(MODEL_PATH, map_location=device)


Model loaded and set to evaluation mode.
Processed 10000/10000 images.
Evaluation on test data completed.
Confusion matrix saved as CSV to 'confusion_matrix.csv'.
Confusion matrix saved as PNG to 'confusion_matrix.png'.
Test evaluation statistics saved to 'test_evaluation_stats.csv'.
Accuracy on test set: 85.26%
Mean inference time per image: 0.001011 seconds
Variance of inference time per image: 0.000005 seconds²
Inference time distribution plot saved to 'inference_time_distribution.png'.
