In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import os
import sys
from tqdm import tqdm
import numpy as np

# Import our custom modules
from datasets import SimpleMNISTDataset, prepare_mnist_data, get_mnist_transforms
from Load_Model import load_mnist_model, model_details

In [2]:
def test_model_performance(model, test_loader, device, model_name):
    """
    Test model performance on the test dataset
    """
    model.eval()
    correct = 0
    total = 0
    
    with torch.no_grad():
        for batch_idx, (data, target, _) in enumerate(tqdm(test_loader, desc=f"Testing {model_name}")):
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            
            # For models that return log_softmax, we need to get the predicted class
            if isinstance(outputs, torch.Tensor) and outputs.dim() == 2:
                pred = outputs.argmax(dim=1, keepdim=True)
            else:
                # Handle case where model might return tuple or different format
                pred = outputs.argmax(dim=1, keepdim=True)
            
            correct += pred.eq(target.view_as(pred)).sum().item()
            total += target.size(0)
    
    accuracy = 100. * correct / total
    print(f"{model_name} Test Accuracy: {correct}/{total} ({accuracy:.2f}%)")
    return accuracy

In [3]:
# Set device: use the GPU when PyTorch can see one, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Prepare MNIST data (helper imported from datasets.py)
print("Preparing MNIST dataset...")
prepare_mnist_data()

# Get transforms. Only the test-time transform is used in this cell; the
# training transform keeps its name so any later cell can still reference it.
transform_train, transform_test = get_mnist_transforms()

# Create test dataset
test_dataset = SimpleMNISTDataset(
    path_to_data='./MNIST_Data',
    csv_filename='clean.csv',
    data_transform=transform_test
)

# shuffle=False: evaluation does not need a random order.
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=2)
print(f"Test dataset size: {len(test_dataset)}")

# Define model paths (selecting two different models)
model_paths = [
    './Odysseus-MNIST/Models/Model_1.pth',
    './Odysseus-MNIST/Models/Model_10.pth'
]

# Test each model
results = {}
skipped_paths = []  # checkpoints that were listed above but not found on disk

for i, model_path in enumerate(model_paths):
    if not os.path.exists(model_path):
        print(f"Warning: Model {model_path} not found, skipping...")
        skipped_paths.append(model_path)
        continue

    # The label is the 1-based position in model_paths, not the checkpoint's file name.
    model_label = f"Model_{i+1}"

    print(f"\n{'='*60}")
    print(f"Testing Model {i+1}: {os.path.basename(model_path)}")
    print(f"{'='*60}")

    # Get model details
    print("Model Details:")
    model_details(model_path)

    # Load model
    print(f"\nLoading model from {model_path}...")
    model, mapping = load_mnist_model(model_path, device)

    # Test model performance
    accuracy = test_model_performance(model, test_loader, device, model_label)
    results[model_label] = {
        'path': model_path,
        'accuracy': accuracy,
        'mapping': mapping
    }

    # Clean up: drop the reference before loading the next checkpoint and
    # release cached GPU memory when running on CUDA.
    del model
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

# Print summary
print(f"\n{'='*60}")
print("TEST SUMMARY")
print(f"{'='*60}")
for model_name, result in results.items():
    print(f"{model_name}: {result['accuracy']:.2f}% accuracy")
    if result['mapping'] is not None:
        print(f"  Mapping: {result['mapping']}")

# Only claim full success when every listed checkpoint was actually evaluated.
if results and not skipped_paths:
    print("\nDataset and transforms verification completed successfully!")
    print("All models loaded and tested on MNIST test dataset.")
elif results:
    print(f"\nTested {len(results)} of {len(model_paths)} models; "
          f"skipped (not found): {', '.join(skipped_paths)}")
else:
    print("\nNo models were tested: none of the listed checkpoint files were found.")


Using device: cuda
Preparing MNIST dataset...


100%|██████████████████████████████████| 10000/10000 [00:00<00:00, 13201.85it/s]
  checkpoint = torch.load(model_path, map_location="cpu")
  checkpoint = torch.load(model_path)


Saved 10000 test images to ./MNIST_Data/clean
Saved CSV to ./MNIST_Data/clean.csv
Test dataset size: 10000

Testing Model 1: Model_1.pth
Model Details:

📂 Model file: Model_1.pth
📊 Model Metadata:

Model Category: clean
Architecture_Name: Model_Google_3
Learning_Rate: 0.01
Loss Function: CrossEntropyLoss
optimizer: SGD
Momentum: 0.9
Weight decay: 0.0005
num_workers: 4
Pytorch version: 1.4.0
Trigger type: N/A
Trigger Size: N/A
Trigger_location: N/A
Mapping: N/A
Normalization Type: Min_Max
Mapping Type: N/A
Dataset: MNIST
Batch Size: 128
trigger_fraction: N/A
test_clean_acc: 99.44
test_trigerred_acc: N/A
epoch: 15

Loading model from ./Odysseus-MNIST/Models/Model_1.pth...
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])

  return F.log_softmax(output)
Testing Model_1: 100%|█████████████████████████| 79/79 [00:00<00:00, 133.45it/s]


Model_1 Test Accuracy: 9944/10000 (99.44%)

Testing Model 2: Model_10.pth
Model Details:

📂 Model file: Model_10.pth
📊 Model Metadata:

Model Category: clean
Architecture_Name: Model_Google_1
Learning_Rate: 0.01
Loss Function: CrossEntropyLoss
optimizer: SGD
Momentum: 0.9
Weight decay: 0.0005
num_workers: 4
Pytorch version: 1.4.0
Trigger type: N/A
Trigger Size: N/A
Trigger_location: N/A
Mapping: N/A
Normalization Type: Min_Max
Mapping Type: N/A
Dataset: MNIST
Batch Size: 128
trigger_fraction: N/A
test_clean_acc: 99.45
test_trigerred_acc: N/A
epoch: 14

Loading model from ./Odysseus-MNIST/Models/Model_10.pth...
keys are : dict_keys(['net', 'Model Category', 'Architecture_Name', 'Learning_Rate', 'Loss Function', 'optimizer', 'Momentum', 'Weight decay', 'num_workers', 'Pytorch version', 'Trigger type', 'Trigger Size', 'Trigger_location', 'Mapping', 'Normalization Type', 'Mapping Type', 'Dataset', 'Batch Size', 'trigger_fraction', 'test_clean_acc', 'test_trigerred_acc', 'epoch'])
==>

  return F.log_softmax(output)
Testing Model_2: 100%|█████████████████████████| 79/79 [00:00<00:00, 223.14it/s]

Model_2 Test Accuracy: 9946/10000 (99.46%)

TEST SUMMARY
Model_1: 99.44% accuracy
Model_2: 99.46% accuracy

Dataset and transforms verification completed successfully!
All models loaded and tested on MNIST test dataset.



