In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import string
from mnist_skeptic_v9 import skeptic_v9
import torch.nn.functional as F
from tabulate import tabulate
import pandas as pd

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [2]:
def load_and_visualize_csv(file_path):
    """Load one comma-separated image file and the digit label encoded in its name.

    The base name of ``file_path`` is split on ``'_'`` and field 3 is used as
    the digit label (fields 1 and 5 are the subject and run in the naming
    scheme this notebook reads, but they are not needed for the return value).
    Despite the function name, nothing is plotted.

    Args:
        file_path: Path to a text file whose non-blank lines each hold one
            image row as comma-separated numbers.

    Returns:
        tuple: ``(image_data, digit)`` where ``image_data`` is a float
        ``numpy.ndarray`` (2-D when every row has the same number of values)
        and ``digit`` is an ``int``.

    Raises:
        ValueError: If the file name has fewer than six ``'_'``-separated
            fields, the digit field is not an integer, the file holds no
            data rows, or a cell cannot be parsed as a float.
    """
    filename = os.path.basename(file_path)

    # Split once and validate, instead of indexing blindly into the pieces.
    fields = filename.split('_')
    if len(fields) < 6:
        raise ValueError(
            f"Unexpected file name {filename!r}: expected at least six '_'-separated fields"
        )
    digit = int(fields[3])

    # Blank lines (including a trailing newline) are skipped rather than
    # being passed to float(), which would fail on the empty string.
    with open(file_path, 'r') as f:
        rows = [line.strip() for line in f]
    rows = [row for row in rows if row]
    if not rows:
        raise ValueError(f"No image rows found in {file_path!r}")

    image_data = np.array([[float(value) for value in row.split(',')] for row in rows])

    return image_data, digit

# Example usage
# Smoke test: load the first '.txt' entry that os.listdir yields from the
# folder and print its label. os.listdir gives no ordering guarantee, so the
# file picked here may differ between machines.
# Note that csv_folder, file, file_path, image_data and true_label are left
# behind as module-level names after this cell runs.
csv_folder = 'CSV_Images'
for file in os.listdir(csv_folder):
    if file.endswith('.txt'):
        file_path = os.path.join(csv_folder, file)
        image_data, true_label = load_and_visualize_csv(file_path)
        print(f"True Label: {true_label}")
        break  # Remove this to process all files

True Label: 7


In [3]:
class ResNet50_16x16(nn.Module):
    """torchvision ResNet-50 with a single-channel stem and a 10-class head.

    The backbone is created without pretrained weights; its first
    convolution is replaced by a 1-input-channel one and its final fully
    connected layer by a 10-output one. The wrapped network is stored as
    ``self.model``, so state-dict keys carry a ``model.`` prefix.

    The class name refers to 16x16 inputs; nothing in the code checks the
    input size.
    """

    def __init__(self):
        super().__init__()
        # `weights=None` is the current spelling of "no pretrained weights";
        # the `pretrained=` keyword is deprecated in torchvision.
        self.model = models.resnet50(weights=None)
        # Same geometry as the stock stem, but 1 input channel instead of 3.
        self.model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Swap the classification head for a 10-way classifier.
        self.model.fc = nn.Linear(self.model.fc.in_features, 10)

    def forward(self, x):
        """Return the wrapped network's output for ``x`` unchanged."""
        return self.model(x)

def load_resnet50(model_path):
    """Build a ``ResNet50_16x16``, load a checkpoint into it and prepare it for inference.

    Args:
        model_path: Path to a file holding a state dict saved with ``torch.save``.

    Returns:
        The model in eval mode, moved to the module-level ``device``.

    Notes:
        Loading stays non-strict, so missing or unexpected parameter names do
        not raise. They are printed instead, because a key mismatch would
        otherwise leave the affected layers at their random initialisation
        without any visible sign.
    """
    model = ResNet50_16x16()
    # weights_only=True limits unpickling to tensors and plain containers,
    # which is all a state dict needs, and avoids running arbitrary pickled code.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    load_result = model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        print(
            f"Warning: checkpoint {model_path!r} does not fully match ResNet50_16x16 "
            f"({len(load_result.missing_keys)} missing keys, "
            f"{len(load_result.unexpected_keys)} unexpected keys); "
            "unmatched layers keep their random initialisation."
        )
    model.eval()
    return model.to(device)

# Load the ResNet-50 checkpoint into a module-level model (load_resnet50 puts
# it in eval mode and moves it to `device`). evaluate_resnet50 loads its own
# copy of the checkpoint and does not read this variable.
resnet_model = load_resnet50('resnet50_mnist_experiment.pth')

  model.load_state_dict(torch.load(model_path, map_location=device), strict=False)


In [4]:
# LeNet-5 style network for single-channel 16x16 images
class LeNet5_16x16(nn.Module):
    """Two conv/tanh/average-pool stages followed by three linear layers.

    For a 16x16 input the spatial size shrinks 16 -> 12 -> 6 -> 2 -> 1, so the
    second stage emits 16 features per sample, which is what ``fc1`` expects.
    The linear layers are applied back to back with no activation in between.

    Args:
        num_classes: Number of outputs of the last linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        first_conv = nn.Conv2d(1, 6, kernel_size=5)
        self.layer1 = nn.Sequential(first_conv, nn.Tanh(), nn.AvgPool2d(kernel_size=2))
        second_conv = nn.Conv2d(6, 16, kernel_size=5)
        self.layer2 = nn.Sequential(second_conv, nn.Tanh(), nn.AvgPool2d(kernel_size=2))
        # 16 channels x 1 x 1 spatial positions survive both stages for 16x16 input.
        flat_features = 16 * 1 * 1
        self.fc1 = nn.Linear(flat_features, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the ``(batch, num_classes)`` output of the network for ``x``."""
        features = self.layer2(self.layer1(x))
        # Collapse everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        hidden = self.fc2(self.fc1(flat))
        return self.fc3(hidden)

# Function to load LeNet5 model for 16x16 images with pre-trained weights
def load_lenet5_model_16x16(file_path=None):
    """Create a 10-class ``LeNet5_16x16`` and optionally load weights into it.

    Args:
        file_path: Path to a state dict saved with ``torch.save``. When falsy
            (``None`` or ``''``) the model keeps its fresh initialisation.

    Returns:
        The model. This function neither calls ``.eval()`` nor moves the
        model to another device; callers are expected to do both.
    """
    model = LeNet5_16x16(num_classes=10)
    if file_path:
        # map_location='cpu' lets a checkpoint saved from a GPU load on a
        # CPU-only machine; the freshly built model lives on the CPU anyway.
        # weights_only=True limits unpickling to tensors and plain containers.
        state_dict = torch.load(file_path, map_location='cpu', weights_only=True)
        model.load_state_dict(state_dict)
    return model

# Load the trained LeNet-5 weights into a module-level model.
# load_lenet5_model_16x16 neither calls .eval() nor moves the model to
# `device`, so do both before using this object for inference.
lenet_model = load_lenet5_model_16x16('lenet5_trained_model.pth')

  model.load_state_dict(torch.load(file_path))  # Load pre-trained weights


In [5]:
class ModelEnsemble:
    """Combine several models by averaging their outputs element-wise."""

    def __init__(self, models):
        """Keep a reference to the member models.

        Args:
            models: Sequence of callables that accept the same input and
                return tensors of identical shape. Each must also provide
                ``eval()`` if ``ModelEnsemble.eval`` is used.
        """
        self.models = models

    def predict(self, x):
        """Return the mean of every member's output for ``x``.

        Outputs are averaged exactly as the members return them; no softmax
        is applied here. Gradient tracking is not disabled here either, so
        wrap the call in ``torch.no_grad()`` for inference.
        """
        predictions = [model(x) for model in self.models]
        return torch.stack(predictions).mean(dim=0)

    def __call__(self, x):
        """Alias for ``predict`` so the ensemble can be used wherever a
        single model is called directly, e.g. by ``evaluate_model``."""
        return self.predict(x)

    def eval(self):
        """Put every member model into evaluation mode."""
        for model in self.models:
            model.eval()

def load_ensemble_models(model_class, folder_path, num_models=20, device='cpu', verbose=True):
    """Load up to ``num_models`` checkpoints from a folder into a ``ModelEnsemble``.

    Checkpoint ``i`` is expected at
    ``<folder_path>/skeptic_v10_<letter>_finetuned.pth`` where ``<letter>`` is
    the i-th lowercase ASCII letter. Because the suffix is a single letter,
    at most 26 checkpoints are attempted no matter how large ``num_models`` is.

    A checkpoint that is missing or fails to load is reported with ``print``
    and skipped, so the returned ensemble may hold fewer than ``num_models``
    members.

    Args:
        model_class: Zero-argument callable returning a fresh model.
        folder_path: Directory containing the checkpoints.
        num_models: Number of letter-suffixed checkpoints to try.
        device: Device the weights are mapped to and the models moved to.
        verbose: When true, print one line per successfully loaded model.
            Load errors are printed regardless of this flag.

    Returns:
        ModelEnsemble: Ensemble of the successfully loaded models, each in
        eval mode.

    Raises:
        ValueError: If not a single checkpoint could be loaded.
    """
    # Not named `models`, so the torchvision `models` import is not shadowed.
    loaded_models = []

    for position, letter in enumerate(string.ascii_lowercase[:num_models], start=1):
        model = model_class()
        checkpoint_path = os.path.join(folder_path, f'skeptic_v10_{letter}_finetuned.pth')

        try:
            if not os.path.exists(checkpoint_path):
                raise FileNotFoundError(f"Model checkpoint not found: {checkpoint_path}")

            # weights_only=True limits unpickling to tensors and plain
            # containers, which is all a state dict needs.
            state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
            model.load_state_dict(state_dict)
            model.to(device)
            model.eval()
            loaded_models.append(model)

            if verbose:
                print(f"Loaded model {position}/{num_models}: {checkpoint_path}")
        except Exception as e:
            # Deliberately best-effort: report the failure and carry on with
            # the remaining checkpoints.
            print(f"Error loading model {position}/{num_models}: {str(e)}")

    if not loaded_models:
        raise ValueError("No models were successfully loaded.")

    ensemble = ModelEnsemble(loaded_models)
    ensemble.eval()
    return ensemble


# Build the module-level ensemble from the checkpoints under 'saved_models/skeptic_v10'.
# The success message below is printed even when only some of the 20
# checkpoints loaded, because load_ensemble_models reports per-file failures
# and continues.
# NOTE(review): the model class is `skeptic_v9` while the checkpoints are
# named `skeptic_v10_*` — confirm the architecture matches the saved weights.
ensemble = load_ensemble_models(skeptic_v9, 'saved_models/skeptic_v10', num_models=20, device=device)
print("Ensemble loaded successfully.")

  state_dict = torch.load(checkpoint_path, map_location=device)


Loaded model 1/20: saved_models/skeptic_v10\skeptic_v10_a_finetuned.pth
Loaded model 2/20: saved_models/skeptic_v10\skeptic_v10_b_finetuned.pth
Loaded model 3/20: saved_models/skeptic_v10\skeptic_v10_c_finetuned.pth
Loaded model 4/20: saved_models/skeptic_v10\skeptic_v10_d_finetuned.pth
Loaded model 5/20: saved_models/skeptic_v10\skeptic_v10_e_finetuned.pth
Loaded model 6/20: saved_models/skeptic_v10\skeptic_v10_f_finetuned.pth
Loaded model 7/20: saved_models/skeptic_v10\skeptic_v10_g_finetuned.pth
Loaded model 8/20: saved_models/skeptic_v10\skeptic_v10_h_finetuned.pth
Loaded model 9/20: saved_models/skeptic_v10\skeptic_v10_i_finetuned.pth
Loaded model 10/20: saved_models/skeptic_v10\skeptic_v10_j_finetuned.pth
Loaded model 11/20: saved_models/skeptic_v10\skeptic_v10_k_finetuned.pth
Loaded model 12/20: saved_models/skeptic_v10\skeptic_v10_l_finetuned.pth
Loaded model 13/20: saved_models/skeptic_v10\skeptic_v10_m_finetuned.pth
Loaded model 14/20: saved_models/skeptic_v10\skeptic_v10_n_f

In [6]:
def preprocess_image(image_data):
    """Turn a 2-D image array into a normalised single-image batch on ``device``.

    The array is scaled by 255, cast to ``uint8``, wrapped in an 8-bit
    grayscale PIL image, converted with ``ToTensor`` and normalised with mean
    0.5 and std 0.5. A leading batch dimension is added before the tensor is
    moved to the module-level ``device``.
    """
    # NOTE(review): multiplying by 255 presumes pixel values in [0, 1] —
    # confirm against the CSV files; larger values would wrap in uint8.
    pixels_u8 = (image_data * 255).astype(np.uint8)
    grayscale = Image.fromarray(pixels_u8, mode='L')

    to_normalised_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])
    batch = to_normalised_tensor(grayscale).unsqueeze(0)
    return batch.to(device)

def evaluate_model(model, image_tensor):
    """Return the predicted class index for a single input.

    Args:
        model: Callable mapping ``image_tensor`` to a tensor of class scores
            whose last dimension indexes the classes.
        image_tensor: Input holding exactly one sample.

    Returns:
        int: Index of the highest score along the last dimension.

    Raises:
        RuntimeError or ValueError (depending on the torch version): If the
            output holds more than one sample, since a single ``int`` cannot
            represent it.
    """
    with torch.no_grad():
        output = model(image_tensor)
    # argmax over the class dimension, not over the flattened tensor: a
    # flattened argmax on a (batch, classes) output yields a flat index that
    # is not a class label. `.item()` then rejects multi-sample outputs.
    return output.argmax(dim=-1).item()

In [7]:
def evaluate_resnet50(csv_folder, model_path='resnet50_mnist_experiment.pth'):
    """Evaluate a ResNet-50 checkpoint on every ``.txt`` image in a folder.

    Args:
        csv_folder: Directory whose ``.txt`` files are read with
            ``load_and_visualize_csv``.
        model_path: Checkpoint passed to ``load_resnet50``.

    Returns:
        tuple: ``(results_df, accuracy, top_1_accuracy, top_3_accuracy)``.
        ``results_df`` has one row per file with the true and predicted
        labels, their softmax confidences (stored as strings formatted to
        four decimals), the 1-based rank of the true label, and the absolute
        difference between the true and predicted ranks. The three accuracies
        are percentages.

    Raises:
        ValueError: If the folder holds no ``.txt`` file, or a preprocessed
            tensor is neither 4-D nor 5-D.
    """
    resnet_model = load_resnet50(model_path)

    # os.listdir returns names in arbitrary order; sorting keeps the row
    # order (and so the first rows a caller prints) reproducible.
    txt_files = sorted(name for name in os.listdir(csv_folder) if name.endswith('.txt'))
    if not txt_files:
        # Without this guard the accuracy computation divides by zero.
        raise ValueError(f"No .txt files found in {csv_folder!r}")

    results = []
    correct = 0
    top_1_correct = 0
    top_3_correct = 0

    for file in txt_files:
        file_path = os.path.join(csv_folder, file)
        image_data, true_label = load_and_visualize_csv(file_path)
        image_tensor = preprocess_image(image_data)

        if image_tensor.dim() == 5:
            image_tensor = image_tensor.squeeze(1)
        elif image_tensor.dim() != 4:
            raise ValueError(f"Unexpected tensor shape: {image_tensor.shape}")

        with torch.no_grad():
            output = resnet_model(image_tensor)

        confidence_scores = F.softmax(output, dim=1).squeeze().cpu().tolist()
        predicted = output.argmax(1).item()

        true_label_confidence = confidence_scores[true_label]
        predicted_confidence = confidence_scores[predicted]

        # Labels from most to least confident; the sort is stable, so ties
        # keep ascending label order. A label's rank is its 1-based position.
        ranked_labels = sorted(
            range(len(confidence_scores)),
            key=confidence_scores.__getitem__,
            reverse=True,
        )
        true_label_rank = ranked_labels.index(true_label) + 1
        predicted_rank = ranked_labels.index(predicted) + 1
        distance = abs(true_label_rank - predicted_rank)

        if predicted == true_label:
            correct += 1

        if true_label_rank == 1:
            top_1_correct += 1

        if true_label_rank <= 3:
            top_3_correct += 1

        results.append({
            'True Label': true_label,
            'Predicted Label': predicted,
            'Predicted Confidence': f"{predicted_confidence:.4f}",
            'True Label Confidence': f"{true_label_confidence:.4f}",
            'True Label Rank': true_label_rank,
            'Distance': distance
        })

    results_df = pd.DataFrame(results)

    # Calculate overall metrics
    total = len(txt_files)
    accuracy = (correct / total) * 100
    top_1_accuracy = (top_1_correct / total) * 100
    top_3_accuracy = (top_3_correct / total) * 100

    return results_df, accuracy, top_1_accuracy, top_3_accuracy

# Run evaluation
# The four result names below are module-level and are reassigned by every
# later evaluation cell, so they always reflect the most recently run cell.
evaluation_results, accuracy, top_1_accuracy, top_3_accuracy = evaluate_resnet50('CSV_Images')

# Print pretty table (showing only first 10 rows)
print(tabulate(evaluation_results.head(10), headers='keys', tablefmt='pretty', showindex=False))

# Print overall metrics
print(f"\nOverall Accuracy: {accuracy:.2f}%")
print(f"Top-1 Accuracy (true label is model's top prediction): {top_1_accuracy:.2f}%")
print(f"Top-3 Accuracy (true label is within model's top 3 predictions): {top_3_accuracy:.2f}%")

  model.load_state_dict(torch.load(model_path, map_location=device), strict=False)


+------------+-----------------+----------------------+-----------------------+-----------------+----------+
| True Label | Predicted Label | Predicted Confidence | True Label Confidence | True Label Rank | Distance |
+------------+-----------------+----------------------+-----------------------+-----------------+----------+
|     7      |        4        |        0.1997        |        0.1048         |        4        |    3     |
|     7      |        4        |        0.1962        |        0.1024         |        4        |    3     |
|     4      |        4        |        0.2092        |        0.2092         |        1        |    0     |
|     4      |        4        |        0.1843        |        0.1843         |        1        |    0     |
|     7      |        4        |        0.1955        |        0.0962         |        4        |    3     |
|     7      |        4        |        0.1971        |        0.1001         |        4        |    3     |
|     6      |     

In [8]:
def evaluate_lenet5(csv_folder, model_name):
    """Evaluate a LeNet-5 checkpoint on every ``.txt`` image in a folder.

    Args:
        csv_folder: Directory whose ``.txt`` files are read with
            ``load_and_visualize_csv``.
        model_name: Checkpoint path passed to ``load_lenet5_model_16x16``.

    Returns:
        tuple: ``(results_df, accuracy, top_1_accuracy, top_3_accuracy)``.
        ``results_df`` has one row per file with the true and predicted
        labels, their softmax confidences (stored as strings formatted to
        four decimals), the 1-based rank of the true label, and the absolute
        difference between the true and predicted ranks. The three accuracies
        are percentages.

    Raises:
        ValueError: If the folder holds no ``.txt`` file, or a preprocessed
            tensor is neither 4-D nor 5-D.
    """
    lenet_model = load_lenet5_model_16x16(model_name)
    lenet_model.eval()  # Set the model to evaluation mode

    # Use the GPU when available; the loader leaves the model where it was built.
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    lenet_model = lenet_model.to(target_device)

    # os.listdir returns names in arbitrary order; sorting keeps the row
    # order (and so the first rows a caller prints) reproducible.
    txt_files = sorted(name for name in os.listdir(csv_folder) if name.endswith('.txt'))
    if not txt_files:
        # Without this guard the accuracy computation divides by zero.
        raise ValueError(f"No .txt files found in {csv_folder!r}")

    results = []
    correct = 0
    top_1_correct = 0
    top_3_correct = 0

    for file in txt_files:
        file_path = os.path.join(csv_folder, file)
        image_data, true_label = load_and_visualize_csv(file_path)
        image_tensor = preprocess_image(image_data)

        if image_tensor.dim() == 5:
            image_tensor = image_tensor.squeeze(1)
        elif image_tensor.dim() != 4:
            raise ValueError(f"Unexpected tensor shape: {image_tensor.shape}")

        # Move input tensor to the same device as the model
        image_tensor = image_tensor.to(target_device)

        with torch.no_grad():
            output = lenet_model(image_tensor)

        confidence_scores = F.softmax(output, dim=1).squeeze().cpu().tolist()
        predicted = output.argmax(1).item()

        true_label_confidence = confidence_scores[true_label]
        predicted_confidence = confidence_scores[predicted]

        # Labels from most to least confident; the sort is stable, so ties
        # keep ascending label order. A label's rank is its 1-based position.
        ranked_labels = sorted(
            range(len(confidence_scores)),
            key=confidence_scores.__getitem__,
            reverse=True,
        )
        true_label_rank = ranked_labels.index(true_label) + 1
        predicted_rank = ranked_labels.index(predicted) + 1
        distance = abs(true_label_rank - predicted_rank)

        if predicted == true_label:
            correct += 1

        if true_label_rank == 1:
            top_1_correct += 1

        if true_label_rank <= 3:
            top_3_correct += 1

        results.append({
            'True Label': true_label,
            'Predicted Label': predicted,
            'Predicted Confidence': f"{predicted_confidence:.4f}",
            'True Label Confidence': f"{true_label_confidence:.4f}",
            'True Label Rank': true_label_rank,
            'Distance': distance
        })

    results_df = pd.DataFrame(results)

    # Calculate overall metrics
    total = len(txt_files)
    accuracy = (correct / total) * 100
    top_1_accuracy = (top_1_correct / total) * 100
    top_3_accuracy = (top_3_correct / total) * 100

    return results_df, accuracy, top_1_accuracy, top_3_accuracy

# Run evaluation
# Evaluates the LeNet-5 checkpoint 'best_model.pth'. The result names are
# module-level and shared with the other evaluation cells, so this
# overwrites whatever the previous evaluation stored in them.
evaluation_results, accuracy, top_1_accuracy, top_3_accuracy = evaluate_lenet5('CSV_Images', 'best_model.pth')

# Print pretty table (showing only first 10 rows)
print(tabulate(evaluation_results.head(10), headers='keys', tablefmt='pretty', showindex=False))

# Print overall metrics
print(f"\nOverall Accuracy: {accuracy:.2f}%")
print(f"Top-1 Accuracy (true label is model's top prediction): {top_1_accuracy:.2f}%")
print(f"Top-3 Accuracy (true label is within model's top 3 predictions): {top_3_accuracy:.2f}%")

+------------+-----------------+----------------------+-----------------------+-----------------+----------+
| True Label | Predicted Label | Predicted Confidence | True Label Confidence | True Label Rank | Distance |
+------------+-----------------+----------------------+-----------------------+-----------------+----------+
|     7      |        8        |        0.5577        |        0.0000         |        8        |    7     |
|     7      |        8        |        0.7969        |        0.0000         |        7        |    6     |
|     4      |        8        |        0.8689        |        0.0000         |        9        |    8     |
|     4      |        8        |        0.4294        |        0.0000         |        7        |    6     |
|     7      |        0        |        1.0000        |        0.0000         |        9        |    8     |
|     7      |        5        |        0.9627        |        0.0000         |        8        |    7     |
|     6      |     

  model.load_state_dict(torch.load(file_path))  # Load pre-trained weights


In [9]:
# Run evaluation
# Same procedure as the other LeNet-5 evaluation cells, here for the
# checkpoint 'lenet5_trained_model.pth'; the shared module-level result
# names are overwritten.
evaluation_results, accuracy, top_1_accuracy, top_3_accuracy = evaluate_lenet5('CSV_Images', 'lenet5_trained_model.pth')

# Print pretty table (showing only first 10 rows)
print(tabulate(evaluation_results.head(10), headers='keys', tablefmt='pretty', showindex=False))

# Print overall metrics
print(f"\nOverall Accuracy: {accuracy:.2f}%")
print(f"Top-1 Accuracy (true label is model's top prediction): {top_1_accuracy:.2f}%")
print(f"Top-3 Accuracy (true label is within model's top 3 predictions): {top_3_accuracy:.2f}%")

+------------+-----------------+----------------------+-----------------------+-----------------+----------+
| True Label | Predicted Label | Predicted Confidence | True Label Confidence | True Label Rank | Distance |
+------------+-----------------+----------------------+-----------------------+-----------------+----------+
|     7      |        6        |        0.1303        |        0.0843         |        8        |    7     |
|     7      |        6        |        0.1434        |        0.0784         |       10        |    9     |
|     4      |        1        |        0.1296        |        0.0837         |        9        |    8     |
|     4      |        6        |        0.1879        |        0.1429         |        2        |    1     |
|     7      |        5        |        0.1557        |        0.0782         |       10        |    9     |
|     7      |        5        |        0.1527        |        0.0570         |       10        |    9     |
|     6      |     

  model.load_state_dict(torch.load(file_path))  # Load pre-trained weights


In [10]:
# Run evaluation
# Same procedure as the other LeNet-5 evaluation cells, here for the
# checkpoint 'lenet5_trained_model2.pth'; the shared module-level result
# names are overwritten.
evaluation_results, accuracy, top_1_accuracy, top_3_accuracy = evaluate_lenet5('CSV_Images', 'lenet5_trained_model2.pth')

# Print pretty table (showing only first 10 rows)
print(tabulate(evaluation_results.head(10), headers='keys', tablefmt='pretty', showindex=False))

# Print overall metrics
print(f"\nOverall Accuracy: {accuracy:.2f}%")
print(f"Top-1 Accuracy (true label is model's top prediction): {top_1_accuracy:.2f}%")
print(f"Top-3 Accuracy (true label is within model's top 3 predictions): {top_3_accuracy:.2f}%")

+------------+-----------------+----------------------+-----------------------+-----------------+----------+
| True Label | Predicted Label | Predicted Confidence | True Label Confidence | True Label Rank | Distance |
+------------+-----------------+----------------------+-----------------------+-----------------+----------+
|     7      |        4        |        0.1345        |        0.0562         |       10        |    9     |
|     7      |        5        |        0.1486        |        0.0782         |        8        |    7     |
|     4      |        9        |        0.1448        |        0.0515         |       10        |    9     |
|     4      |        6        |        0.1946        |        0.1311         |        3        |    2     |
|     7      |        2        |        0.1855        |        0.0885         |        4        |    3     |
|     7      |        5        |        0.1718        |        0.0359         |       10        |    9     |
|     6      |     

  model.load_state_dict(torch.load(file_path))  # Load pre-trained weights


In [12]:
# Run evaluation
# Same procedure as the other LeNet-5 evaluation cells, here for the
# checkpoint 'lenet5_trained_model_ensemble_1.pth'. It is loaded through
# evaluate_lenet5 as a single LeNet-5 state dict, not through the
# ModelEnsemble path. The shared module-level result names are overwritten.
evaluation_results, accuracy, top_1_accuracy, top_3_accuracy = evaluate_lenet5('CSV_Images', 'lenet5_trained_model_ensemble_1.pth')

# Print pretty table (showing only first 10 rows)
print(tabulate(evaluation_results.head(10), headers='keys', tablefmt='pretty', showindex=False))

# Print overall metrics
print(f"\nOverall Accuracy: {accuracy:.2f}%")
print(f"Top-1 Accuracy (true label is model's top prediction): {top_1_accuracy:.2f}%")
print(f"Top-3 Accuracy (true label is within model's top 3 predictions): {top_3_accuracy:.2f}%")

+------------+-----------------+----------------------+-----------------------+-----------------+----------+
| True Label | Predicted Label | Predicted Confidence | True Label Confidence | True Label Rank | Distance |
+------------+-----------------+----------------------+-----------------------+-----------------+----------+
|     7      |        2        |        0.2128        |        0.0848         |        5        |    4     |
|     7      |        7        |        0.2961        |        0.2961         |        1        |    0     |
|     4      |        2        |        0.2747        |        0.0581         |        9        |    8     |
|     4      |        4        |        0.1836        |        0.1836         |        1        |    0     |
|     7      |        2        |        0.2199        |        0.1835         |        2        |    1     |
|     7      |        7        |        0.2507        |        0.2507         |        1        |    0     |
|     6      |     

  model.load_state_dict(torch.load(file_path))  # Load pre-trained weights


In [11]:
def evaluate_ensemble(csv_folder, ensemble_path, num_models=20):
    """Evaluate an ensemble of ``skeptic_v9`` models on every ``.txt`` image in a folder.

    Args:
        csv_folder: Directory whose ``.txt`` files are read with
            ``load_and_visualize_csv``.
        ensemble_path: Folder of checkpoints passed to ``load_ensemble_models``.
        num_models: Number of checkpoints ``load_ensemble_models`` should try.

    Returns:
        tuple: ``(results_df, accuracy, top_1_accuracy, top_3_accuracy)``.
        ``results_df`` has one row per file with the true and predicted
        labels, the softmax confidences of the averaged ensemble output
        (stored as strings formatted to four decimals), the 1-based rank of
        the true label, and the absolute difference between the true and
        predicted ranks. The three accuracies are percentages.

    Raises:
        ValueError: If the folder holds no ``.txt`` file, a preprocessed
            tensor is neither 4-D nor 5-D, or no checkpoint could be loaded.
    """
    ensemble_model = load_ensemble_models(skeptic_v9, ensemble_path, num_models=num_models, device=device)
    ensemble_model.eval()

    # os.listdir returns names in arbitrary order; sorting keeps the row
    # order (and so the first rows a caller prints) reproducible.
    txt_files = sorted(name for name in os.listdir(csv_folder) if name.endswith('.txt'))
    if not txt_files:
        # Without this guard the accuracy computation divides by zero.
        raise ValueError(f"No .txt files found in {csv_folder!r}")

    results = []
    correct = 0
    top_1_correct = 0
    top_3_correct = 0

    for file in txt_files:
        file_path = os.path.join(csv_folder, file)
        image_data, true_label = load_and_visualize_csv(file_path)
        image_tensor = preprocess_image(image_data)

        if image_tensor.dim() == 5:
            image_tensor = image_tensor.squeeze(1)
        elif image_tensor.dim() != 4:
            raise ValueError(f"Unexpected tensor shape: {image_tensor.shape}")

        # Ensure the input tensor is on the same device as the model
        image_tensor = image_tensor.to(device)

        with torch.no_grad():
            output = ensemble_model.predict(image_tensor)

        confidence_scores = F.softmax(output, dim=1).squeeze().cpu().tolist()
        predicted = output.argmax(1).item()

        true_label_confidence = confidence_scores[true_label]
        predicted_confidence = confidence_scores[predicted]

        # Labels from most to least confident; the sort is stable, so ties
        # keep ascending label order. A label's rank is its 1-based position.
        ranked_labels = sorted(
            range(len(confidence_scores)),
            key=confidence_scores.__getitem__,
            reverse=True,
        )
        true_label_rank = ranked_labels.index(true_label) + 1
        predicted_rank = ranked_labels.index(predicted) + 1
        distance = abs(true_label_rank - predicted_rank)

        if predicted == true_label:
            correct += 1

        if true_label_rank == 1:
            top_1_correct += 1

        if true_label_rank <= 3:
            top_3_correct += 1

        results.append({
            'True Label': true_label,
            'Predicted Label': predicted,
            'Predicted Confidence': f"{predicted_confidence:.4f}",
            'True Label Confidence': f"{true_label_confidence:.4f}",
            'True Label Rank': true_label_rank,
            'Distance': distance
        })

    results_df = pd.DataFrame(results)

    # Calculate overall metrics
    total = len(txt_files)
    accuracy = (correct / total) * 100
    top_1_accuracy = (top_1_correct / total) * 100
    top_3_accuracy = (top_3_correct / total) * 100

    return results_df, accuracy, top_1_accuracy, top_3_accuracy

# Run evaluation
# evaluate_ensemble loads the checkpoints again from `ensemble_path`; it does
# not reuse the module-level `ensemble` object built earlier in the notebook.
# The shared module-level result names are overwritten.
ensemble_path = 'saved_models/skeptic_v10'  # Adjust this path as needed
evaluation_results, accuracy, top_1_accuracy, top_3_accuracy = evaluate_ensemble('CSV_Images', ensemble_path)

# Print pretty table (showing only first 10 rows)
print(tabulate(evaluation_results.head(10), headers='keys', tablefmt='pretty', showindex=False))

# Print overall metrics
print(f"\nOverall Accuracy: {accuracy:.2f}%")
print(f"Top-1 Accuracy (true label is model's top prediction): {top_1_accuracy:.2f}%")
print(f"Top-3 Accuracy (true label is within model's top 3 predictions): {top_3_accuracy:.2f}%")

  state_dict = torch.load(checkpoint_path, map_location=device)


Loaded model 1/20: saved_models/skeptic_v10\skeptic_v10_a_finetuned.pth
Loaded model 2/20: saved_models/skeptic_v10\skeptic_v10_b_finetuned.pth
Loaded model 3/20: saved_models/skeptic_v10\skeptic_v10_c_finetuned.pth
Loaded model 4/20: saved_models/skeptic_v10\skeptic_v10_d_finetuned.pth
Loaded model 5/20: saved_models/skeptic_v10\skeptic_v10_e_finetuned.pth
Loaded model 6/20: saved_models/skeptic_v10\skeptic_v10_f_finetuned.pth
Loaded model 7/20: saved_models/skeptic_v10\skeptic_v10_g_finetuned.pth
Loaded model 8/20: saved_models/skeptic_v10\skeptic_v10_h_finetuned.pth
Loaded model 9/20: saved_models/skeptic_v10\skeptic_v10_i_finetuned.pth
Loaded model 10/20: saved_models/skeptic_v10\skeptic_v10_j_finetuned.pth
Loaded model 11/20: saved_models/skeptic_v10\skeptic_v10_k_finetuned.pth
Loaded model 12/20: saved_models/skeptic_v10\skeptic_v10_l_finetuned.pth
Loaded model 13/20: saved_models/skeptic_v10\skeptic_v10_m_finetuned.pth
Loaded model 14/20: saved_models/skeptic_v10\skeptic_v10_n_f