In [2]:
import os
import json
import zipfile
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from mnist_skeptic_v9 import skeptic_v9
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [7]:
class CompositeDataset(Dataset):
    """Flat dataset of composite-image records collected from per-participant data.

    Each stored record is indexed with the keys ``'composite'`` and
    ``'true_digit'`` in ``__getitem__``; a record lacking either key raises
    ``KeyError`` at access time (not at construction time).
    """

    def __init__(self, data):
        """Flatten ``data`` into ``self.data``.

        Args:
            data: Iterable of per-participant collections. Every element of a
                collection is either a single record, which is stored as-is,
                or a list of records, which is flattened by one level.
        """
        self.data = []
        for participant_data in data:
            for item in participant_data:
                if isinstance(item, list):
                    # Extending with an empty list is a no-op, so empty lists
                    # no longer end up stored as (unusable) samples.
                    self.data.extend(item)
                else:
                    self.data.append(item)

    def __len__(self):
        """Return the number of flattened records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the record at position ``idx``.

        The ``'composite'`` payload is converted to a float32 tensor and
        reshaped to ``(1, 16, 16)``, so it must contain exactly 256 numeric
        values. ``'true_digit'`` is passed through ``int()`` and returned as
        a 0-dim ``torch.long`` tensor.
        """
        item = self.data[idx]
        image = torch.tensor(item['composite'], dtype=torch.float32).view(1, 16, 16)
        label = torch.tensor(int(item['true_digit']), dtype=torch.long)
        return image, label

def load_data(zip_path):
    """Read the JSON payloads of the training and test splits from a zip archive.

    Members whose name starts with ``training_data/training_set/`` go to the
    training list, members starting with ``training_data/test_set/`` go to
    the test list, and everything else is ignored. Archive order is kept.

    Args:
        zip_path: Path of the zip archive to read.

    Returns:
        ``(train_data, test_data)``: two lists holding one parsed JSON value
        per matching archive member.

    Raises:
        json.JSONDecodeError: If a matching member is not valid JSON.
    """
    train_prefix = 'training_data/training_set/'
    test_prefix = 'training_data/test_set/'
    train_data = []
    test_data = []
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        for info in zip_ref.infolist():
            # Archives frequently contain explicit directory entries (names
            # ending in '/'); they match the prefixes but have no JSON body.
            if info.is_dir():
                continue
            if info.filename.startswith(train_prefix):
                target = train_data
            elif info.filename.startswith(test_prefix):
                target = test_data
            else:
                continue
            with zip_ref.open(info) as f:
                target.append(json.load(f))
    return train_data, test_data

In [8]:
class EnsembleModel(nn.Module):
    """Module that holds several sub-models and averages their outputs."""

    def __init__(self, models):
        """Register ``models`` as sub-modules via an ``nn.ModuleList``."""
        super().__init__()
        self.models = nn.ModuleList(models)

    def forward(self, x):
        """Feed ``x`` to every member and return the element-wise mean output."""
        member_outputs = []
        for member in self.models:
            member_outputs.append(member(x))
        # Stack along a new leading axis, then average that axis away.
        stacked = torch.stack(member_outputs)
        return stacked.mean(dim=0)

def train_epoch(ensemble, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    The model is switched to training mode; for every batch the inputs and
    labels are moved to ``device``, gradients are reset, and a
    forward/backward/step cycle is performed.

    Returns:
        ``(mean_loss, accuracy)``: the sum of per-batch losses divided by the
        number of batches, and the percentage of samples whose highest-scoring
        class equals the label.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches.
    """
    ensemble.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0

    for batch_inputs, batch_labels in dataloader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        scores = ensemble(batch_inputs)
        batch_loss = criterion(scores, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        # max over dim 1 returns (values, indices); the indices are the predictions.
        predicted = scores.max(1)[1]
        n_seen += batch_labels.size(0)
        n_correct += predicted.eq(batch_labels).sum().item()

    mean_loss = running_loss / len(dataloader)
    accuracy = 100 * n_correct / n_seen
    return mean_loss, accuracy

def evaluate(ensemble, dataloader, criterion, device, show_matrix=False):
    """Score ``ensemble`` on ``dataloader`` without updating any weights.

    The model is put in eval mode and all batches are processed under
    ``torch.no_grad()``.

    Args:
        ensemble: Model to evaluate.
        dataloader: Iterable of ``(inputs, labels)`` batches supporting ``len()``.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device the batches are moved to.
        show_matrix: When true, a confusion-matrix heatmap is written to
            ``confusion_matrix_ensemble.png`` in the working directory.

    Returns:
        ``(mean_loss, accuracy, labels, predictions)``: mean per-batch loss,
        accuracy in percent, and two NumPy arrays with the true and predicted
        class of every sample in iteration order.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    ensemble.eval()
    running_loss = 0
    n_correct = 0
    n_seen = 0
    predictions = []
    targets = []

    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            scores = ensemble(batch_inputs)
            running_loss += criterion(scores, batch_labels).item()

            # max over dim 1 returns (values, indices); keep the indices.
            predicted = scores.max(1)[1]
            n_seen += batch_labels.size(0)
            n_correct += predicted.eq(batch_labels).sum().item()

            predictions.extend(predicted.cpu().numpy())
            targets.extend(batch_labels.cpu().numpy())

    accuracy = 100 * n_correct / n_seen

    if show_matrix:
        cm = confusion_matrix(targets, predictions)
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.savefig('confusion_matrix_ensemble.png')
        plt.close()

    mean_loss = running_loss / len(dataloader)
    return mean_loss, accuracy, np.array(targets), np.array(predictions)

def analyze_confusion_matrix(cm):
    """Find, per true class, the most and least frequently confused other class.

    Each row of ``cm`` is normalised by its sum. Among the off-diagonal
    entries of that row, the column with the highest rate is reported as the
    most confusable class and the column with the lowest rate as the most
    discriminable class. Ties resolve to the lowest column index.

    The reported values are column indices of ``cm`` itself. The previous
    implementation searched an array with the diagonal entry removed, so
    every result above the row index was shifted down by one.

    Args:
        cm: 2-D NumPy array of counts (rows: true class, columns: predicted).

    Returns:
        ``(most_confusable_digits, most_discriminable_digits)``: two dicts
        mapping row index to column index. Rows that sum to zero, or that
        have no off-diagonal column, are omitted.
    """
    # NOTE(review): indices equal digit labels only if every digit occurs in
    # the data used to build ``cm`` - confirm against the caller.
    most_confusable_digits = {}
    most_discriminable_digits = {}

    for i in range(cm.shape[0]):
        row_sum = cm[i].sum()
        if row_sum <= 0:
            continue

        confusions = cm[i] / row_sum
        off_diagonal = np.arange(len(confusions)) != i
        if not off_diagonal.any():
            # Single-class matrix: there is no "other" class to report.
            continue

        # Neutralise the diagonal with +/-inf instead of dropping it, so the
        # argmax/argmin positions stay valid column indices of ``cm``.
        most_confusable_digits[i] = int(np.argmax(np.where(off_diagonal, confusions, -np.inf)))
        most_discriminable_digits[i] = int(np.argmin(np.where(off_diagonal, confusions, np.inf)))

    return most_confusable_digits, most_discriminable_digits


def finetune_ensemble(ensemble, train_loader, val_loader, num_epochs=50, patience=5):
    """Fine-tune ``ensemble`` with early stopping and report the best checkpoint.

    Training uses cross-entropy loss and Adam (lr=0.0001) on CUDA when
    available, otherwise on CPU. After every epoch the model is scored on
    ``val_loader``; each strict improvement in validation accuracy writes a
    checkpoint to ``finetuned_models/best_finetuned_ensemble_epoch_<n>.pth``.
    Training stops once ``patience`` consecutive epochs bring no improvement.
    The best checkpoint of this run is then reloaded, evaluated with the
    confusion-matrix plot enabled, and summarised on stdout.

    Args:
        ensemble: Model to fine-tune (modified in place).
        train_loader: Batches used for optimisation.
        val_loader: Batches used for model selection and the final report.
        num_epochs: Maximum number of epochs.
        patience: Number of non-improving epochs that triggers early stopping.

    Returns:
        None. Results are printed and written to disk.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    ensemble.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(ensemble.parameters(), lr=0.0001)

    best_val_acc = 0
    epochs_no_improve = 0
    # Remember exactly which file this run wrote last. Rebuilding the name
    # from loop counters breaks when no epoch ran or nothing ever improved,
    # and could pick up a stale file left behind by an earlier run.
    best_model_path = None

    # Create a new directory for finetuned models
    os.makedirs('finetuned_models', exist_ok=True)

    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch(ensemble, train_loader, criterion, optimizer, device)
        val_loss, val_acc, _, _ = evaluate(ensemble, val_loader, criterion, device)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            epochs_no_improve = 0
            best_model_path = os.path.join('finetuned_models', f'best_finetuned_ensemble_epoch_{epoch+1}.pth')
            torch.save(ensemble.state_dict(), best_model_path)
        else:
            epochs_no_improve += 1

        if epochs_no_improve == patience:
            print("Early stopping")
            break

    # Final evaluation
    if best_model_path is not None:
        # map_location keeps the reload valid on whichever device is in use.
        ensemble.load_state_dict(torch.load(best_model_path, map_location=device))
    else:
        print("No checkpoint was saved during this run; evaluating the current weights.")
    _, final_accuracy, all_labels, all_predictions = evaluate(ensemble, val_loader, criterion, device, show_matrix=True)

    cm = confusion_matrix(all_labels, all_predictions)
    most_confusable_digits, most_discriminable_digits = analyze_confusion_matrix(cm)

    print(f'Final Ensemble Accuracy: {final_accuracy:.2f}%')
    print(f'Most Confusable Digits: {most_confusable_digits}')
    print(f'Most Discriminable Digits: {most_discriminable_digits}')

In [9]:
def main():
    """Load the data and the pre-trained members, then fine-tune the ensemble.

    Reads ``training_data.zip``, wraps both splits in ``CompositeDataset``,
    loads the 20 checkpoints ``skeptic_v10_a`` through ``skeptic_v10_t`` from
    ``saved_models/skeptic_v10/`` into fresh ``skeptic_v9`` instances, and
    hands the resulting ensemble to ``finetune_ensemble`` for up to 20 epochs.
    """
    # Load data
    train_data, test_data = load_data('training_data.zip')

    # Create datasets and dataloaders
    train_dataset = CompositeDataset(train_data)
    test_dataset = CompositeDataset(test_data)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # Load ensemble model
    models = []
    for i in range(ord('a'), ord('u')):
        model_name = f'skeptic_v10_{chr(i)}'
        model = skeptic_v9()
        # Load onto the CPU so checkpoints saved on a GPU also open on a
        # CPU-only machine; finetune_ensemble moves the ensemble afterwards.
        # NOTE(review): torch.load unpickles the file; for checkpoints from an
        # untrusted source consider passing weights_only=True.
        state_dict = torch.load(f'saved_models/skeptic_v10/{model_name}.pth', map_location='cpu')
        model.load_state_dict(state_dict)
        models.append(model)

    ensemble = EnsembleModel(models)

    # Finetune the ensemble
    # NOTE(review): the test split is passed as the validation loader, so it
    # also drives checkpoint selection and early stopping.
    finetune_ensemble(ensemble, train_loader, test_loader, num_epochs=20)

    # Checkpoints are written as finetuned_models/best_finetuned_ensemble_epoch_<n>.pth;
    # the old message named a file that is never created.
    print("Finetuning completed. Best model saved in 'finetuned_models' directory.")

if __name__ == "__main__":
    main()

  model.load_state_dict(torch.load(f'saved_models/skeptic_v10/{model_name}.pth'))


KeyError: 'composite'

In [None]:
import os
import json
import zipfile
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from mnist_skeptic_v9 import skeptic_v9
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import base64
from PIL import Image
import io

class _Base64ImageDataset(Dataset):
    """Shared behaviour for datasets whose records carry a base64-encoded image.

    Subclasses fill ``self.data`` with records in ``__init__`` and set
    ``image_key`` to the record key that holds the encoded image.
    """

    # Record key holding the base64 image; set by each subclass.
    image_key = None

    def __len__(self):
        """Return the number of stored records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the record at position ``idx``.

        ``'true_digit'`` is passed through ``int()`` and returned as a 0-dim
        ``torch.long`` tensor. A record missing ``image_key`` or
        ``'true_digit'`` raises ``KeyError``.
        """
        item = self.data[idx]
        image = self.base64_to_image(item[self.image_key])
        label = torch.tensor(int(item['true_digit']), dtype=torch.long)
        return image, label

    def base64_to_image(self, base64_string):
        """Decode a base64 image string into a float32 tensor with a leading axis.

        Pixel values are divided by 255 and a new axis is inserted at
        position 0.
        """
        img_data = base64.b64decode(base64_string)
        img = Image.open(io.BytesIO(img_data))
        # NOTE(review): the image mode is not normalised here; this presumably
        # expects single-channel 8-bit images - confirm against the data.
        img_array = np.array(img, dtype=np.float32) / 255.0  # Normalize to [0, 1]
        return torch.tensor(img_array).unsqueeze(0)  # Add channel dimension


class CompositeDataset(_Base64ImageDataset):
    """Records read from the ``'composite'`` key, nested participant -> record."""

    image_key = 'composite'

    def __init__(self, data):
        """Concatenate the record lists of all participants."""
        self.data = []
        for participant_data in data:
            self.data.extend(participant_data)


class SelectionDataset(_Base64ImageDataset):
    """Records read from ``'selected_image'``, nested participant -> trial -> record."""

    image_key = 'selected_image'

    def __init__(self, data):
        """Concatenate the record lists of every trial of every participant."""
        self.data = []
        for participant_data in data:
            for trial_data in participant_data:
                self.data.extend(trial_data)

def load_data(zip_path):
    """Read the JSON payloads of the training and test splits from a zip archive.

    Members whose name starts with ``training_data/training_set/`` go to the
    training list, members starting with ``training_data/test_set/`` go to
    the test list, and everything else is ignored. Archive order is kept.

    Args:
        zip_path: Path of the zip archive to read.

    Returns:
        ``(train_data, test_data)``: two lists holding one parsed JSON value
        per matching archive member.

    Raises:
        json.JSONDecodeError: If a matching member is not valid JSON.
    """
    train_prefix = 'training_data/training_set/'
    test_prefix = 'training_data/test_set/'
    train_data = []
    test_data = []
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        for info in zip_ref.infolist():
            # Archives frequently contain explicit directory entries (names
            # ending in '/'); they match the prefixes but have no JSON body.
            if info.is_dir():
                continue
            if info.filename.startswith(train_prefix):
                target = train_data
            elif info.filename.startswith(test_prefix):
                target = test_data
            else:
                continue
            with zip_ref.open(info) as f:
                target.append(json.load(f))
    return train_data, test_data

class EnsembleModel(nn.Module):
    """Module that holds several sub-models and averages their outputs."""

    def __init__(self, models):
        """Register ``models`` as sub-modules via an ``nn.ModuleList``."""
        super().__init__()
        self.models = nn.ModuleList(models)

    def forward(self, x):
        """Feed ``x`` to every member and return the element-wise mean output."""
        member_outputs = []
        for member in self.models:
            member_outputs.append(member(x))
        # Stack along a new leading axis, then average that axis away.
        stacked = torch.stack(member_outputs)
        return stacked.mean(dim=0)

def train_epoch(ensemble, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    The model is switched to training mode; for every batch the inputs and
    labels are moved to ``device``, gradients are reset, and a
    forward/backward/step cycle is performed.

    Returns:
        ``(mean_loss, accuracy)``: the sum of per-batch losses divided by the
        number of batches, and the percentage of samples whose highest-scoring
        class equals the label.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches.
    """
    ensemble.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0

    for batch_inputs, batch_labels in dataloader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        scores = ensemble(batch_inputs)
        batch_loss = criterion(scores, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        # max over dim 1 returns (values, indices); the indices are the predictions.
        predicted = scores.max(1)[1]
        n_seen += batch_labels.size(0)
        n_correct += predicted.eq(batch_labels).sum().item()

    mean_loss = running_loss / len(dataloader)
    accuracy = 100 * n_correct / n_seen
    return mean_loss, accuracy

def evaluate(ensemble, dataloader, criterion, device, show_matrix=False):
    """Score ``ensemble`` on ``dataloader`` without updating any weights.

    The model is put in eval mode and all batches are processed under
    ``torch.no_grad()``.

    Args:
        ensemble: Model to evaluate.
        dataloader: Iterable of ``(inputs, labels)`` batches supporting ``len()``.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device the batches are moved to.
        show_matrix: When true, a confusion-matrix heatmap is written to
            ``confusion_matrix_ensemble.png`` in the working directory.

    Returns:
        ``(mean_loss, accuracy, labels, predictions)``: mean per-batch loss,
        accuracy in percent, and two NumPy arrays with the true and predicted
        class of every sample in iteration order.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    ensemble.eval()
    running_loss = 0
    n_correct = 0
    n_seen = 0
    predictions = []
    targets = []

    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            scores = ensemble(batch_inputs)
            running_loss += criterion(scores, batch_labels).item()

            # max over dim 1 returns (values, indices); keep the indices.
            predicted = scores.max(1)[1]
            n_seen += batch_labels.size(0)
            n_correct += predicted.eq(batch_labels).sum().item()

            predictions.extend(predicted.cpu().numpy())
            targets.extend(batch_labels.cpu().numpy())

    accuracy = 100 * n_correct / n_seen

    if show_matrix:
        cm = confusion_matrix(targets, predictions)
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.savefig('confusion_matrix_ensemble.png')
        plt.close()

    mean_loss = running_loss / len(dataloader)
    return mean_loss, accuracy, np.array(targets), np.array(predictions)

def analyze_confusion_matrix(cm):
    """Find, per true class, the most and least frequently confused other class.

    Each row of ``cm`` is normalised by its sum. Among the off-diagonal
    entries of that row, the column with the highest rate is reported as the
    most confusable class and the column with the lowest rate as the most
    discriminable class. Ties resolve to the lowest column index.

    The reported values are column indices of ``cm`` itself. The previous
    implementation searched an array with the diagonal entry removed, so
    every result above the row index was shifted down by one.

    Args:
        cm: 2-D NumPy array of counts (rows: true class, columns: predicted).

    Returns:
        ``(most_confusable_digits, most_discriminable_digits)``: two dicts
        mapping row index to column index. Rows that sum to zero, or that
        have no off-diagonal column, are omitted.
    """
    # NOTE(review): indices equal digit labels only if every digit occurs in
    # the data used to build ``cm`` - confirm against the caller.
    most_confusable_digits = {}
    most_discriminable_digits = {}

    for i in range(cm.shape[0]):
        row_sum = cm[i].sum()
        if row_sum <= 0:
            continue

        confusions = cm[i] / row_sum
        off_diagonal = np.arange(len(confusions)) != i
        if not off_diagonal.any():
            # Single-class matrix: there is no "other" class to report.
            continue

        # Neutralise the diagonal with +/-inf instead of dropping it, so the
        # argmax/argmin positions stay valid column indices of ``cm``.
        most_confusable_digits[i] = int(np.argmax(np.where(off_diagonal, confusions, -np.inf)))
        most_discriminable_digits[i] = int(np.argmin(np.where(off_diagonal, confusions, np.inf)))

    return most_confusable_digits, most_discriminable_digits

def finetune_ensemble(ensemble, train_loader, val_loader, num_epochs=50, patience=5):
    """Fine-tune ``ensemble`` with early stopping and report the best checkpoint.

    Training uses cross-entropy loss and Adam (lr=0.0001) on CUDA when
    available, otherwise on CPU. After every epoch the model is scored on
    ``val_loader``; each strict improvement in validation accuracy writes a
    checkpoint to ``finetuned_models/best_finetuned_ensemble_epoch_<n>.pth``.
    Training stops once ``patience`` consecutive epochs bring no improvement.
    The best checkpoint of this run is then reloaded, evaluated with the
    confusion-matrix plot enabled, and summarised on stdout.

    Args:
        ensemble: Model to fine-tune (modified in place).
        train_loader: Batches used for optimisation.
        val_loader: Batches used for model selection and the final report.
        num_epochs: Maximum number of epochs.
        patience: Number of non-improving epochs that triggers early stopping.

    Returns:
        None. Results are printed and written to disk.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    ensemble.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(ensemble.parameters(), lr=0.0001)

    best_val_acc = 0
    epochs_no_improve = 0
    # Remember exactly which file this run wrote last. Rebuilding the name
    # from loop counters breaks when no epoch ran or nothing ever improved,
    # and could pick up a stale file left behind by an earlier run.
    best_model_path = None

    # Create a new directory for finetuned models
    os.makedirs('finetuned_models', exist_ok=True)

    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch(ensemble, train_loader, criterion, optimizer, device)
        val_loss, val_acc, _, _ = evaluate(ensemble, val_loader, criterion, device)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            epochs_no_improve = 0
            best_model_path = os.path.join('finetuned_models', f'best_finetuned_ensemble_epoch_{epoch+1}.pth')
            torch.save(ensemble.state_dict(), best_model_path)
        else:
            epochs_no_improve += 1

        if epochs_no_improve == patience:
            print("Early stopping")
            break

    # Final evaluation
    if best_model_path is not None:
        # map_location keeps the reload valid on whichever device is in use.
        ensemble.load_state_dict(torch.load(best_model_path, map_location=device))
    else:
        print("No checkpoint was saved during this run; evaluating the current weights.")
    _, final_accuracy, all_labels, all_predictions = evaluate(ensemble, val_loader, criterion, device, show_matrix=True)

    cm = confusion_matrix(all_labels, all_predictions)
    most_confusable_digits, most_discriminable_digits = analyze_confusion_matrix(cm)

    print(f'Final Ensemble Accuracy: {final_accuracy:.2f}%')
    print(f'Most Confusable Digits: {most_confusable_digits}')
    print(f'Most Discriminable Digits: {most_discriminable_digits}')

def main():
    """Load the data and the pre-trained members, then fine-tune the ensemble.

    Reads ``training_data.zip``, wraps the training split in
    ``SelectionDataset`` and the test split in ``CompositeDataset``, loads the
    20 checkpoints ``skeptic_v10_a`` through ``skeptic_v10_t`` from
    ``saved_models/skeptic_v10/`` into fresh ``skeptic_v9`` instances, and
    hands the resulting ensemble to ``finetune_ensemble`` for up to 20 epochs.
    """
    # Load data
    train_data, test_data = load_data('training_data.zip')

    # Create datasets and dataloaders
    train_dataset = SelectionDataset(train_data)
    test_dataset = CompositeDataset(test_data)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # Load ensemble model
    models = []
    for i in range(ord('a'), ord('u')):
        model_name = f'skeptic_v10_{chr(i)}'
        model = skeptic_v9()
        # Load onto the CPU so checkpoints saved on a GPU also open on a
        # CPU-only machine; finetune_ensemble moves the ensemble afterwards.
        # NOTE(review): torch.load unpickles the file; for checkpoints from an
        # untrusted source consider passing weights_only=True.
        state_dict = torch.load(f'saved_models/skeptic_v10/{model_name}.pth', map_location='cpu')
        model.load_state_dict(state_dict)
        models.append(model)

    ensemble = EnsembleModel(models)

    # Finetune the ensemble
    # NOTE(review): the test split is passed as the validation loader, so it
    # also drives checkpoint selection and early stopping.
    finetune_ensemble(ensemble, train_loader, test_loader, num_epochs=20)

    print("Finetuning completed. Best model saved in 'finetuned_models' directory.")

if __name__ == "__main__":
    main()

  model.load_state_dict(torch.load(f'saved_models/skeptic_v10/{model_name}.pth'))


Epoch 1/20
Train Loss: 2.2668, Train Acc: 14.07%
Val Loss: 2.2819, Val Acc: 16.51%
