In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import KFold
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchvision import datasets, transforms
from torch.utils.data import Dataset
import torch.nn.functional as F
from torch.utils.data import Sampler

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import random
import itertools
from scipy import stats

  from pandas.core import (


In [2]:
import random
random.seed(42)

import torch
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

In [3]:
training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

## Define Dataset

In [4]:
# Function to assign source names to the dataset equally
def assign_sources_equally(dataset, sources=('A', 'B', 'C', 'D', 'E','F')):
    num_sources = len(sources)
    num_data = len(dataset)
    num_each = num_data // num_sources
    
    # Convert sources to a list if it's a tuple
    sources_list = list(sources)
    
    # Create a list of source labels, each repeated equally
    source_labels = sources_list * num_each + [sources_list[i] for i in range(num_data % num_sources)]
    
    # Shuffle the labels to randomize their order
    np.random.shuffle(source_labels)
    
    return source_labels

# Assign sources to training and test data
training_sources = assign_sources_equally(training_data)
test_sources = assign_sources_equally(test_data)

# Example of how you can use the assigned sources
print("First 10 source labels for training data:", training_sources[:10])

First 10 source labels for training data: ['A', 'B', 'C', 'E', 'A', 'C', 'D', 'F', 'F', 'F']


In [5]:
def assign_participants_to_train_and_test(train_labels, test_labels, num_participants_per_source, train_images_per_participant):
    participants = {source: [] for source in set(train_labels)}
    global_participant_id = 0
    
    # Calculate number of test images for participants
    num_participants_total = num_participants_per_source * len(participants)
    test_images_per_participant = 10000 // num_participants_total
    extra_test_images = 10000 % num_participants_total

    for source in participants:
        train_indices = [i for i, s in enumerate(train_labels) if s == source]
        test_indices = [i for i, s in enumerate(test_labels) if s == source]

        np.random.shuffle(train_indices)
        np.random.shuffle(test_indices)

        for i in range(num_participants_per_source):
            train_participant_indices = train_indices[i * train_images_per_participant: (i + 1) * train_images_per_participant]
            
            # Assign 166 or 167 test images per participant
            if global_participant_id < extra_test_images:
                test_participant_indices = test_indices[i * test_images_per_participant: (i + 1) * test_images_per_participant + 1]
            else:
                test_participant_indices = test_indices[i * test_images_per_participant: (i + 1) * test_images_per_participant]
            
            participants[source].append((global_participant_id, train_participant_indices, test_participant_indices))
            global_participant_id += 1

    return participants

# Parameters
num_participants_per_source = 10
train_images_per_participant = 1000

# Assign participant IDs to both training and test data
participants = assign_participants_to_train_and_test(training_sources, test_sources, num_participants_per_source, train_images_per_participant)

# Example of how you can use the assigned participants
for source, participant_info in participants.items():
    print(f"Source: {source}")
    for participant_id, train_indices, test_indices in participant_info:
        print(f"  Participant {participant_id} - Train Indices: {train_indices[:10]}..., Test Indices: {test_indices[:10]}...")

# Output first 10 source labels for training data
print("First 10 source labels for training data:", training_sources[:10])

Source: F
  Participant 0 - Train Indices: [13750, 343, 32308, 30125, 4989, 42666, 37965, 34196, 31930, 16614]..., Test Indices: [969, 5876, 2092, 2456, 866, 8046, 6221, 387, 9381, 9076]...
  Participant 1 - Train Indices: [30586, 48752, 46657, 49753, 10336, 41622, 9419, 52893, 34417, 45952]..., Test Indices: [4953, 6907, 6597, 8700, 8075, 5081, 419, 3266, 1531, 4996]...
  Participant 2 - Train Indices: [57544, 2031, 53786, 54601, 15929, 36830, 706, 47898, 24897, 25672]..., Test Indices: [8534, 1173, 9634, 3662, 7378, 1054, 1066, 5613, 853, 5949]...
  Participant 3 - Train Indices: [47492, 51330, 2759, 52339, 13772, 3397, 7417, 3833, 21394, 11256]..., Test Indices: [7541, 6065, 6920, 9065, 5562, 9454, 1492, 1855, 8827, 8432]...
  Participant 4 - Train Indices: [59967, 34016, 38632, 39295, 51639, 44341, 40921, 28617, 37502, 59367]..., Test Indices: [6371, 8346, 8860, 4152, 5200, 3564, 8925, 1045, 5719, 5322]...
  Participant 5 - Train Indices: [159, 56553, 3638, 28594, 28661, 12631, 564

In [6]:
class ParticipantCustomMNIST(Dataset):
    def __init__(self, mnist_dataset, source_labels, participants, transform=None, train=True):
        self.mnist_dataset = mnist_dataset
        self.source_labels = source_labels
        self.participants = participants
        self.transform = transform
        self.train = train

        self.data = self._create_data()

    def _create_data(self):
        data = []
        for source, participant_data in self.participants.items():
            for participant_id, train_indices, test_indices in participant_data:
                indices = train_indices if self.train else test_indices
                for idx in indices:
                    image, label = self.mnist_dataset[idx]
                    source_label = self.source_labels[idx]
                    data.append((image, label, source_label, participant_id))
        return data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image, label, source_label, participant_id = self.data[idx]

        if source_label == 'A':
            image = transforms.functional.rotate(image, 180)
        elif source_label == 'B':
            c, h, w = image.shape
            num_pixels = h * w
            num_missing = num_pixels // 2
            mask = torch.randperm(num_pixels)[:num_missing]
            mask_h, mask_w = mask // w, mask % w
            image[:, mask_h, mask_w] = 1
        elif source_label == 'C':
            noise = torch.randn_like(image) * 0.5
            image = image + noise
            image = torch.clamp(image, 0, 1)
        elif source_label == 'D':
            label_permutation = {0: 9, 1: 8, 2: 7, 3: 6, 4: 5, 5: 4, 6: 3, 7: 2, 8: 1, 9: 0}
            label = label_permutation[label]
        elif source_label == 'E':
            pass
        elif source_label == 'F':
            pass
        else:
            raise ValueError("Unknown source label provided: must be 'A', 'B', 'C', 'D', or 'E', or 'F'")

        if self.transform:
            image = self.transform(image)

        image = torch.flatten(image)
        return image, label, source_label, participant_id

In [7]:
# Create custom datasets with participants
train_dataset = ParticipantCustomMNIST(training_data, training_sources, participants, transform=None, train=True)
test_dataset = ParticipantCustomMNIST(test_data, test_sources, participants, transform=None, train=False)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

In [9]:
for images, labels, source_labels, participant_ids in train_loader:
    print("Images shape:", images.shape)
    print("Labels shape:", labels.shape)

    print("Participant IDs shape:", participant_ids.shape)
    break

Images shape: torch.Size([128, 784])
Labels shape: torch.Size([128])
Participant IDs shape: torch.Size([128])


## ID embedding

In [8]:
class EmbeddingMLP(nn.Module):
    def __init__(self, input_size, output_size, num_patient_ids, embedding_dim, dropout_rate):
        super(EmbeddingMLP, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=num_patient_ids, embedding_dim=embedding_dim)
        self.linear1 = nn.Linear(input_size + embedding_dim, 100)
        self.linear2 = nn.Linear(100, 100)
        self.linear3 = nn.Linear(100, output_size)
        self.dropout = nn.Dropout(dropout_rate)
    
    def forward(self, data, participant_ids):
        # Embed the participant IDs
        embedded_ids = self.embedding(participant_ids)

        # Concatenate the embedded participant IDs with the input data
        x = torch.cat((data, embedded_ids), dim=1)

        # Pass through the MLP layers
        x = F.relu(self.linear1(x))
        x = self.dropout(x)
        x = F.relu(self.linear2(x))
        x = self.dropout(x)
        y_pred = self.linear3(x)
        return y_pred

In [9]:
# Define the train_model function
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    model.train()

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_predictions = 0
        total_predictions = 0

        # Initialize source-specific counters
        source_correct = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}
        source_total = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}

        for batch_idx, (images, labels, source_labels, participant_ids) in enumerate(train_loader):

            optimizer.zero_grad()
            outputs = model(images, participant_ids)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Update running loss
            running_loss += loss.item() * images.size(0)

            # Calculate accuracy
            _, predicted = torch.max(outputs.data, 1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

            # Update source-specific counters
            for i, (source, pred, label) in enumerate(zip(source_labels, predicted, labels)):
                if pred == label:
                    source_correct[source] += 1
                source_total[source] += 1

        # Calculate epoch loss and overall accuracy
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_accuracy = (correct_predictions / total_predictions) * 100

        # Display overall accuracy and loss
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

        # Display source-specific accuracies
        for source in source_correct:
            if source_total[source] > 0:
                source_accuracy = (source_correct[source] / source_total[source]) * 100
                print(f'Accuracy for Source {source}: {source_accuracy:.2f}%')

def validate_model(model, val_loader, criterion):
    model.eval()
    val_running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    source_correct = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}
    source_total = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}

    with torch.no_grad():
        for batch_idx, (images, labels, source_labels, participant_ids) in enumerate(val_loader):
            outputs = model(images, participant_ids)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item() * images.size(0)

            _, predicted = torch.max(outputs, 1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

            for source, pred, label in zip(source_labels, predicted, labels):
                if pred == label:
                    source_correct[source] += 1
                source_total[source] += 1

    val_loss = val_running_loss / len(val_loader.dataset)
    val_accuracy = correct_predictions / total_predictions * 100.0

    source_accuracies = {}
    for source in source_correct:
        if source_total[source] > 0:
            source_accuracy = (source_correct[source] / source_total[source]) * 100
            source_accuracies[source] = source_accuracy

    return val_loss, val_accuracy, source_accuracies

In [10]:
# Hyperparameter grid
hyperparameter_grid = {
    'lr': [0.001, 0.005, 0.01],
    'dropout_rate': [0, 0.2, 0.5]
}

best_accuracy = 0
best_model_state = None
best_hyperparams = None

# Create all possible combinations of hyperparameters
all_combinations = list(itertools.product(*hyperparameter_grid.values()))

# Cross-validation and hyperparameter tuning
kf = KFold(n_splits=5, shuffle=True, random_state=42)
criterion = nn.CrossEntropyLoss()

for lr, dropout_rate in all_combinations:
    fold_accuracies = []
    fold_source_accuracies = {'A': [], 'B': [], 'C': [], 'D': [], 'E': [], 'F': []}
    print(f'Testing parameters: lr={lr}, dropout_rate={dropout_rate}')
    
    for fold, (train_idx, val_idx) in enumerate(kf.split(range(len(train_dataset)))):
        print(f'Starting Fold {fold+1}')
        train_subset = Subset(train_dataset, train_idx)
        val_subset = Subset(train_dataset, val_idx)
        train_loader = DataLoader(train_subset, batch_size=128, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=128, shuffle=False)

        model = EmbeddingMLP(input_size=784, output_size=10, num_patient_ids=num_participants_per_source * 6, embedding_dim=10, dropout_rate=dropout_rate)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        train_model(model, train_loader, criterion, optimizer, num_epochs=10)
        val_loss, val_acc, source_accuracies = validate_model(model, val_loader, criterion)
        fold_accuracies.append(val_acc)
        for source in source_accuracies:
            fold_source_accuracies[source].append(source_accuracies[source])

    mean_accuracy = np.mean(fold_accuracies)
    std_accuracy = np.std(fold_accuracies)
    if mean_accuracy > best_accuracy:
        best_accuracy = mean_accuracy
        best_hyperparams = {'lr': lr, 'dropout_rate': dropout_rate}
        best_model = model

    print(f'Parameters: lr={lr}, dropout_rate={dropout_rate}, Mean Accuracy: {mean_accuracy:.2f}%, Std Dev: {std_accuracy:.2f}%')
    for source in fold_source_accuracies:
        mean_source = np.mean(fold_source_accuracies[source])
        std_source = np.std(fold_source_accuracies[source])
        print(f'Source {source} - Mean: {mean_source:.2f}%, Std Dev: {std_source:.2f}%')

print(f'Best Hyperparameters: {best_hyperparams}, with mean accuracy: {best_accuracy:.2f}%, Std Dev: {std_accuracy:.2f}%')



Testing parameters: lr=0.001, dropout_rate=0
Starting Fold 1
Epoch 1/10, Loss: 1.2560, Accuracy: 58.06%
Accuracy for Source A: 54.32%
Accuracy for Source B: 54.45%
Accuracy for Source C: 70.60%
Accuracy for Source D: 14.70%
Accuracy for Source E: 78.23%
Accuracy for Source F: 75.65%
Epoch 2/10, Loss: 0.6552, Accuracy: 79.27%
Accuracy for Source A: 83.10%
Accuracy for Source B: 46.69%
Accuracy for Source C: 83.56%
Accuracy for Source D: 84.61%
Accuracy for Source E: 90.23%
Accuracy for Source F: 87.62%
Epoch 3/10, Loss: 0.6027, Accuracy: 80.59%
Accuracy for Source A: 89.09%
Accuracy for Source B: 33.37%
Accuracy for Source C: 85.86%
Accuracy for Source D: 90.71%
Accuracy for Source E: 92.65%
Accuracy for Source F: 92.13%
Epoch 4/10, Loss: 0.5993, Accuracy: 79.87%
Accuracy for Source A: 90.85%
Accuracy for Source B: 22.88%
Accuracy for Source C: 86.70%
Accuracy for Source D: 92.40%
Accuracy for Source E: 93.57%
Accuracy for Source F: 93.13%
Epoch 5/10, Loss: 0.5931, Accuracy: 79.55%
Accu

In [11]:
# Bootstrap sampling and testing
def bootstrap_train_and_test(model, train_data, test_loader, criterion, optimizer_params, num_epochs=10, num_bootstrap=5, sample_percentage=0.8):
    bootstrap_accuracies = []
    source_bootstrap_accuracies = {source: [] for source in 'ABCDEF'}
    for i in range(num_bootstrap):
        # Create a bootstrap sample from the training data
        indices = np.random.choice(len(train_data), size=int(sample_percentage * len(train_data)), replace=False)
        bootstrap_subset = Subset(train_data, indices)
        bootstrap_loader = DataLoader(bootstrap_subset, batch_size=128, shuffle=True)

        # Initialize and train the model
        model.apply(lambda m: m.reset_parameters() if hasattr(m, 'reset_parameters') else None)  # Reset model parameters
        optimizer = torch.optim.Adam(model.parameters(), **optimizer_params)

        for epoch in range(num_epochs):
            model.train()
            for images, labels, source_labels, participant_ids in bootstrap_loader:
                optimizer.zero_grad()
                outputs = model(images, participant_ids)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

        # Evaluate on the test set
        model.eval()
        test_loss, correct, total = 0, 0, 0
        source_counts = {s: 0 for s in 'ABCDEF'}
        source_correct = {s: 0 for s in 'ABCDEF'}
        with torch.no_grad():
            for images, labels, source_labels, participant_ids in test_loader:
                outputs = model(images, participant_ids)
                loss = criterion(outputs, labels)
                test_loss += loss.item() * labels.size(0)
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)
                for i, source in enumerate(source_labels):  # Assuming source labels are in the labels (change if different)
                    source_counts[source] += 1
                    if predicted[i] == labels[i]:
                        source_correct[source] += 1

        accuracy = correct / total * 100.0
        bootstrap_accuracies.append(accuracy)
        source_accuracies = {s: (source_correct[s] / source_counts[s] * 100) if source_counts[s] > 0 else 0 for s in 'ABCDEF'}
        for source in source_accuracies:
            source_bootstrap_accuracies[source].append(source_accuracies[source])

    mean_accuracy = np.mean(bootstrap_accuracies)
    std_accuracy = np.std(bootstrap_accuracies)
    mean_source_accuracies = {s: np.mean(source_bootstrap_accuracies[s]) for s in 'ABCDEF'}
    std_source_accuracies = {s: np.std(source_bootstrap_accuracies[s]) for s in 'ABCDEF'}

    return mean_accuracy, std_accuracy, mean_source_accuracies, std_source_accuracies

test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Perform bootstrap training and testing
mean_bootstrap, std_bootstrap, mean_source_accuracies, std_source_accuracies = bootstrap_train_and_test(
    best_model, train_dataset, test_loader, criterion, {'lr': best_hyperparams['lr']}, num_epochs=10
)

print(f'Bootstrap results: Mean accuracy: {mean_bootstrap:.2f}%, Std Dev: {std_bootstrap:.2f}%')
for source in mean_source_accuracies:
    print(f'Source {source} - Bootstrap Mean: {mean_source_accuracies[source]:.2f}%, Std Dev: {std_source_accuracies[source]:.2f}%')

Bootstrap results: Mean accuracy: 81.77%, Std Dev: 2.87%
Source A - Bootstrap Mean: 93.17%, Std Dev: 0.37%
Source B - Bootstrap Mean: 22.66%, Std Dev: 16.36%
Source C - Bootstrap Mean: 88.37%, Std Dev: 0.41%
Source D - Bootstrap Mean: 94.53%, Std Dev: 0.48%
Source E - Bootstrap Mean: 96.35%, Std Dev: 0.17%
Source F - Bootstrap Mean: 95.66%, Std Dev: 0.33%


## Source embedding

In [12]:
source_mapping = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5}

# Function to map source labels to indices
def map_source_labels_to_indices(source_labels):
    return [source_mapping[source] for source in source_labels]

In [13]:
class SourceEmbeddingMLP(nn.Module):
    def __init__(self, input_size, output_size, num_sources, embedding_dim, dropout_rate):
        super(SourceEmbeddingMLP, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=num_sources, embedding_dim=embedding_dim)
        self.linear1 = nn.Linear(input_size + embedding_dim, 100)
        self.linear2 = nn.Linear(100, 100)
        self.linear3 = nn.Linear(100, output_size)
        self.dropout = nn.Dropout(dropout_rate)
    
    def forward(self, data, source_ids):
        # Embed the source IDs
        embedded_ids = self.embedding(source_ids)

        # Concatenate the embedded source IDs with the input data
        x = torch.cat((data, embedded_ids), dim=1)

        # Pass through the MLP layers
        x = F.relu(self.linear1(x))
        x = self.dropout(x)
        x = F.relu(self.linear2(x))
        x = self.dropout(x)
        y_pred = self.linear3(x)
        return y_pred

In [14]:
# Define the train_model function
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    model.train()

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_predictions = 0
        total_predictions = 0

        # Initialize source-specific counters
        source_correct = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}
        source_total = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}

        for batch_idx, (images, labels, source_labels, participant_ids) in enumerate(train_loader):
            # Convert source_labels to tensor of source IDs
            source_ids = torch.tensor(map_source_labels_to_indices(source_labels), dtype=torch.long)

            optimizer.zero_grad()
            outputs = model(images, source_ids)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Update running loss
            running_loss += loss.item() * images.size(0)

            # Calculate accuracy
            _, predicted = torch.max(outputs.data, 1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

            # Update source-specific counters
            for i, (source, pred, label) in enumerate(zip(source_labels, predicted, labels)):
                if pred == label:
                    source_correct[source] += 1
                source_total[source] += 1

        # Calculate epoch loss and overall accuracy
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_accuracy = (correct_predictions / total_predictions) * 100

        # Display overall accuracy and loss
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

        # Display source-specific accuracies
        for source in source_correct:
            if source_total[source] > 0:
                source_accuracy = (source_correct[source] / source_total[source]) * 100
                print(f'Accuracy for Source {source}: {source_accuracy:.2f}%')

def validate_model(model, val_loader, criterion):
    model.eval()
    val_running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    source_correct = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}
    source_total = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}

    with torch.no_grad():
        for batch_idx, (images, labels, source_labels, participant_ids) in enumerate(val_loader):
            # Convert source_labels to tensor of source IDs
            source_ids = torch.tensor(map_source_labels_to_indices(source_labels), dtype=torch.long)

            outputs = model(images, source_ids)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item() * images.size(0)

            _, predicted = torch.max(outputs, 1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

            for source, pred, label in zip(source_labels, predicted, labels):
                if pred == label:
                    source_correct[source] += 1
                source_total[source] += 1

    val_loss = val_running_loss / len(val_loader.dataset)
    val_accuracy = correct_predictions / total_predictions * 100.0

    source_accuracies = {}
    for source in source_correct:
        if source_total[source] > 0:
            source_accuracy = (source_correct[source] / source_total[source]) * 100
            source_accuracies[source] = source_accuracy

    return val_loss, val_accuracy, source_accuracies


In [15]:
# Hyperparameter grid
hyperparameter_grid = {
    'lr': [0.001, 0.005, 0.01],
    'dropout_rate': [0, 0.2, 0.5]
}

best_accuracy = 0
best_model_state = None
best_hyperparams = None

# Create all possible combinations of hyperparameters
all_combinations = list(itertools.product(*hyperparameter_grid.values()))

# Cross-validation and hyperparameter tuning
kf = KFold(n_splits=5, shuffle=True, random_state=42)
criterion = nn.CrossEntropyLoss()

for lr, dropout_rate in all_combinations:
    fold_accuracies = []
    fold_source_accuracies = {'A': [], 'B': [], 'C': [], 'D': [], 'E': [], 'F': []}
    print(f'Testing parameters: lr={lr}, dropout_rate={dropout_rate}')
    
    for fold, (train_idx, val_idx) in enumerate(kf.split(range(len(train_dataset)))):
        print(f'Starting Fold {fold+1}')
        train_subset = Subset(train_dataset, train_idx)
        val_subset = Subset(train_dataset, val_idx)
        train_loader = DataLoader(train_subset, batch_size=128, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=128, shuffle=False)

        model = SourceEmbeddingMLP(input_size=784, output_size=10, num_sources=6, embedding_dim=3, dropout_rate=dropout_rate)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        train_model(model, train_loader, criterion, optimizer, num_epochs=10)
        val_loss, val_acc, source_accuracies = validate_model(model, val_loader, criterion)
        fold_accuracies.append(val_acc)
        for source in source_accuracies:
            fold_source_accuracies[source].append(source_accuracies[source])

    mean_accuracy = np.mean(fold_accuracies)
    std_accuracy = np.std(fold_accuracies)
    if mean_accuracy > best_accuracy:
        best_accuracy = mean_accuracy
        best_hyperparams = {'lr': lr, 'dropout_rate': dropout_rate}
        best_model = model

    print(f'Parameters: lr={lr}, dropout_rate={dropout_rate}, Mean Accuracy: {mean_accuracy:.2f}%, Std Dev: {std_accuracy:.2f}%')
    for source in fold_source_accuracies:
        mean_source = np.mean(fold_source_accuracies[source])
        std_source = np.std(fold_source_accuracies[source])
        print(f'Source {source} - Mean: {mean_source:.2f}%, Std Dev: {std_source:.2f}%')

print(f'Best Hyperparameters: {best_hyperparams}, with mean accuracy: {best_accuracy:.2f}%')

Testing parameters: lr=0.001, dropout_rate=0
Starting Fold 1
Epoch 1/10, Loss: 1.4694, Accuracy: 49.46%
Accuracy for Source A: 54.01%
Accuracy for Source B: 10.50%
Accuracy for Source C: 65.50%
Accuracy for Source D: 20.41%
Accuracy for Source E: 73.68%
Accuracy for Source F: 72.55%
Epoch 2/10, Loss: 0.8362, Accuracy: 71.96%
Accuracy for Source A: 78.34%
Accuracy for Source B: 9.97%
Accuracy for Source C: 80.18%
Accuracy for Source D: 85.59%
Accuracy for Source E: 89.26%
Accuracy for Source F: 88.79%
Epoch 3/10, Loss: 0.6888, Accuracy: 76.01%
Accuracy for Source A: 86.39%
Accuracy for Source B: 10.20%
Accuracy for Source C: 84.09%
Accuracy for Source D: 90.89%
Accuracy for Source E: 92.82%
Accuracy for Source F: 92.05%
Epoch 4/10, Loss: 0.6191, Accuracy: 78.00%
Accuracy for Source A: 90.98%
Accuracy for Source B: 10.05%
Accuracy for Source C: 86.16%
Accuracy for Source D: 92.98%
Accuracy for Source E: 94.33%
Accuracy for Source F: 93.92%
Epoch 5/10, Loss: 0.5842, Accuracy: 78.88%
Accur

In [16]:
# Bootstrap sampling and testing
def bootstrap_train_and_test(model, train_data, test_loader, criterion, optimizer_params, num_epochs=10, num_bootstrap=5, sample_percentage=0.8):
    bootstrap_accuracies = []
    source_bootstrap_accuracies = {source: [] for source in 'ABCDEF'}
    for i in range(num_bootstrap):
        # Create a bootstrap sample from the training data
        indices = np.random.choice(len(train_data), size=int(sample_percentage * len(train_data)), replace=False)
        bootstrap_subset = Subset(train_data, indices)
        bootstrap_loader = DataLoader(bootstrap_subset, batch_size=128, shuffle=True)

        # Initialize and train the model
        optimizer = torch.optim.Adam(model.parameters(), **optimizer_params)

        for epoch in range(num_epochs):
            model.train()
            for images, labels, source_labels, participant_ids in bootstrap_loader:
                # Convert source_labels to tensor of source IDs
                source_ids = torch.tensor(map_source_labels_to_indices(source_labels), dtype=torch.long)
                
                optimizer.zero_grad()
                outputs = model(images, source_ids)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

        # Evaluate on the test set
        model.eval()
        test_loss, correct, total = 0, 0, 0
        source_counts = {s: 0 for s in 'ABCDEF'}
        source_correct = {s: 0 for s in 'ABCDEF'}
        with torch.no_grad():
            for images, labels, source_labels, participant_ids in test_loader:
                # Convert source_labels to tensor of source IDs
                source_ids = torch.tensor(map_source_labels_to_indices(source_labels), dtype=torch.long)

                outputs = model(images, source_ids)
                loss = criterion(outputs, labels)
                test_loss += loss.item() * labels.size(0)
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)
                for i, source in enumerate(source_labels):
                    source_counts[source] += 1
                    if predicted[i] == labels[i]:
                        source_correct[source] += 1

        accuracy = correct / total * 100.0
        bootstrap_accuracies.append(accuracy)
        source_accuracies = {s: (source_correct[s] / source_counts[s] * 100) if source_counts[s] > 0 else 0 for s in 'ABCDEF'}
        for source in source_accuracies:
            source_bootstrap_accuracies[source].append(source_accuracies[source])

    mean_accuracy = np.mean(bootstrap_accuracies)
    std_accuracy = np.std(bootstrap_accuracies)
    mean_source_accuracies = {s: np.mean(source_bootstrap_accuracies[s]) for s in 'ABCDEF'}
    std_source_accuracies = {s: np.std(source_bootstrap_accuracies[s]) for s in 'ABCDEF'}

    return mean_accuracy, std_accuracy, mean_source_accuracies, std_source_accuracies

test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Perform bootstrap training and testing
mean_bootstrap, std_bootstrap, mean_source_accuracies, std_source_accuracies = bootstrap_train_and_test(
    best_model, train_dataset, test_loader, criterion, {'lr': best_hyperparams['lr']}, num_epochs=10
)

print(f'Bootstrap results: Mean accuracy: {mean_bootstrap:.2f}%, Std Dev: {std_bootstrap:.2f}%')
for source in mean_source_accuracies:
    print(f'Source {source} - Bootstrap Mean: {mean_source_accuracies[source]:.2f}%, Std Dev: {std_source_accuracies[source]:.2f}%')

Bootstrap results: Mean accuracy: 81.03%, Std Dev: 0.18%
Source A - Bootstrap Mean: 94.84%, Std Dev: 0.17%
Source B - Bootstrap Mean: 10.60%, Std Dev: 0.58%
Source C - Bootstrap Mean: 90.77%, Std Dev: 0.58%
Source D - Bootstrap Mean: 96.28%, Std Dev: 0.36%
Source E - Bootstrap Mean: 97.31%, Std Dev: 0.33%
Source F - Bootstrap Mean: 96.54%, Std Dev: 0.22%


## Source concatenation

In [17]:
class ConcatMLP(nn.Module):
    def __init__(self, input_size, output_size, dropout_rate):
        super(ConcatMLP, self).__init__()
        self.linear1 = nn.Linear(input_size + 1, 100)  # +1 for the source ID
        self.linear2 = nn.Linear(100, 100)
        self.linear3 = nn.Linear(100, output_size)
        self.dropout = nn.Dropout(dropout_rate)
    
    def forward(self, data):
        x = F.relu(self.linear1(data))
        x = self.dropout(x)
        x = F.relu(self.linear2(x))
        x = self.dropout(x)
        y_pred = self.linear3(x)
        return y_pred

In [18]:
# Define the train_model function
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    model.train()

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_predictions = 0
        total_predictions = 0

        # Initialize source-specific counters
        source_correct = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}
        source_total = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}

        for batch_idx, (images, labels, source_labels, participant_ids) in enumerate(train_loader):
            # Convert source_labels to tensor of source IDs
            source_ids = torch.tensor(map_source_labels_to_indices(source_labels), dtype=torch.long)
            source_ids = source_ids.unsqueeze(1).float()  # Convert to float and unsqueeze

            # Concatenate the source IDs with the input data
            inputs = torch.cat((images, source_ids), dim=1)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Update running loss
            running_loss += loss.item() * images.size(0)

            # Calculate accuracy
            _, predicted = torch.max(outputs.data, 1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

            # Update source-specific counters
            for i, (source, pred, label) in enumerate(zip(source_labels, predicted, labels)):
                if pred == label:
                    source_correct[source] += 1
                source_total[source] += 1

        # Calculate epoch loss and overall accuracy
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_accuracy = (correct_predictions / total_predictions) * 100

        # Display overall accuracy and loss
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

        # Display source-specific accuracies
        for source in source_correct:
            if source_total[source] > 0:
                source_accuracy = (source_correct[source] / source_total[source]) * 100
                print(f'Accuracy for Source {source}: {source_accuracy:.2f}%')

def validate_model(model, val_loader, criterion):
    model.eval()
    val_running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    source_correct = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}
    source_total = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0}

    with torch.no_grad():
        for batch_idx, (images, labels, source_labels, participant_ids) in enumerate(val_loader):
            # Convert source_labels to tensor of source IDs
            source_ids = torch.tensor(map_source_labels_to_indices(source_labels), dtype=torch.long)
            source_ids = source_ids.unsqueeze(1).float()  # Convert to float and unsqueeze

            # Concatenate the source IDs with the input data
            inputs = torch.cat((images, source_ids), dim=1)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item() * images.size(0)

            _, predicted = torch.max(outputs, 1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

            for source, pred, label in zip(source_labels, predicted, labels):
                if pred == label:
                    source_correct[source] += 1
                source_total[source] += 1

    val_loss = val_running_loss / len(val_loader.dataset)
    val_accuracy = correct_predictions / total_predictions * 100.0

    source_accuracies = {}
    for source in source_correct:
        if source_total[source] > 0:
            source_accuracy = (source_correct[source] / source_total[source]) * 100
            source_accuracies[source] = source_accuracy

    return val_loss, val_accuracy, source_accuracies


In [19]:
# Hyperparameter grid
hyperparameter_grid = {
    'lr': [0.001, 0.005, 0.01],
    'dropout_rate': [0, 0.2, 0.5]
}

best_accuracy = 0
best_model_state = None
best_hyperparams = None

# Create all possible combinations of hyperparameters
all_combinations = list(itertools.product(*hyperparameter_grid.values()))

# Cross-validation and hyperparameter tuning
kf = KFold(n_splits=5, shuffle=True, random_state=42)
criterion = nn.CrossEntropyLoss()

for lr, dropout_rate in all_combinations:
    fold_accuracies = []
    fold_source_accuracies = {'A': [], 'B': [], 'C': [], 'D': [], 'E': [], 'F': []}
    print(f'Testing parameters: lr={lr}, dropout_rate={dropout_rate}')
    
    for fold, (train_idx, val_idx) in enumerate(kf.split(range(len(train_dataset)))):
        print(f'Starting Fold {fold+1}')
        train_subset = Subset(train_dataset, train_idx)
        val_subset = Subset(train_dataset, val_idx)
        train_loader = DataLoader(train_subset, batch_size=128, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=128, shuffle=False)

        model = ConcatMLP(input_size=784, output_size=10, dropout_rate=dropout_rate)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        train_model(model, train_loader, criterion, optimizer, num_epochs=10)
        val_loss, val_acc, source_accuracies = validate_model(model, val_loader, criterion)
        fold_accuracies.append(val_acc)
        for source in source_accuracies:
            fold_source_accuracies[source].append(source_accuracies[source])

    mean_accuracy = np.mean(fold_accuracies)
    std_accuracy = np.std(fold_accuracies)
    if mean_accuracy > best_accuracy:
        best_accuracy = mean_accuracy
        best_hyperparams = {'lr': lr, 'dropout_rate': dropout_rate}
        best_model = model

    print(f'Parameters: lr={lr}, dropout_rate={dropout_rate}, Mean Accuracy: {mean_accuracy:.2f}%, Std Dev: {std_accuracy:.2f}%')
    for source in fold_source_accuracies:
        mean_source = np.mean(fold_source_accuracies[source])
        std_source = np.std(fold_source_accuracies[source])
        print(f'Source {source} - Mean: {mean_source:.2f}%, Std Dev: {std_source:.2f}%')

print(f'Best Hyperparameters: {best_hyperparams}, with mean accuracy: {best_accuracy:.2f}%')

Testing parameters: lr=0.001, dropout_rate=0
Starting Fold 1
Epoch 1/10, Loss: 1.4869, Accuracy: 48.31%
Accuracy for Source A: 59.03%
Accuracy for Source B: 9.65%
Accuracy for Source C: 65.76%
Accuracy for Source D: 6.39%
Accuracy for Source E: 74.68%
Accuracy for Source F: 74.10%
Epoch 2/10, Loss: 1.0299, Accuracy: 61.26%
Accuracy for Source A: 85.20%
Accuracy for Source B: 10.16%
Accuracy for Source C: 80.93%
Accuracy for Source D: 19.33%
Accuracy for Source E: 84.13%
Accuracy for Source F: 87.62%
Epoch 3/10, Loss: 0.8828, Accuracy: 66.19%
Accuracy for Source A: 89.14%
Accuracy for Source B: 10.06%
Accuracy for Source C: 83.65%
Accuracy for Source D: 40.99%
Accuracy for Source E: 82.96%
Accuracy for Source F: 90.33%
Epoch 4/10, Loss: 0.7957, Accuracy: 69.78%
Accuracy for Source A: 90.88%
Accuracy for Source B: 9.75%
Accuracy for Source C: 85.41%
Accuracy for Source D: 56.38%
Accuracy for Source E: 83.95%
Accuracy for Source F: 92.48%
Epoch 5/10, Loss: 0.7340, Accuracy: 72.36%
Accurac

In [20]:
# Bootstrap sampling and testing
def bootstrap_train_and_test(model, train_data, test_loader, criterion, optimizer_params, num_epochs=10, num_bootstrap=5, sample_percentage=0.8):
    bootstrap_accuracies = []
    source_bootstrap_accuracies = {source: [] for source in 'ABCDEF'}
    for i in range(num_bootstrap):
        # Create a bootstrap sample from the training data
        indices = np.random.choice(len(train_data), size=int(sample_percentage * len(train_data)), replace=False)
        bootstrap_subset = Subset(train_data, indices)
        bootstrap_loader = DataLoader(bootstrap_subset, batch_size=128, shuffle=True)

        # Initialize and train the model
        optimizer = torch.optim.Adam(model.parameters(), **optimizer_params)

        for epoch in range(num_epochs):
            model.train()
            for images, labels, source_labels, participant_ids in bootstrap_loader:
                # Convert source_labels to tensor of source IDs
                source_ids = torch.tensor(map_source_labels_to_indices(source_labels), dtype=torch.long)
                source_ids = source_ids.unsqueeze(1).float()  # Convert to float and unsqueeze
                
                # Concatenate the source IDs with the input data
                inputs = torch.cat((images, source_ids), dim=1)

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

        # Evaluate on the test set
        model.eval()
        test_loss, correct, total = 0, 0, 0
        source_counts = {s: 0 for s in 'ABCDEF'}
        source_correct = {s: 0 for s in 'ABCDEF'}
        with torch.no_grad():
            for images, labels, source_labels, participant_ids in test_loader:
                # Convert source_labels to tensor of source IDs
                source_ids = torch.tensor(map_source_labels_to_indices(source_labels), dtype=torch.long)
                source_ids = source_ids.unsqueeze(1).float()  # Convert to float and unsqueeze

                # Concatenate the source IDs with the input data
                inputs = torch.cat((images, source_ids), dim=1)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                test_loss += loss.item() * labels.size(0)
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)
                for i, source in enumerate(source_labels):
                    source_counts[source] += 1
                    if predicted[i] == labels[i]:
                        source_correct[source] += 1

        accuracy = correct / total * 100.0
        bootstrap_accuracies.append(accuracy)
        source_accuracies = {s: (source_correct[s] / source_counts[s] * 100) if source_counts[s] > 0 else 0 for s in 'ABCDEF'}
        for source in source_accuracies:
            source_bootstrap_accuracies[source].append(source_accuracies[source])

    mean_accuracy = np.mean(bootstrap_accuracies)
    std_accuracy = np.std(bootstrap_accuracies)
    mean_source_accuracies = {s: np.mean(source_bootstrap_accuracies[s]) for s in 'ABCDEF'}
    std_source_accuracies = {s: np.std(source_bootstrap_accuracies[s]) for s in 'ABCDEF'}

    return mean_accuracy, std_accuracy, mean_source_accuracies, std_source_accuracies

test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Perform bootstrap training and testing
mean_bootstrap, std_bootstrap, mean_source_accuracies, std_source_accuracies = bootstrap_train_and_test(
    best_model, train_dataset, test_loader, criterion, {'lr': best_hyperparams['lr']}, num_epochs=10
)

print(f'Bootstrap results: Mean accuracy: {mean_bootstrap:.2f}%, Std Dev: {std_bootstrap:.2f}%')
for source in mean_source_accuracies:
    print(f'Source {source} - Bootstrap Mean: {mean_source_accuracies[source]:.2f}%, Std Dev: {std_source_accuracies[source]:.2f}%')

Bootstrap results: Mean accuracy: 79.09%, Std Dev: 1.08%
Source A - Bootstrap Mean: 93.12%, Std Dev: 0.59%
Source B - Bootstrap Mean: 10.59%, Std Dev: 0.56%
Source C - Bootstrap Mean: 87.94%, Std Dev: 0.71%
Source D - Bootstrap Mean: 94.91%, Std Dev: 0.61%
Source E - Bootstrap Mean: 92.72%, Std Dev: 5.68%
Source F - Bootstrap Mean: 95.40%, Std Dev: 0.39%
