## Data Preparation
We'll start by importing the necessary libraries and preparing the CIFAR-10 dataset.

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score
from torch.utils.data import DataLoader, Subset
import numpy as np
import math

In [14]:
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Transformations for the CIFAR-10 dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

In [15]:
# Download CIFAR-10 dataset
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)


print(f'Original training dataset size: {len(train_dataset)}')

Files already downloaded and verified
Files already downloaded and verified
Original training dataset size: 50000
Smaller training dataset size: 5000


In [25]:
# Function to split dataset into shards and slices
def split_dataset(dataset, num_shards, num_slices):
    shard_size = len(dataset) // num_shards
    shards = [Subset(dataset, range(i * shard_size, (i + 1) * shard_size)) for i in range(num_shards)]
    slices = []
    for shard in shards:
        slice_size = len(shard) // num_slices
        shard_slices = [Subset(shard, range(j * slice_size, (j + 1) * slice_size)) for j in range(num_slices)]
        slices.append(shard_slices)
    return slices

## Model Training
Next, we'll define a function to train the model using the slices from each shard.

In [17]:
def train_model_on_slices(slices, epochs_per_slice):
    trained_models = []
    criterion = nn.CrossEntropyLoss()
    print('Training...')

    print(f'Number of shards (slices): {len(slices)}')
    counter1 = 0  # Counter for shards
    counter2 = 0  # Counter for slices

    for shard in slices:
        counter1 += 1
        print(f'Training shard {counter1}/{len(slices)}')

        model = models.resnet18(pretrained=True)
        # Modify ResNet-18 to add fully connected layers
        num_features = model.fc.in_features
        model.fc = nn.Sequential(
            nn.Linear(num_features, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 10)  # CIFAR-10 has 10 classes
        )
        model = model.to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        print(f'Number of slices in shard {counter1}: {len(shard)}')
        for slice in shard:
            counter2 += 1
            print(f'Training slice {counter2} in shard {counter1}')

            dataloader = DataLoader(slice, batch_size=64, shuffle=True)
            model.train()
            for epoch in range(epochs_per_slice):
                for images, labels in dataloader:
                    images, labels = images.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

        trained_models.append(model)

    return trained_models


## Model Aggregation
We'll aggregate the trained models by averaging their predictions.

In [18]:
def aggregate_models(trained_models, dataloader):
    all_probabilities = []
    all_predictions = []
    all_labels = []
    loop_count = 0  # Counter for the number of iterations
    dataloader_size = len(dataloader)
    print(f'DataLoader size (number of batches): {dataloader_size}')

    with torch.no_grad():
        for images, labels in dataloader:
            print(f'Iteration: {loop_count}')
            loop_count += 1  # Increment counter
            images = images.to(device)
            outputs = [model(images).cpu() for model in trained_models]
            avg_output = sum(outputs) / len(outputs)
            probabilities = nn.Softmax(dim=1)(avg_output)  # Apply softmax to get probabilities
            _, predictions = torch.max(probabilities, 1)
            all_probabilities.extend(probabilities.numpy())
            all_predictions.extend(predictions.numpy())
            all_labels.extend(labels.numpy())

    print(f'The for loop in aggregate_models ran {loop_count} times.')
    return all_probabilities, all_predictions, all_labels


## Evaluation

In [19]:
def evaluate_model(probabilities, predictions, labels):
    f1 = f1_score(labels, predictions, average='macro')
    accuracy = accuracy_score(labels, predictions)
    precision = precision_score(labels, predictions, average='macro')
    recall = recall_score(labels, predictions, average='macro')
    # For AUROC, we need probabilities
    auroc = roc_auc_score(labels, probabilities, multi_class='ovr')
    return f1, accuracy, precision, recall, auroc



In [20]:
# Store the results and models
initial_results = []
trained_models_dict = {}

In [29]:
# Training and evaluation process for different configurations
S = 20
R = 20
epochs_per_slice = 1  # Number of epochs to train per slice for quick testing


print(f'Training with S={S}, R={R}...')
slices = split_dataset(train_dataset, S, R)
trained_models = train_model_on_slices(slices, epochs_per_slice)
trained_models_dict[(S, R)] = trained_models  # Save the trained models in the dictionary
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Aggregate models and evaluate
probabilities, predictions, labels = aggregate_models(trained_models, test_loader)
metrics = evaluate_model(probabilities, predictions, labels)

# Store results
initial_results.append({
    'S': S,
    'R': R,
    'F1 Score': metrics[0],
    'Accuracy': metrics[1],
    'Precision': metrics[2],
    'Recall': metrics[3],
    'AUROC': metrics[4]
})

Training with S=20, R=20...
Training...
Number of shards (slices): 20
Training shard 1/20
Number of slices in shard 1: 20
Training slice 1 in shard 1
Training slice 2 in shard 1
Training slice 3 in shard 1
Training slice 4 in shard 1
Training slice 5 in shard 1
Training slice 6 in shard 1
Training slice 7 in shard 1
Training slice 8 in shard 1
Training slice 9 in shard 1
Training slice 10 in shard 1
Training slice 11 in shard 1
Training slice 12 in shard 1
Training slice 13 in shard 1
Training slice 14 in shard 1
Training slice 15 in shard 1
Training slice 16 in shard 1
Training slice 17 in shard 1
Training slice 18 in shard 1
Training slice 19 in shard 1
Training slice 20 in shard 1
Training shard 2/20
Number of slices in shard 2: 20
Training slice 21 in shard 2
Training slice 22 in shard 2
Training slice 23 in shard 2
Training slice 24 in shard 2
Training slice 25 in shard 2
Training slice 26 in shard 2
Training slice 27 in shard 2
Training slice 28 in shard 2
Training slice 29 in sh

In [30]:
# Training and evaluation process for different configurations
S = 10
R = 20
epochs_per_slice = 1  # Number of epochs to train per slice for quick testing


print(f'Training with S={S}, R={R}...')
slices = split_dataset(train_dataset, S, R)
trained_models = train_model_on_slices(slices, epochs_per_slice)
trained_models_dict[(S, R)] = trained_models  # Save the trained models in the dictionary
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Aggregate models and evaluate
probabilities, predictions, labels = aggregate_models(trained_models, test_loader)
metrics = evaluate_model(probabilities, predictions, labels)

# Store results
initial_results.append({
    'S': S,
    'R': R,
    'F1 Score': metrics[0],
    'Accuracy': metrics[1],
    'Precision': metrics[2],
    'Recall': metrics[3],
    'AUROC': metrics[4]
})

Training with S=10, R=20...
Training...
Number of shards (slices): 10
Training shard 1/10




Number of slices in shard 1: 20
Training slice 1 in shard 1
Training slice 2 in shard 1
Training slice 3 in shard 1
Training slice 4 in shard 1
Training slice 5 in shard 1
Training slice 6 in shard 1
Training slice 7 in shard 1
Training slice 8 in shard 1
Training slice 9 in shard 1
Training slice 10 in shard 1
Training slice 11 in shard 1
Training slice 12 in shard 1
Training slice 13 in shard 1
Training slice 14 in shard 1
Training slice 15 in shard 1
Training slice 16 in shard 1
Training slice 17 in shard 1
Training slice 18 in shard 1
Training slice 19 in shard 1
Training slice 20 in shard 1
Training shard 2/10
Number of slices in shard 2: 20
Training slice 21 in shard 2
Training slice 22 in shard 2
Training slice 23 in shard 2
Training slice 24 in shard 2
Training slice 25 in shard 2
Training slice 26 in shard 2
Training slice 27 in shard 2
Training slice 28 in shard 2
Training slice 29 in shard 2
Training slice 30 in shard 2
Training slice 31 in shard 2
Training slice 32 in shard

In [31]:
# Training and evaluation process for different configurations
S = 5
R = 20
epochs_per_slice = 1  # Number of epochs to train per slice for quick testing


print(f'Training with S={S}, R={R}...')
slices = split_dataset(train_dataset, S, R)
trained_models = train_model_on_slices(slices, epochs_per_slice)
trained_models_dict[(S, R)] = trained_models  # Save the trained models in the dictionary
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Aggregate models and evaluate
probabilities, predictions, labels = aggregate_models(trained_models, test_loader)
metrics = evaluate_model(probabilities, predictions, labels)

# Store results
initial_results.append({
    'S': S,
    'R': R,
    'F1 Score': metrics[0],
    'Accuracy': metrics[1],
    'Precision': metrics[2],
    'Recall': metrics[3],
    'AUROC': metrics[4]
})

Training with S=5, R=20...
Training...
Number of shards (slices): 5
Training shard 1/5




Number of slices in shard 1: 20
Training slice 1 in shard 1
Training slice 2 in shard 1
Training slice 3 in shard 1
Training slice 4 in shard 1
Training slice 5 in shard 1
Training slice 6 in shard 1
Training slice 7 in shard 1
Training slice 8 in shard 1
Training slice 9 in shard 1
Training slice 10 in shard 1
Training slice 11 in shard 1
Training slice 12 in shard 1
Training slice 13 in shard 1
Training slice 14 in shard 1
Training slice 15 in shard 1
Training slice 16 in shard 1
Training slice 17 in shard 1
Training slice 18 in shard 1
Training slice 19 in shard 1
Training slice 20 in shard 1
Training shard 2/5
Number of slices in shard 2: 20
Training slice 21 in shard 2
Training slice 22 in shard 2
Training slice 23 in shard 2
Training slice 24 in shard 2
Training slice 25 in shard 2
Training slice 26 in shard 2
Training slice 27 in shard 2
Training slice 28 in shard 2
Training slice 29 in shard 2
Training slice 30 in shard 2
Training slice 31 in shard 2
Training slice 32 in shard 

In [32]:
# Training and evaluation process for different configurations
S = 20
R = 10
epochs_per_slice = 1  # Number of epochs to train per slice for quick testing


print(f'Training with S={S}, R={R}...')
slices = split_dataset(train_dataset, S, R)
trained_models = train_model_on_slices(slices, epochs_per_slice)
trained_models_dict[(S, R)] = trained_models  # Save the trained models in the dictionary
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Aggregate models and evaluate
probabilities, predictions, labels = aggregate_models(trained_models, test_loader)
metrics = evaluate_model(probabilities, predictions, labels)

# Store results
initial_results.append({
    'S': S,
    'R': R,
    'F1 Score': metrics[0],
    'Accuracy': metrics[1],
    'Precision': metrics[2],
    'Recall': metrics[3],
    'AUROC': metrics[4]
})

Training with S=20, R=10...
Training...
Number of shards (slices): 20
Training shard 1/20




Number of slices in shard 1: 10
Training slice 1 in shard 1
Training slice 2 in shard 1
Training slice 3 in shard 1
Training slice 4 in shard 1
Training slice 5 in shard 1
Training slice 6 in shard 1
Training slice 7 in shard 1
Training slice 8 in shard 1
Training slice 9 in shard 1
Training slice 10 in shard 1
Training shard 2/20
Number of slices in shard 2: 10
Training slice 11 in shard 2
Training slice 12 in shard 2
Training slice 13 in shard 2
Training slice 14 in shard 2
Training slice 15 in shard 2
Training slice 16 in shard 2
Training slice 17 in shard 2
Training slice 18 in shard 2
Training slice 19 in shard 2
Training slice 20 in shard 2
Training shard 3/20
Number of slices in shard 3: 10
Training slice 21 in shard 3
Training slice 22 in shard 3
Training slice 23 in shard 3
Training slice 24 in shard 3
Training slice 25 in shard 3
Training slice 26 in shard 3
Training slice 27 in shard 3
Training slice 28 in shard 3
Training slice 29 in shard 3
Training slice 30 in shard 3
Tra

In [33]:
# Training and evaluation process for different configurations
S = 10
R = 10
epochs_per_slice = 1  # Number of epochs to train per slice for quick testing


print(f'Training with S={S}, R={R}...')
slices = split_dataset(train_dataset, S, R)
trained_models = train_model_on_slices(slices, epochs_per_slice)
trained_models_dict[(S, R)] = trained_models  # Save the trained models in the dictionary
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Aggregate models and evaluate
probabilities, predictions, labels = aggregate_models(trained_models, test_loader)
metrics = evaluate_model(probabilities, predictions, labels)

# Store results
initial_results.append({
    'S': S,
    'R': R,
    'F1 Score': metrics[0],
    'Accuracy': metrics[1],
    'Precision': metrics[2],
    'Recall': metrics[3],
    'AUROC': metrics[4]
})

Training with S=10, R=10...
Training...
Number of shards (slices): 10
Training shard 1/10




Number of slices in shard 1: 10
Training slice 1 in shard 1
Training slice 2 in shard 1
Training slice 3 in shard 1
Training slice 4 in shard 1
Training slice 5 in shard 1
Training slice 6 in shard 1
Training slice 7 in shard 1
Training slice 8 in shard 1
Training slice 9 in shard 1
Training slice 10 in shard 1
Training shard 2/10
Number of slices in shard 2: 10
Training slice 11 in shard 2
Training slice 12 in shard 2
Training slice 13 in shard 2
Training slice 14 in shard 2
Training slice 15 in shard 2
Training slice 16 in shard 2
Training slice 17 in shard 2
Training slice 18 in shard 2
Training slice 19 in shard 2
Training slice 20 in shard 2
Training shard 3/10
Number of slices in shard 3: 10
Training slice 21 in shard 3
Training slice 22 in shard 3
Training slice 23 in shard 3
Training slice 24 in shard 3
Training slice 25 in shard 3
Training slice 26 in shard 3
Training slice 27 in shard 3
Training slice 28 in shard 3
Training slice 29 in shard 3
Training slice 30 in shard 3
Tra

In [34]:
# Training and evaluation process for different configurations
S = 5
R = 10
epochs_per_slice = 1  # Number of epochs to train per slice for quick testing


print(f'Training with S={S}, R={R}...')
slices = split_dataset(train_dataset, S, R)
trained_models = train_model_on_slices(slices, epochs_per_slice)
trained_models_dict[(S, R)] = trained_models  # Save the trained models in the dictionary
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Aggregate models and evaluate
probabilities, predictions, labels = aggregate_models(trained_models, test_loader)
metrics = evaluate_model(probabilities, predictions, labels)

# Store results
initial_results.append({
    'S': S,
    'R': R,
    'F1 Score': metrics[0],
    'Accuracy': metrics[1],
    'Precision': metrics[2],
    'Recall': metrics[3],
    'AUROC': metrics[4]
})

Training with S=5, R=10...
Training...
Number of shards (slices): 5
Training shard 1/5




Number of slices in shard 1: 10
Training slice 1 in shard 1
Training slice 2 in shard 1
Training slice 3 in shard 1
Training slice 4 in shard 1
Training slice 5 in shard 1
Training slice 6 in shard 1
Training slice 7 in shard 1
Training slice 8 in shard 1
Training slice 9 in shard 1
Training slice 10 in shard 1
Training shard 2/5
Number of slices in shard 2: 10
Training slice 11 in shard 2
Training slice 12 in shard 2
Training slice 13 in shard 2
Training slice 14 in shard 2
Training slice 15 in shard 2
Training slice 16 in shard 2
Training slice 17 in shard 2
Training slice 18 in shard 2
Training slice 19 in shard 2
Training slice 20 in shard 2
Training shard 3/5
Number of slices in shard 3: 10
Training slice 21 in shard 3
Training slice 22 in shard 3
Training slice 23 in shard 3
Training slice 24 in shard 3
Training slice 25 in shard 3
Training slice 26 in shard 3
Training slice 27 in shard 3
Training slice 28 in shard 3
Training slice 29 in shard 3
Training slice 30 in shard 3
Train

In [35]:
# Training and evaluation process for different configurations
S = 20
R = 5
epochs_per_slice = 1  # Number of epochs to train per slice for quick testing


print(f'Training with S={S}, R={R}...')
slices = split_dataset(train_dataset, S, R)
trained_models = train_model_on_slices(slices, epochs_per_slice)
trained_models_dict[(S, R)] = trained_models  # Save the trained models in the dictionary
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Aggregate models and evaluate
probabilities, predictions, labels = aggregate_models(trained_models, test_loader)
metrics = evaluate_model(probabilities, predictions, labels)

# Store results
initial_results.append({
    'S': S,
    'R': R,
    'F1 Score': metrics[0],
    'Accuracy': metrics[1],
    'Precision': metrics[2],
    'Recall': metrics[3],
    'AUROC': metrics[4]
})

Training with S=20, R=5...
Training...
Number of shards (slices): 20
Training shard 1/20




Number of slices in shard 1: 5
Training slice 1 in shard 1
Training slice 2 in shard 1
Training slice 3 in shard 1
Training slice 4 in shard 1
Training slice 5 in shard 1
Training shard 2/20
Number of slices in shard 2: 5
Training slice 6 in shard 2
Training slice 7 in shard 2
Training slice 8 in shard 2
Training slice 9 in shard 2
Training slice 10 in shard 2
Training shard 3/20
Number of slices in shard 3: 5
Training slice 11 in shard 3
Training slice 12 in shard 3
Training slice 13 in shard 3
Training slice 14 in shard 3
Training slice 15 in shard 3
Training shard 4/20
Number of slices in shard 4: 5
Training slice 16 in shard 4
Training slice 17 in shard 4
Training slice 18 in shard 4
Training slice 19 in shard 4
Training slice 20 in shard 4
Training shard 5/20
Number of slices in shard 5: 5
Training slice 21 in shard 5
Training slice 22 in shard 5
Training slice 23 in shard 5
Training slice 24 in shard 5
Training slice 25 in shard 5
Training shard 6/20
Number of slices in shard 6: 

In [36]:
# Training and evaluation process for different configurations
S = 10
R = 5
epochs_per_slice = 1  # Number of epochs to train per slice for quick testing


print(f'Training with S={S}, R={R}...')
slices = split_dataset(train_dataset, S, R)
trained_models = train_model_on_slices(slices, epochs_per_slice)
trained_models_dict[(S, R)] = trained_models  # Save the trained models in the dictionary
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Aggregate models and evaluate
probabilities, predictions, labels = aggregate_models(trained_models, test_loader)
metrics = evaluate_model(probabilities, predictions, labels)

# Store results
initial_results.append({
    'S': S,
    'R': R,
    'F1 Score': metrics[0],
    'Accuracy': metrics[1],
    'Precision': metrics[2],
    'Recall': metrics[3],
    'AUROC': metrics[4]
})

Training with S=10, R=5...
Training...
Number of shards (slices): 10
Training shard 1/10




Number of slices in shard 1: 5
Training slice 1 in shard 1
Training slice 2 in shard 1
Training slice 3 in shard 1
Training slice 4 in shard 1
Training slice 5 in shard 1
Training shard 2/10
Number of slices in shard 2: 5
Training slice 6 in shard 2
Training slice 7 in shard 2
Training slice 8 in shard 2
Training slice 9 in shard 2
Training slice 10 in shard 2
Training shard 3/10
Number of slices in shard 3: 5
Training slice 11 in shard 3
Training slice 12 in shard 3
Training slice 13 in shard 3
Training slice 14 in shard 3
Training slice 15 in shard 3
Training shard 4/10
Number of slices in shard 4: 5
Training slice 16 in shard 4
Training slice 17 in shard 4
Training slice 18 in shard 4
Training slice 19 in shard 4
Training slice 20 in shard 4
Training shard 5/10
Number of slices in shard 5: 5
Training slice 21 in shard 5
Training slice 22 in shard 5
Training slice 23 in shard 5
Training slice 24 in shard 5
Training slice 25 in shard 5
Training shard 6/10
Number of slices in shard 6: 

In [37]:
# Training and evaluation process for different configurations
S = 5
R = 5
epochs_per_slice = 1  # Number of epochs to train per slice for quick testing


print(f'Training with S={S}, R={R}...')
slices = split_dataset(train_dataset, S, R)
trained_models = train_model_on_slices(slices, epochs_per_slice)
trained_models_dict[(S, R)] = trained_models  # Save the trained models in the dictionary
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Aggregate models and evaluate
probabilities, predictions, labels = aggregate_models(trained_models, test_loader)
metrics = evaluate_model(probabilities, predictions, labels)

# Store results
initial_results.append({
    'S': S,
    'R': R,
    'F1 Score': metrics[0],
    'Accuracy': metrics[1],
    'Precision': metrics[2],
    'Recall': metrics[3],
    'AUROC': metrics[4]
})

Training with S=5, R=5...
Training...
Number of shards (slices): 5
Training shard 1/5




Number of slices in shard 1: 5
Training slice 1 in shard 1
Training slice 2 in shard 1
Training slice 3 in shard 1
Training slice 4 in shard 1
Training slice 5 in shard 1
Training shard 2/5
Number of slices in shard 2: 5
Training slice 6 in shard 2
Training slice 7 in shard 2
Training slice 8 in shard 2
Training slice 9 in shard 2
Training slice 10 in shard 2
Training shard 3/5
Number of slices in shard 3: 5
Training slice 11 in shard 3
Training slice 12 in shard 3
Training slice 13 in shard 3
Training slice 14 in shard 3
Training slice 15 in shard 3
Training shard 4/5
Number of slices in shard 4: 5
Training slice 16 in shard 4
Training slice 17 in shard 4
Training slice 18 in shard 4
Training slice 19 in shard 4
Training slice 20 in shard 4
Training shard 5/5
Number of slices in shard 5: 5
Training slice 21 in shard 5
Training slice 22 in shard 5
Training slice 23 in shard 5
Training slice 24 in shard 5
Training slice 25 in shard 5
DataLoader size (number of batches): 157
Iteration: 0

In [39]:
 # Display results
for result in initial_results:
    print(f'Configuration (S={result["S"]}, R={result["R"]}):')
    print(f'F1 Score: {result["F1 Score"]}')
    print(f'Accuracy: {result["Accuracy"]}')
    print(f'Precision: {result["Precision"]}')
    print(f'Recall: {result["Recall"]}')
    print(f'AUROC: {result["AUROC"]}')
    print('----------------------------')

Configuration (S=20, R=20):
F1 Score: 0.7810039192815478
Accuracy: 0.7824
Precision: 0.7826249658833911
Recall: 0.7824
AUROC: 0.9738042055555557
----------------------------
Configuration (S=10, R=20):
F1 Score: 0.808689423258604
Accuracy: 0.81
Precision: 0.8090990870524216
Recall: 0.8100000000000002
AUROC: 0.9793278777777777
----------------------------
Configuration (S=5, R=20):
F1 Score: 0.8389940805613165
Accuracy: 0.8389
Precision: 0.8393041838572669
Recall: 0.8389
AUROC: 0.9845116166666668
----------------------------
Configuration (S=20, R=10):
F1 Score: 0.7822678423303093
Accuracy: 0.7832
Precision: 0.7825549313355484
Recall: 0.7832000000000001
AUROC: 0.9742195
----------------------------
Configuration (S=10, R=10):
F1 Score: 0.8175330787559074
Accuracy: 0.817
Precision: 0.8193586147688847
Recall: 0.817
AUROC: 0.9800932444444446
----------------------------
Configuration (S=5, R=10):
F1 Score: 0.8329393333646598
Accuracy: 0.8336
Precision: 0.8337860474230956
Recall: 0.8336
AUR

In [40]:
print(trained_models_dict)

{(20, 20): [ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inpl

In [11]:
# Function to unlearn specific data points
def unlearn_data(trained_models, data_to_forget, train_dataset, num_shards, num_slices, epochs_per_slice):
    # Identify which shards contain the data to be forgotten
    shard_indices = np.array_split(range(len(train_dataset)), num_shards)
    shards_to_update = set()
    for idx in data_to_forget:
        for shard_num, shard_idx in enumerate(shard_indices):
            if idx in shard_idx:
                shards_to_update.add(shard_num)
                break

    # Retrain only the affected shards
    slices = split_dataset(train_dataset, num_shards, num_slices)
    criterion = nn.CrossEntropyLoss()

    for shard_num in shards_to_update:
        model = models.resnet18(pretrained=True)
        num_features = model.fc.in_features
        model.fc = nn.Sequential(
            nn.Linear(num_features, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 10)  # CIFAR-10 has 10 classes
        )
        model = model.to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        # Remove data to be forgotten from the slices
        updated_slices = []
        for slice in slices[shard_num]:
            updated_indices = [idx for idx in slice.indices if idx not in data_to_forget]
            updated_slices.append(Subset(train_dataset, updated_indices))

        # Retrain the model on the updated slices
        for slice in updated_slices:
            dataloader = DataLoader(slice, batch_size=64, shuffle=True)
            model.train()
            for epoch in range(epochs_per_slice):
                for images, labels in dataloader:
                    images, labels = images.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

        trained_models[shard_num] = model  # Update the model for the affected shard

    return trained_models


In [9]:
unlearning_results = []

In [12]:
S_value = [20]
R_value = [20]

# Unlearning phase
data_to_forget = np.random.choice(len(train_dataset), 500, replace=False)
print(f'Unlearning 500 data points for S={S_value}, R={R_value}...')
updated_models = unlearn_data(trained_models, data_to_forget, train_dataset, S, R, epochs_per_slice)

# Aggregate updated models and evaluate
updated_probabilities, updated_predictions, updated_labels = aggregate_models(updated_models, test_loader)
updated_metrics = evaluate_model(updated_probabilities, updated_predictions, updated_labels)

# Store updated results
unlearning_results.append({
    'S': S,
    'R': R,
    'Unlearning': True,
    'F1 Score': updated_metrics[0],
    'Accuracy': updated_metrics[1],
    'Precision': updated_metrics[2],
    'Recall': updated_metrics[3],
    'AUROC': updated_metrics[4]
})

Unlearning 500 data points for S=[20], R=[20]...


NameError: name 'trained_models' is not defined

In [1]:
print("\nUnlearning Results:")

for result in unlearning_results:
    print(f'Configuration (S={result["S"]}, R={result["R"]}):')
    print(f'F1 Score: {result["F1 Score"]}')
    print(f'Accuracy: {result["Accuracy"]}')
    print(f'Precision: {result["Precision"]}')
    print(f'Recall: {result["Recall"]}')
    print(f'AUROC: {result["AUROC"]}')
    print('----------------------------')


Unlearning Results:


NameError: name 'unlearning_results' is not defined