In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import numpy as np

In [None]:
# Select the compute device: CUDA when PyTorch reports it available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Function to create data partitions
def create_data_partitions(dataset, num_partitions, seed=None):
    """Randomly split ``dataset`` into ``num_partitions`` disjoint subsets.

    Every sample is assigned to exactly one partition. When the dataset size
    is not divisible by ``num_partitions`` the leftover samples are spread
    over the first partitions (one extra each) instead of being dropped, so
    partition sizes differ by at most one.

    Args:
        dataset: Any object supporting ``len()`` and integer indexing.
        num_partitions: Number of partitions to create; must be >= 1.
        seed: Optional seed for a dedicated NumPy generator. When ``None``
            (the default) the global ``np.random`` state is used, so a prior
            ``np.random.seed(...)`` call still controls the shuffle.

    Returns:
        dict mapping ``'node1'`` ... ``'node<num_partitions>'`` to a
        ``torch.utils.data.Subset`` of ``dataset``. Partitions may be empty
        if ``num_partitions`` exceeds ``len(dataset)``.

    Raises:
        ValueError: If ``num_partitions`` is smaller than 1.
    """
    if num_partitions < 1:
        raise ValueError(
            f"num_partitions must be a positive integer, got {num_partitions!r}"
        )

    num_samples = len(dataset)

    # Shuffle the sample indices so each partition is a random draw
    if seed is None:
        shuffled = np.random.permutation(num_samples)
    else:
        shuffled = np.random.default_rng(seed).permutation(num_samples)

    data_partitions = {}
    # array_split tolerates uneven division, unlike a fixed-size slice
    for i, chunk in enumerate(np.array_split(shuffled, num_partitions), start=1):
        # Plain Python ints keep Subset independent of NumPy scalar types
        data_partitions[f'node{i}'] = torch.utils.data.Subset(dataset, chunk.tolist())

    return data_partitions

In [None]:
# Preprocessing: convert images to tensors, then normalise every channel
# with mean 0.5 and std 0.5. Both CIFAR-10 splits share this pipeline and
# are downloaded into ./data when not already present.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
    ]
)
train_dataset, test_dataset = (
    CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

In [103]:
# Grab the raw image array and the labels, then list each distinct label
# together with the index of its first occurrence.
images, labels = train_dataset.data, np.asarray(train_dataset.targets)
np.unique(labels, return_index=True)

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([29,  4,  6,  9,  3, 27,  0,  7,  8,  1]))

In [None]:
num_partitions = 10

# Split the training set into one shard per simulated node
data_partitions_train = create_data_partitions(train_dataset, num_partitions)

# Report how many samples each node received
for node_name in data_partitions_train:
    node_data = data_partitions_train[node_name]
    print(f"{node_name} - Number of samples: {len(node_data)}")

In [None]:
# Evaluation loader over the test split: fixed order, batches of 128
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=128,
    shuffle=False,
)

In [None]:
class Model(nn.Module):
    """Small two-stage CNN that maps 3-channel images to 10 class logits.

    Each stage is a 3x3 convolution (stride 1, padding 1) followed by ReLU
    and 2x2 max-pooling. The pooled features are flattened to
    8 * 8 * 64 values, which matches 32x32 inputs after two halvings, and
    passed through a 128-unit hidden layer before the 10-way output layer.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 32 channels, spatial size halved by the pool
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 32 -> 64 channels, spatial size halved again
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head
        self.fc1 = nn.Linear(in_features=8 * 8 * 64, out_features=128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch ``x``."""
        features = self.conv1(x)
        features = self.relu1(features)
        features = self.pool1(features)

        features = self.conv2(features)
        features = self.relu2(features)
        features = self.pool2(features)

        # Flatten to rows of fc1's expected width (8 * 8 * 64)
        flat = features.view(-1, self.fc1.in_features)
        hidden = self.relu3(self.fc1(flat))
        return self.fc2(hidden)

In [None]:
def get_accuracy(model, dataloader):
    """Return the top-1 accuracy of ``model`` on ``dataloader`` as a percentage.

    The model is switched to eval mode for the measurement and its previous
    train/eval mode is restored afterwards, so calling this between training
    epochs does not silently leave the model in eval mode.

    Args:
        model: An ``nn.Module`` producing one score per class for each sample.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.

    Returns:
        float: ``100 * correct / total``, or ``0.0`` when the loader yields
        no samples.
    """
    was_training = model.training

    # Batches are moved to wherever the model's weights live (a no-op when
    # they are already there); parameter-free models are left untouched.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    correct = 0
    total = 0
    model.eval()
    try:
        with torch.no_grad():
            for inputs, labels in dataloader:
                if model_device is not None:
                    inputs = inputs.to(model_device)
                    labels = labels.to(model_device)
                outputs = model(inputs)
                # Index of the highest score along the class dimension
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Hand the model back in the mode the caller had it in
        model.train(was_training)

    if total == 0:
        return 0.0
    return 100 * (correct / total)

def train_local_model(model, dataloader, testloader, criterion, optimizer, runs, target_accuracy=80):
    """Train ``model`` on ``dataloader`` for up to ``runs`` epochs.

    After every epoch the model is scored with ``get_accuracy`` on
    ``testloader``; training stops early once that accuracy reaches
    ``target_accuracy``. Progress is printed, nothing is returned.

    Args:
        model: ``nn.Module`` to optimise in place.
        dataloader: Iterable of ``(inputs, labels)`` training batches.
        testloader: Iterable of ``(inputs, labels)`` batches used for the
            per-epoch accuracy check.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        runs: Maximum number of epochs.
        target_accuracy: Accuracy (in percent) at which training stops early.

    NOTE(review): early stopping is decided on ``testloader``; if that is the
    held-out test set, the reported accuracy is optimistically biased.
    Consider passing a validation split instead.
    """
    # Batches are moved to wherever the model's weights live (a no-op when
    # they are already there); parameter-free models are left untouched.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    for epoch in range(runs):
        # The per-epoch evaluation may leave the model in eval mode, so
        # training mode is re-enabled at the start of every epoch.
        model.train()
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in dataloader:
            if model_device is not None:
                inputs = inputs.to(model_device)
                labels = labels.to(model_device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # Average over the training batches actually seen (not the test
        # loader's length); max() guards against an empty loader.
        mean_loss = running_loss / max(num_batches, 1)

        # Check accuracy after each epoch and break if target accuracy is reached
        accuracy = get_accuracy(model, testloader)
        print(f"Epoch {epoch+1}, Loss: {mean_loss}, Accuracy: {accuracy}")
        if accuracy >= target_accuracy:
            print(f"Target accuracy ({target_accuracy}%) achieved at epoch {epoch+1}.")
            break
    print("\n")

def federated_learning(data_partitions, testloader, num_epochs, lr=0.001, target_accuracy=80):
    """Train one shared ``Model`` on every node's partition in turn.

    Despite the name, no per-node model copies or weight aggregation happen
    here: a single model instance is passed from node to node and trained
    on each partition sequentially, with a fresh Adam optimizer per node.

    Args:
        data_partitions: Mapping of node name to that node's dataset.
        testloader: Loader forwarded to ``train_local_model`` for the
            per-epoch accuracy check.
        num_epochs: Maximum epochs to train on each node.
        lr: Learning rate for each node's Adam optimizer.
        target_accuracy: Early-stopping threshold forwarded to
            ``train_local_model``.

    Returns:
        The trained ``Model`` instance.
    """
    global_model = Model()

    for shard in data_partitions.values():
        # Mini-batches of 32, reshuffled every epoch
        shard_loader = DataLoader(shard, batch_size=32, shuffle=True)

        # Loss and optimizer are rebuilt per node, so Adam's running
        # statistics do not carry over between nodes.
        train_local_model(
            global_model,
            shard_loader,
            testloader,
            nn.CrossEntropyLoss(),
            optim.Adam(global_model.parameters(), lr=lr),
            runs=num_epochs,
            target_accuracy=target_accuracy,
        )

    return global_model

In [None]:
# Train across all node partitions: up to 10 epochs per node, stopping a
# node early once 80% accuracy is reached on test_loader.
global_model = federated_learning(
    data_partitions=data_partitions_train,
    testloader=test_loader,
    num_epochs=10,
    lr=0.001,
    target_accuracy=80,
)

In [110]:
def poison(data_partitions_train, set_number, num_changes = 1000):
    num = set_number * 5000
    node_num = "node" + str(set_number)
    node_data = data_partitions_train[node_num]

    # Generating random indices to be changed
    random_indices = np.random.choice((num-5000, num), size=num_changes, replace=True)

    img = node_data.data
    lbl = np.array(node_data.targets)

    # Iterating over dataset and chanding image and label
    for index in range(num-5000, num):
        modified_image = np.ones_like(img[index])
        modified_label =  (lbl[index] + 1) % 10
        img[index] = modified_image
        lbl[index] = modified_label