In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset,TensorDataset
from autoencoder import Autoencoder
from cifar_autoencoder import Cifar_Autoencoder
import torchvision
from model2 import classification_model
from model import cifar_classification_model
import copy
import partition
from pca import PCADigitReducer
from autoencoder import reduce_dimensions
from training import train,test,train_resnet,test_resnet,train_fashion
from federated_learning import distribute_global_model, federated_averaging
from torchvision.models import resnet18
from torchvision.models import mobilenet_v2
from model3 import MobileNetV2
import cluster
import torchvision.models as models

In [2]:
# Experiment-wide configuration shared by the cells below.

# Seed handed to torch.manual_seed at the end of this cell.
random_seed = 1

# Optimiser settings used for the SGD optimisers of the trial models.
learning_rate = 0.01
momentum = 0.5
n_epochs = 5

# Mini-batch sizes for the training and test DataLoaders.
batch_size_train, batch_size_test = 100, 1000

# Forwarded to train(); number of clusters forwarded to cluster.Cluster.
log_interval = 10
num_clusters = 2

# cuDNN is switched off globally (presumably for run-to-run reproducibility).
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

<torch._C.Generator at 0x1c7339651b0>

In [3]:
# Per-channel mean/std documented for torchvision's pretrained models:
# https://pytorch.org/hub/pytorch_vision_resnet/
_norm_mean = (0.485, 0.456, 0.406)
_norm_std = (0.229, 0.224, 0.225)

# Convert PIL images to tensors, then standardise each channel.
cifar10_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(_norm_mean, _norm_std)]
)

# Training split, served in shuffled mini-batches of `batch_size_train`.
cifar10_train_loader = DataLoader(
    datasets.CIFAR10('/files/', train=True, download=True, transform=cifar10_transform),
    shuffle=True,
    batch_size=batch_size_train,
)

# Test split, served in shuffled mini-batches of `batch_size_test`.
cifar10_test_loader = DataLoader(
    datasets.CIFAR10('/files/', train=False, download=True, transform=cifar10_transform),
    shuffle=True,
    batch_size=batch_size_test,
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
class CustomTensorDataset(TensorDataset):
    """TensorDataset that also exposes its first two tensors by name.

    The first positional tensor is published as ``data`` and the second as
    ``targets``, so code that reads ``dataset.targets`` can be pointed at an
    in-memory tensor dataset.

    Args:
        *tensors: Tensors forwarded unchanged to ``TensorDataset``; at least
            two are required because indices 0 and 1 are read.
    """

    def __init__(self, *tensors):
        super().__init__(*tensors)
        # Index explicitly (rather than unpack) so extra tensors are tolerated.
        self.data, self.targets = tensors[0], tensors[1]

In [5]:
# Shallow copies of the CIFAR-10 loaders, one pair per reduction method
# (PCA and autoencoder). copy.copy duplicates only the loader object, so each
# copy still refers to the same underlying dataset.
train_loader_pca, train_loader_auto = (
    copy.copy(cifar10_train_loader),
    copy.copy(cifar10_train_loader),
)
test_loader_pca, test_loader_auto = (
    copy.copy(cifar10_test_loader),
    copy.copy(cifar10_test_loader),
)

In [6]:
# Build a PCA-compressed copy of the training set: flatten every image, project
# onto `n_components` principal components, map back to pixel space and wrap
# the result in a DataLoader.

# Collect the whole training set as one (N, 3*32*32) matrix plus its labels.
# The loader shuffles, but images and labels are gathered batch by batch, so
# they stay aligned with each other.
flat_batches = []
label_batches = []
for data, labels in train_loader_pca:
    flat_batches.append(data.view(data.size(0), -1))  # flatten each image to a row
    label_batches.append(labels)
train_data = torch.cat(flat_batches, dim=0)
train_labels = torch.cat(label_batches, dim=0)

# PCADigitReducer works on numpy arrays.
train_data_np = train_data.numpy()

# Project onto the leading principal components.
n_components = 100
pca = PCADigitReducer(n_components)
train_data_reduced = pca.fit_transform(train_data_np)

# Map the reduced representation back to the flattened image space and
# restore the (N, 3, 32, 32) image layout.
train_data_reconstructed_np = pca.inverse_transform(train_data_reduced)
train_data_reconstructed = torch.tensor(
    train_data_reconstructed_np, dtype=torch.float32
).view(-1, 3, 32, 32)

# No further normalisation here: the batches above already went through the
# Normalize step of `cifar10_transform`, and PCA was fitted on those values.
# Standardising a second time (previously done with a different set of
# mean/std values) would put this data on a different scale from the test
# loader and from the other training loaders.
# NOTE(review): assumes PCADigitReducer.inverse_transform returns values in
# the same space that fit_transform received - confirm in pca.py.

# Wrap the reconstruction in a loader with the same batch size as the original.
batch_size_train = cifar10_train_loader.batch_size
train_dataset_pca = CustomTensorDataset(train_data_reconstructed, train_labels)
train_loader_reduced_pca = DataLoader(train_dataset_pca, batch_size=batch_size_train, shuffle=True)

In [7]:
# Fit the CIFAR autoencoder to reproduce its own input (MSE reconstruction loss).
latent_dim = 100  # latent dimension handed to Cifar_Autoencoder; adjust as needed
auto_num_epochs = 5

autoencoder = Cifar_Autoencoder(latent_dim=latent_dim)
auto_criterion = nn.MSELoss()
auto_optimizer = torch.optim.Adam(autoencoder.parameters(), lr=1e-3)

for epoch in range(auto_num_epochs):
    for images, _ in cifar10_train_loader:  # labels are not needed for reconstruction
        # Forward pass and reconstruction error against the input itself.
        reconstructed = autoencoder(images)
        loss = auto_criterion(reconstructed, images)

        # Clear stale gradients, backpropagate, update the weights.
        auto_optimizer.zero_grad()
        loss.backward()
        auto_optimizer.step()

    # Reports the loss of the last mini-batch seen in this epoch.
    print(f"Epoch [{epoch+1}/{auto_num_epochs}], Loss: {loss.item()}")


Epoch [1/5], Loss: 0.7438429594039917
Epoch [2/5], Loss: 0.6870750188827515
Epoch [3/5], Loss: 0.6336425542831421
Epoch [4/5], Loss: 0.6450321674346924
Epoch [5/5], Loss: 0.636914074420929


In [8]:
# Encode the training set with the trained autoencoder, decode it again and
# wrap the reconstructed images in a DataLoader.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The autoencoder was trained without being moved to a device, while the
# tensors below are sent to `device`; move the model there too so both sides
# of the decoder call live on the same device.
autoencoder.to(device)
autoencoder.eval()

latent_features, labels = reduce_dimensions(train_loader_auto, autoencoder.encoder, device)
latent_features = latent_features.detach()

# Decode without autograd. Otherwise every reconstructed image keeps a graph
# into the decoder parameters; the dataset would then hold that graph alive
# for the whole training set, and a classifier trained on it would try to
# backpropagate through the decoder.
with torch.no_grad():
    reconstructed_images = autoencoder.decoder(latent_features.to(device))
reconstructed_images = reconstructed_images.view(-1, 3, 32, 32)  # [N, channels, height, width]

reconstructed_dataset = CustomTensorDataset(reconstructed_images.cpu(), labels)
reduced_train_loader_auto = DataLoader(reconstructed_dataset, batch_size=batch_size_train, shuffle=True)

# Partition

In [9]:
# Classic (unreduced) data: split the CIFAR-10 training set across 4 clients
# using the project's balanced Dirichlet partitioner with alpha = 0.5.
trainingset = cifar10_train_loader.dataset
partitioned_data_classic = partition.balanced_dirichlet_partition(
    trainingset,
    partitions_number=4,
    alpha=0.5,
)

In [10]:
# Group the classic client partitions into `num_clusters` clusters.
import cluster  # note: the name `cluster` is rebound to a Cluster instance on the next line

cluster = cluster.Cluster(num_clusters=num_clusters)

# Number of distinct labels present in the training set.
targets = trainingset.targets
num_classes = len({label for label in targets})

partitioned_data_classic_clustered = clustered_data = cluster.apply_clustering(
    partitioned_data_classic, targets, num_classes
)

In [11]:
# One shuffled DataLoader per client partition of the classic training set.
classic_client_loaders = []
for client_indices in partitioned_data_classic.values():
    client_subset = Subset(trainingset, client_indices)
    classic_client_loaders.append(
        DataLoader(client_subset, batch_size=batch_size_train, shuffle=True)
    )

In [12]:
# One shuffled DataLoader per *clustered* partition of the classic training set.
# Stored under its own name (matching pca_client_loaders_clustered and
# auto_client_loaders_clustered) so the un-clustered `classic_client_loaders`
# built in the previous cell are not overwritten.
classic_client_loaders_clustered = [
    DataLoader(Subset(trainingset, indices), batch_size=batch_size_train, shuffle=True)
    for indices in partitioned_data_classic_clustered.values()
]

In [13]:
# PCA-reconstructed data: split across 4 clients using the project's balanced
# Dirichlet partitioner with alpha = 0.5.
trainingset_pca = train_loader_reduced_pca.dataset
partitioned_data_pca = partition.balanced_dirichlet_partition(
    trainingset_pca,
    partitions_number=4,
    alpha=0.5,
)

In [14]:
# Group the PCA client partitions into `num_clusters` clusters.
import cluster  # re-import: an earlier cell rebinds the name `cluster` to a Cluster instance

cluster = cluster.Cluster(num_clusters=num_clusters)

targets = trainingset_pca.targets
# `targets` is a tensor here. Iterating a tensor yields 0-d tensors that hash
# by object identity, so len(set(targets)) would return the number of samples
# instead of the number of classes; count distinct label values instead.
num_classes = torch.unique(torch.as_tensor(targets)).numel()
clustered_data = cluster.apply_clustering(partitioned_data_pca, targets, num_classes)

partitioned_data_pca_clustered = clustered_data

In [15]:
# One shuffled DataLoader per client partition of the PCA-reconstructed set.
pca_client_loaders = []
for client_indices in partitioned_data_pca.values():
    client_subset = Subset(trainingset_pca, client_indices)
    pca_client_loaders.append(
        DataLoader(client_subset, batch_size=batch_size_train, shuffle=True)
    )

In [16]:
# One shuffled DataLoader per clustered partition of the PCA-reconstructed set.
pca_client_loaders_clustered = []
for cluster_indices in partitioned_data_pca_clustered.values():
    cluster_subset = Subset(trainingset_pca, cluster_indices)
    pca_client_loaders_clustered.append(
        DataLoader(cluster_subset, batch_size=batch_size_train, shuffle=True)
    )

In [17]:
# Autoencoder-reconstructed data: split across 4 clients using the project's
# balanced Dirichlet partitioner with alpha = 0.5.
trainingset_auto = reduced_train_loader_auto.dataset
partitioned_data_auto = partition.balanced_dirichlet_partition(
    trainingset_auto,
    partitions_number=4,
    alpha=0.5,
)

In [18]:
import cluster

cluster = cluster.Cluster(num_clusters=num_clusters)

targets = trainingset_auto.targets
num_classes = len(set(targets)) 
clustered_data = cluster.apply_clustering(partitioned_data_auto, targets, num_classes)

partitioned_data_pca_clustered = clustered_data

In [19]:
auto_client_loaders = [
    DataLoader(Subset(trainingset_auto, indices), batch_size=batch_size_train, shuffle=True)
    for indices in partitioned_data_auto.values()
]

In [20]:
auto_client_loaders_clustered = [
    DataLoader(Subset(trainingset_auto, indices), batch_size=batch_size_train, shuffle=True)
    for indices in partitioned_data_pca_clustered.values()
]

# Models

In [30]:
# Define or import here models
# SqueezeNet 1.1 with pretrained weights; its classifier conv is replaced so
# the network emits `num_classes` outputs.
squeezenet = models.squeezenet1_1(pretrained=True)
squeezenet.eval()
num_classes = 10
squeezenet.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=1)
squeezenet.num_classes = num_classes
model = squeezenet

# Each trial gets its own deep copy. Plain assignment would make all three
# names refer to the same module, so training one trial would silently
# continue from the weights left behind by the previous one (and change
# `model` as well).
trial_model = copy.deepcopy(model)
trial_model_pca = copy.deepcopy(model)
trial_model_auto = copy.deepcopy(model)



# Test for execution time and accuracy before federated learning loop

In [31]:
# Smoke test (classic): train the trial model on the unreduced CIFAR-10 loader
# for `n_epochs` epochs.
train_losses, train_counter = [], []

optimizer = optim.SGD(
    trial_model.parameters(),
    lr=learning_rate,
    momentum=momentum,
)

for epoch in range(1, n_epochs + 1):
    train(
        epoch,
        trial_model,
        cifar10_train_loader,
        optimizer,
        log_interval,
        train_losses,
        train_counter,
    )



KeyboardInterrupt: 

In [25]:
# Evaluate the classic trial model on the CIFAR-10 test loader; test() is
# handed the list below to record its losses in.
test_losses_classic_weak = list()
test(trial_model, cifar10_test_loader, test_losses_classic_weak)




Test set: Avg. loss: nan, Accuracy: 1000/10000 (10%)



In [None]:
# Smoke test (PCA): train the PCA trial model on the PCA-reconstructed loader
# for `n_epochs` epochs.
train_losses, train_counter = [], []

optimizer = optim.SGD(
    trial_model_pca.parameters(),
    lr=learning_rate,
    momentum=momentum,
)

for epoch in range(1, n_epochs + 1):
    train(
        epoch,
        trial_model_pca,
        train_loader_reduced_pca,
        optimizer,
        log_interval,
        train_losses,
        train_counter,
    )

In [None]:
# Evaluate the PCA trial model on held-out data. `test_loader_pca` is the copy
# of the CIFAR-10 test loader created for this purpose; evaluating on
# `train_loader_reduced_pca` would report accuracy on the very samples the
# model was trained on.
test_losses_classic_pca = []
test(trial_model_pca, test_loader_pca, test_losses_classic_pca)

In [None]:
# Smoke test (autoencoder): train the auto trial model on the
# autoencoder-reconstructed loader for `n_epochs` epochs.
train_losses, train_counter = [], []

optimizer = optim.SGD(
    trial_model_auto.parameters(),
    lr=learning_rate,
    momentum=momentum,
)

for epoch in range(1, n_epochs + 1):
    train(
        epoch,
        trial_model_auto,
        reduced_train_loader_auto,
        optimizer,
        log_interval,
        train_losses,
        train_counter,
    )

In [None]:
# Evaluate the autoencoder trial model on held-out data. `test_loader_auto` is
# the copy of the CIFAR-10 test loader created for this purpose; evaluating on
# `reduced_train_loader_auto` would report accuracy on the very samples the
# model was trained on.
test_losses_classic_auto = []
test(trial_model_auto, test_loader_auto, test_losses_classic_auto)