## Cloud Computing for Distributed Big Data Applications Option : Centralized Machine Learning

#### The goal of this lab is to get you familiar with the basics of machine learning.

#### You will be asked to train an ML model for a relatively simple dataset 'MNIST' using the torch package

In [23]:
pip install torch

Note: you may need to restart the kernel to use updated packages.


In [24]:
pip install torchvision


Note: you may need to restart the kernel to use updated packages.


In [25]:
pip install matplotlib

Note: you may need to restart the kernel to use updated packages.


In [26]:
## import the package

import torch

In [27]:
from IPython import display
from base64 import b64decode

#display.Image(b64decode(base64_data))

See the documentation
https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html 

#### Below is a potentially good model architecture to start with. 
![](model_architecture.jpeg)


In [28]:
## create the model 

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self,):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(10, 20, 5)
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(320, 50)  # 5*5 from image dimension
        self.fc2 = nn.Linear(50, 10)


        ## define the model layer according to the architecture
    def forward(self, x):
        x1 = F.relu(self.conv1(x))
        x2 = self.pool1(x1)
        x3 = F.relu(self.conv2(x2))
        x4 = self.pool2(x3)
        x5 = x4.view(-1, 320)
        x6 = F.relu(self.fc1(x5))
        x7 = F.relu(self.fc2(x6))
        return x7

See Documentation https://pytorch.org/vision/stable/datasets.html

##### The MNIST dataset is  a database of handwritten digits. it has a training set of 60,000 examples, and a test set of 10,000 examples.  The digits have been size-normalized and centered in a fixed-size image.

##### In the next two cells, you are asked to first : load both the training and test set of MNIST while appllying  the appropriate transformations & second : take a closer look at the dataset by checking the images' shape, the label & displaying some images ( Matplolib library is an option for that)

In [29]:
## Prepare Dataset
from torchvision import datasets, transforms


def get_dataset():  

    """ Returns train and test datasets for MNIST
        Data is normalized using (0.1307,), (0.3081,)
    """
    transforms_applied = transforms.Compose([
    transforms.ToTensor(),
   # transforms.Normalize((0.1307,), (0.3081,)),
     ])

    transforms_normalize = transforms.Normalize((0.1307,), (0.3081,))
    train_dataset = datasets.MNIST(
          "MNIST/processed/", train = True, download = True, transform = transforms_applied)
    test_dataset = datasets.MNIST(
        "MNIST/processed/", train = False, download = True, transform = transforms_applied)
            # "MNIST/processed/", train = False, download = True, transform = transforms_normalize)
    return train_dataset, test_dataset
  


In [30]:
import numpy as np
datasets_dirichlet_train_x = {}
datasets_dirichlet_train_y = {}
for i in range(10):
    datasets_dirichlet_train_x[i] = []
    datasets_dirichlet_train_y[i] = []



In [31]:
train_dataset, test_dataset = get_dataset()
alpha = 1
for i, object in enumerate(iter(train_dataset)):
    base_probability = np.ones(10)
    base_probability[object[1]] = alpha
    prob = np.random.dirichlet(base_probability)
    dataset_id = np.random.choice(10, p=prob)
    datasets_dirichlet_train_x[dataset_id].append(object[0])
    datasets_dirichlet_train_y[dataset_id].append(object[1])
    

print("finished")

finished


In [32]:
dataloader_train = {}
for i in range(10):
    tensor_x = torch.Tensor(len(datasets_dirichlet_train_x[i]), 28, 28)
    torch.cat(datasets_dirichlet_train_x[i], out = tensor_x)
    tensor_x = tensor_x.view(len(datasets_dirichlet_train_x[i]), 1, 28, 28)
    tensor_y = torch.Tensor(datasets_dirichlet_train_y[i])
    tensor_y = tensor_y.long()

    dataset_train = torch.utils.data.TensorDataset(tensor_x,tensor_y) # create your datset
    dataloader_train[i] = torch.utils.data.DataLoader(dataset_train, batch_size = 64) # create your dataloader
  


In [33]:
def get_test_dataloader():
    return torch.utils.data.DataLoader(test_dataset, batch_size = 64)


In [34]:
device = "cuda"

##### Here you have to implement the train function of your model. 

##### A test function is provided for the evaluation of the model.


In [35]:
def train(epoch, client_net, optimizer, criterion, trainloader_client):
    loss = 0.0
    client_net.train()
    for batch_idx, (images, labels) in enumerate(trainloader_client):
        #images, labels = images.to(device), labels.to(device)

        # Inference
        optimizer.zero_grad()
        outputs = client_net(images)
        batch_loss = criterion(outputs, labels)
        batch_loss.backward()
        optimizer.step()

        ## Initialize the model
        ##  classify the images using the model
        ## compute the loss between the real labels and predicted labels
        ## backpropagate the loss
        ## update the optimizer
        ## display the loss & epoch, every 10 batchs for instance.
    return client_net
def test(model, criterion, testloader):
    model.eval()
    loss, total, correct = 0.0, 0.0, 0.0

    for batch_idx, (images, labels) in enumerate(testloader):
        #images, labels = images.to(device), labels.to(device)

        # Inference
        outputs = model(images)
        batch_loss = criterion(outputs, labels)
        loss += batch_loss.item()

        # Prediction
        _, pred_labels = torch.max(outputs, 1)
        pred_labels = pred_labels.view(-1)
        correct += torch.sum(torch.eq(pred_labels, labels)).item()
        total += len(labels)

    accuracy = correct / total
    return accuracy*100, loss

In [36]:
import numpy as np
import random
import torch
#torch.manual_seed(0)
#random.seed(0)
#np.random.seed(0)

In [37]:
def get_trainloader_client_id(id):
    return dataloader_train[id]

In [38]:
def prepare_datasets():
    #TODO: prepare datasete dict accordingly, for now it was just done with several cells.
    pass

See Documentation : https://pytorch.org/docs/stable/generated/torch.optim.SGD.html and https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html

In [47]:
from time import time
import copy 



def main():
    
    ## You can play with these parameters & observe how they effect the training process.
    learning_rate = 0.001
    epochs = 100
    base_net = None
    base_net = Net()
    
    ## instantiate your model

    ## create two dataloaders (train & test) to load data into batches   
    #train_datatset, test_dataset = prepare_datasets()
    ## instantiate an optimizer for you model, as well as a criterion/loss function
    dataloader_test = get_test_dataloader()
   

    # Evaluating the model before any training steps is a good practice
    #accuracy, loss = test(net_dict[, criterion, testloader)
    #print('Before training :',f'Accuracy: {accuracy}', f'Loss: {loss}' )
    
    time0 = time() 
    for epoch in range(epochs):
        active_clients = np.random.choice(np.arange(10), 3, replace=False) 
        net_dict = {}
        for i in active_clients:
            net_dict[i] = copy.deepcopy(base_net)
            trainloader_client = get_trainloader_client_id(i)
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.SGD(net_dict[i].parameters(), lr=learning_rate)
            accuracy, loss = test(net_dict[i], criterion, dataloader_test)
            train(epoch, net_dict[i], optimizer, criterion, trainloader_client)
            print(f'Epoch :{epoch}', f'Client: {i}',f'Accuracy: {accuracy}', f'Loss: {loss}' )

        base_net = aggregrate_models(net_dict, base_net, active_clients)
        #base_net = net_dict[active_clients[0]]
        accuracy, loss = test(base_net, criterion, dataloader_test)

        print(f'Epoch :{epoch}',f'Accuracy: {accuracy}', f'Loss: {loss}' )
    print(f'Training Time (minutes) :{(time()-time0) / 60}')



In [48]:
def aggregrate_models(net_dict, base_net, active_clients): 
    update_state = {}
    for k in active_clients:
        first = True
        for key in net_dict[k].state_dict().keys():
            if first:
                update_state[key] = net_dict[k].state_dict()[key]
            else:
                update_state[key] += net_dict[k].state_dict()[key]
        first = False
    for key in update_state:
        update_state[key] = update_state[key] / len(active_clients)

    base_net.load_state_dict(update_state)
    return base_net

In [49]:
main()

Epoch :0 Client: 1 Accuracy: 9.34 Loss: 361.5147907733917
Epoch :0 Client: 2 Accuracy: 9.34 Loss: 361.5147907733917
Epoch :0 Client: 5 Accuracy: 9.34 Loss: 361.5147907733917
Epoch :0 Accuracy: 10.32 Loss: 361.4740743637085
Epoch :1 Client: 9 Accuracy: 10.32 Loss: 361.4740743637085
Epoch :1 Client: 7 Accuracy: 10.32 Loss: 361.4740743637085
Epoch :1 Client: 8 Accuracy: 10.32 Loss: 361.4740743637085


KeyboardInterrupt: 

In [None]:
np.random.choice(np.arange(10), 8, replace=False)

array([4, 6, 5, 3, 8, 9, 2, 7])