## Imports

In [1]:
# standard library
import sys, copy, csv

# external packages
import torch
from torchvision import datasets, transforms
from torch import nn, optim
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

import numpy as np
import matplotlib.pyplot as plt

# local packages
from utils import add_ids 
from class_split_data_loader import ClassSplitDataLoader
from autoencoder import Autoencoder

In [2]:
# Setup BigQuery upload
# BQuploader is a project-local helper; this cell creates the single uploader
# instance that the experiment cell further down uses to push result CSVs.
from BQuploader import BQuploader
# Project and dataset that the uploader is constructed with.
BQ_project_id = 'vertical-federated-learning'
dataset_id = 'experiment_data'
bq_uploader = BQuploader(BQ_project_id, dataset_id)
# To upload a local file:
# bq_uploader.load_local_file_to_table(file_name, table_name)

## Setup

In [3]:
# Hyper-parameters shared by the cells below. Later cells add the dataloader
# lists to this same dict before handing it to Autoencoder.
settings = dict(
    criterion=nn.MSELoss(),            # loss object stored under 'criterion'
    epochs=20,                         # epochs per call to train_network
    n_encoders=3,                      # also the number of dataloader pairs built below
    input_size=784,                    # 28 * 28 -- presumably the flattened MNIST image; confirm in autoencoder.py
    hidden_sizes_encoder=[512, 256],
    hidden_sizes_decoder=[256, 512],
    encoded_size=128,
)

In [4]:
# import data
# MNIST train and test splits, stored in / downloaded to the current directory
# and converted with ToTensor. add_ids is a project-local wrapper applied to the
# dataset class -- presumably it attaches a per-sample id; confirm in utils.py.
data_train = add_ids(MNIST)(".", download=True, transform=ToTensor())
data_test  = add_ids(MNIST)(".", train=False, download=True, transform=ToTensor())

In [5]:
# Build one train loader and one test loader per encoder index.
# remove_data / keep_order are passed explicitly; per the original note,
# partition_dataset defaults to "remove_data=True, keep_order=False".
loader_kwargs = dict(remove_data=False, keep_order=True, batch_size=128)

dataloaders, dataloaders_test = [], []
for class_idx in range(settings['n_encoders']):
    dataloaders.append(
        ClassSplitDataLoader(data_train, class_to_keep=class_idx, **loader_kwargs)
    )
    dataloaders_test.append(
        ClassSplitDataLoader(data_test, class_to_keep=class_idx, **loader_kwargs)
    )

# Expose both lists through the shared settings dict.
settings.update(dataloaders=dataloaders, dataloaders_test=dataloaders_test)

## Creating the network

In [6]:
# Build the model from the shared settings dict (hyper-parameters plus the
# dataloader lists added in the setup cells).
net = Autoencoder(settings)

## Train the network

In [10]:
def train_network(net, epochs=None, n_encoders=None):
    """Train ``net`` epoch by epoch, visiting every encoder once per epoch.

    In each epoch ``net.iter_training_one_encoder(k)`` is called for every
    encoder index ``k``; the ``(train_loss, test_loss)`` pair it returns is
    stored as that encoder's result for the epoch.

    Args:
        net: object exposing ``iter_training_one_encoder(k)`` which returns a
            ``(train_loss, test_loss)`` pair of scalars.
        epochs: number of epochs to run. Defaults to ``settings['epochs']``.
        n_encoders: number of encoder indices to visit per epoch. Defaults to
            ``settings['n_encoders']``.

    Returns:
        tuple: ``(train_perf, test_perf)``. Each is a list with one NumPy array
        per epoch; element ``k`` of an array is the loss reported for encoder
        ``k`` in that epoch.
    """
    if epochs is None:
        epochs = settings['epochs']
    if n_encoders is None:
        n_encoders = settings['n_encoders']

    train_perf, test_perf = [], []
    for i in range(epochs):

        epoch_train_loss = np.zeros(n_encoders)
        epoch_test_loss = np.zeros(n_encoders)

        for k in range(n_encoders):
            loss_train, loss_test = net.iter_training_one_encoder(k)
            epoch_train_loss[k] = loss_train
            epoch_test_loss[k] = loss_test

        # np.average already divides by the number of encoders; the previous
        # extra "/ n_encoders" shrank the reported mean by that factor again.
        print(f"Epoch {i}/{epochs}"
                +f" - Training loss: {np.average(epoch_train_loss):.4f}"
                +f" - testing MSE: {np.average(epoch_test_loss):.4f}")

        # Store the per-encoder losses unscaled. Each slot holds exactly one
        # encoder's loss, so dividing by n_encoders made the recorded values
        # depend on how many encoders were configured.
        train_perf.append(epoch_train_loss)
        test_perf.append(epoch_test_loss)
    return train_perf, test_perf


In [11]:
# BQ table
# Sweep over (n_encoders, encoded_size). For each combination, train `repeats`
# networks, write the last-epoch per-encoder MSE of every repeat to a temporary
# CSV file, and upload that file to the BigQuery table.
table_name = 'autoencoder_1'
file_name = 'temp.csv'

repeats = 5
for n_encoders in [1, 3, 10]:
    # Build exactly one train/test loader pair per encoder for this sweep value.
    # The lists created in the setup cell were sized for the initial
    # settings['n_encoders'], so reusing them here gave the model the wrong
    # number of loaders whenever n_encoders differed from that value.
    sweep_loaders_train, sweep_loaders_test = [], []
    for k in range(n_encoders):
        sweep_loaders_train.append(
            ClassSplitDataLoader(data_train, class_to_keep=k, remove_data=False, keep_order=True, batch_size=128)
        )
        sweep_loaders_test.append(
            ClassSplitDataLoader(data_test, class_to_keep=k, remove_data=False, keep_order=True, batch_size=128)
        )

    for encoded_size in [128, 56, 10]:
        settings['n_encoders'] = n_encoders
        settings['encoded_size'] = encoded_size
        settings['dataloaders'] = sweep_loaders_train
        settings['dataloaders_test'] = sweep_loaders_test
        BQsettings = [str(settings[x]) for x in ['criterion','n_encoders','epochs','encoded_size']]

        # newline='' is what the csv module expects for files passed to csv.writer;
        # without it, extra blank rows can appear on some platforms.
        with open(file_name, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['criterion','n_encoders','epochs','encoded_size','train_or_test','class','MSE'])

            for i in range(repeats):
                # Fresh network for every repeat so the repeats are independent
                # runs of settings['epochs'] epochs each. Reusing one network
                # meant repeat i had already been trained for i * epochs epochs
                # while the row still reported the plain 'epochs' value.
                net = Autoencoder(settings)
                train_perf, test_perf = train_network(net)
                for k in range(n_encoders):
                    writer.writerow(BQsettings + ['train', k, train_perf[-1][k]])
                    writer.writerow(BQsettings + ['test', k, test_perf[-1][k]])

        # The file is closed (and flushed) before it is handed to the uploader.
        bq_uploader.load_local_file_to_table(file_name, table_name)

Epoch 0/20 - Training loss: 0.1447 - testing MSE: 0.1413
Epoch 1/20 - Training loss: 0.1411 - testing MSE: 0.1397
Epoch 2/20 - Training loss: 0.1398 - testing MSE: 0.1386
Epoch 3/20 - Training loss: 0.1388 - testing MSE: 0.1379
Epoch 4/20 - Training loss: 0.1383 - testing MSE: 0.1375
Epoch 5/20 - Training loss: 0.1379 - testing MSE: 0.1373
Epoch 6/20 - Training loss: 0.1378 - testing MSE: 0.1371
Epoch 7/20 - Training loss: 0.1376 - testing MSE: 0.1369
Epoch 8/20 - Training loss: 0.1374 - testing MSE: 0.1368
Epoch 9/20 - Training loss: 0.1373 - testing MSE: 0.1367
Epoch 10/20 - Training loss: 0.1372 - testing MSE: 0.1366
Epoch 11/20 - Training loss: 0.1371 - testing MSE: 0.1365
Epoch 12/20 - Training loss: 0.1370 - testing MSE: 0.1365
Epoch 13/20 - Training loss: 0.1370 - testing MSE: 0.1364
Epoch 14/20 - Training loss: 0.1369 - testing MSE: 0.1364
Epoch 15/20 - Training loss: 0.1369 - testing MSE: 0.1364
Epoch 16/20 - Training loss: 0.1369 - testing MSE: 0.1363
Epoch 17/20 - Training l

ERROR:root:While loading to the table, errors were found : There are no column descriptions provided for table autoencoder_1_11a47d97_d002_4724_bf23_091c730a8fd7_source


Exception: Upload failed for table autoencoder_1

In [None]:
# Learning curves of the most recent training run: one training line and one
# testing line per encoder, plotted against the epoch index.
fig, ax = plt.subplots()

# Stack the per-epoch arrays so that column k holds encoder k's curve.
train_perf = np.array(train_perf)
test_perf = np.array(test_perf)

epoch_axis = range(settings['epochs'])
for enc in range(settings['n_encoders']):
    ax.plot(epoch_axis, train_perf[:, enc], label=f'training data,  encoder {enc}')
    ax.plot(epoch_axis, test_perf[:, enc], label=f'testing data, encoder {enc}')

ax.set_title("Learning curves")
ax.set_xlabel("iterations")
ax.set_ylabel("MSE")
ax.legend()
plt.show()