In [None]:
import json
import os
import pickle

import numpy as np
from matplotlib import pyplot as plt
from scipy.interpolate import UnivariateSpline

from utils import average_server, average_simulation


In [None]:
# Directory names used by the cells below; they are resolved relative to the
# notebook's working directory.
SIMULATION_DIRECTORY = 'simulation'
SIMULATION_HISTORY = 'simulation_history'
# NOTE(review): a later cell iterates over SIMULATION_SERVER_HISTORY, but the
# name was never assigned, so a fresh "Restart & Run All" stopped there with a
# NameError. The directory name below is an assumption -- TODO confirm it
# against where the simulation actually writes the server history.
SIMULATION_SERVER_HISTORY = 'simulation_server_history'
TESTS_DIRECTORY = 'tests_average'

In [None]:
# Load the run configuration.
# The handle is deliberately not named `input`: at cell level a
# `with ... as input` rebinds the builtin `input()` to a (closed) file object
# for the rest of the kernel session.
with open('config.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

In [None]:
# Index used for the next `testNNN.json` summary file.
# Counting directory entries alone can hand out an index that is already taken
# (e.g. after an older summary was deleted), which would silently overwrite a
# stored result. Taking the maximum of the entry count and "highest existing
# index + 1" yields the same number as before when there are no gaps and never
# reuses an existing index.
_existing_entries = os.listdir(TESTS_DIRECTORY)
_used_indices = [
    int(name[len('test'):-len('.json')])
    for name in _existing_entries
    if name.startswith('test')
    and name.endswith('.json')
    and name[len('test'):-len('.json')].isdecimal()
]
test_number = max([len(_existing_entries)] + [index + 1 for index in _used_indices])

<style>
    h1 {
        text-align: center;
    }
</style>


# Averaging the simulation for the clients

In [None]:
# `average_simulation` is imported from the project-local `utils` module; the
# cells that follow index its result with the key 'loss'.
average = average_simulation()

# average

In [None]:
# Display the smallest value of the averaged client loss series.
min(average['loss'])

In [None]:
# Averaged client loss, plotted against its position in the series.
# A title and axis labels are set so the figure can be read on its own.
plt.plot(average['loss'])
plt.title('Average client loss')
# NOTE(review): assumes one loss value per round -- confirm.
plt.xlabel('round')
plt.ylabel('loss')
plt.show()

<style>
    h1 {
        text-align: center;
    }
</style>


# Averaging the simulation for the server

In [None]:
# `average_server` is imported from the project-local `utils` module; the
# cells that follow index its result with the key 'loss'.
server_average = average_server()

# server_average

In [None]:
# Display the last value of the averaged server loss series.
server_average['loss'][-1]

In [None]:
# Averaged server loss, plotted against its position in the series.
# A title and axis labels are set so the figure can be read on its own.
plt.plot(server_average['loss'])
plt.title('Average server loss')
# NOTE(review): assumes one loss value per round -- confirm.
plt.xlabel('round')
plt.ylabel('loss')
plt.show()

In [None]:
# Ratio of successive loss changes,
#     |loss[i+1] - loss[i]| / |loss[i] - loss[i-1]|,
# for i = 1 .. len(loss) - TAIL_SKIPPED - 1.
# Entries whose denominator is zero (two identical consecutive losses) are
# stored as NaN instead of raising / producing inf, and the median ignores
# them. A series too short to yield any ratio is reported instead of taking
# the median of an empty sequence.
TAIL_SKIPPED = 80  # trailing points of the series that are left out

server_loss = np.asarray(server_average['loss'], dtype=float)
steps = np.abs(np.diff(server_loss))  # steps[j] == |loss[j+1] - loss[j]|

stop = max(len(server_loss) - TAIL_SKIPPED, 1)
numerator = steps[1:stop]
denominator = steps[:stop - 1]

val = np.full(numerator.shape, np.nan)
np.divide(numerator, denominator, out=val, where=denominator > 0)

if np.isfinite(val).any():
    print(np.nanmedian(val))
else:
    print('no usable loss-change ratios (series too short or flat)')

plt.plot(val)
# plt.yscale('log')
# plt.ylim(-0.2, 0)

plt.show()

<style>
    h1 {
        text-align: center;
    }
</style>


# Saving a summary of the simulation

We are saving:
1. The configuration that was used during the run
2. The average loss over all the clients
3. The average loss over the server

In [None]:
# Bundle the configuration of this run with both averaged loss curves.
summary = dict(
    configuration_used=config,
    client_loss=average['loss'],
    server_loss=server_average['loss'],
)

# Write the bundle to the next numbered file and advance the counter so that
# re-running this cell produces a new file.
summary_path = f'{TESTS_DIRECTORY}/test{test_number:03d}.json'
with open(summary_path, 'w+') as summary_file:
    json.dump(summary, summary_file)
    test_number += 1

<style>
    h1 {
        text-align: center;
    }
</style>


# Other data manipulation

In [None]:
# try:
#     with open(os.path.join(SIMULATION_DIRECTORY, 'client3'), 'rb') as input:
#         data = pickle.load(input)

#     for i in data:
#         plt.plot(i['loss'])

#     plt.show()
# except:
#     pass

In [None]:
# d = [i for i in os.listdir(SIMULATION_DIRECTORY) if not i.startswith('.')]
# all_data = {}

# for file in d:
#     with open(os.path.join(SIMULATION_DIRECTORY, file), 'rb') as input:
#         data = pickle.load(input)
#         for iteration in data:
#             for round_num in range(len(iteration['loss'])):
#                 if round_num not in all_data.keys():
#                     all_data[round_num] = []
#                 all_data[round_num].append(iteration['loss'][round_num])

# for key, d in all_data.items():
#     mean = statistics.mean(d)
#     sd = statistics.stdev(d)

#     print(f'mean = {mean}')
#     print(f'standard deviation = {sd}')

#     plt.scatter(d, norm.pdf(d, mean, sd))
#     # plt.scatter(d, t.pdf(d, mean, sd), color = 'r')
#     plt.show()

In [None]:
# Threshold on the absolute change between two consecutive server losses
# below which a run is considered to have converged.
EPSILON = 0.00001


def _first_converged_index(losses, epsilon=EPSILON):
    """Return the first index i with |losses[i+1] - losses[i]| < epsilon.

    Returns None when no pair of consecutive values is that close, so that
    "converged at index 0" and "never converged" stay distinguishable.
    """
    for idx in range(len(losses) - 1):
        if abs(losses[idx + 1] - losses[idx]) < epsilon:
            return idx
    return None


test_dir = [i for i in os.listdir(TESTS_DIRECTORY) if not i.startswith('.')]
d = {}

for file in test_dir:
    # The handle is not named `input` so the builtin stays usable.
    with open(os.path.join(TESTS_DIRECTORY, file)) as test_file:
        data = json.load(test_file)

    server_loss = data['server_loss']

    # Computed afresh for every file: previously the value found for an
    # earlier file leaked into later files that never converged.
    conv = _first_converged_index(server_loss)
    if conv is None:
        # Runs that never converge are recorded with the series length.
        conv = len(server_loss)

    # NOTE(review): runs sharing the same `min_available_clients` overwrite
    # each other here; only the last one read is kept.
    d[data['configuration_used']['server']['strategy']['min_available_clients']] = {
        'MSE': server_loss[-1],
        'conv': conv,
    }
    # print(data['configuration_used'])
    # plt.plot(data['client_loss'])
    # plt.plot(data['server_loss'])
    # plt.legend(['Client Loss', 'Server Loss'])
    # plt.show()
    # print('########')

# Re-key in ascending order of `min_available_clients` for plotting.
d = {k: d[k] for k in sorted(d)}

In [None]:
# Analytical MSE ("Kate") as a function of the number of players, with a
# linear and with an exponential cost term.
d_conf = config['data']

# Alternative hand-written parameters:
# d_conf = {
#     'mu': 50,
#     'number_of_samples': 12,
#     'sigma': 2
# }

LAMBDA = 0.1
S = 0.1
T = 10

# Player counts 1 .. T (as floats).
x = np.linspace(1, T, T)


def _cost_free_mse(players):
    """Part of the MSE shared by both cost models for `players` players."""
    return (d_conf['mu'] / (players * (d_conf['number_of_samples'] - 2))) \
        + (d_conf['sigma']**2 * (players - 1) / players)


# Linear cost: config['cost'] per additional player.
mse_kate_linear = [
    _cost_free_mse(players) + config['cost'] * (players - 1)
    for players in x
]

# Scale of the exponential cost term.
_mu_term = d_conf['mu'] / (d_conf['number_of_samples'] - 2)
_sigma_sq = d_conf['sigma']**2
ALPHA = (_mu_term - (_mu_term - _sigma_sq) / T - _sigma_sq) / np.exp(2 * LAMBDA * S * T)

# Exponential cost: ALPHA * exp(2 * LAMBDA * S * players).
mse_kate_exp = [
    _cost_free_mse(players) + ALPHA * np.exp(2 * LAMBDA * S * players)
    for players in x
]

# To compare both cost models instead:
# plt.plot(x, mse_kate_linear)
# plt.plot(x, mse_kate_exp)
# plt.legend(['with linear cost', 'with exponential cost'])

plt.plot(x, mse_kate_exp)
plt.legend(['with exponential cost'])
plt.xlabel('number of players')
plt.ylabel('MSE')
plt.show()

# Smallest MSE and its 0-based position in `mse_kate_exp`.
_lowest_mse = min(mse_kate_exp)
print(_lowest_mse, mse_kate_exp.index(_lowest_mse))

In [None]:
# Final server loss ('MSE') of every stored run, in the key order of `d`.
mse = [i['MSE'] for i in d.values()]
# mse = [i['MSE'] - config['cost'] * (n - 1) for n, i in d.items()]

# plotting MSE as it is
# The keys are materialised into a list rather than handing matplotlib a
# dict view, and the axes are labelled so the figure can be read on its own.
plt.plot(list(d.keys()), mse, '-x')
plt.xlabel('min_available_clients')
plt.ylabel('MSE')

# plotting a more fitted version
# try:
#     a = UnivariateSpline(list(d.keys()), mse)
#     cx = np.linspace(list(d.keys())[0], mse[-1])
#     plt.plot(d.keys(), a(list(d.keys())), '-o')
# except:
#     pass

plt.show()

In [None]:
# Convergence index of every stored run, in the key order of `d`, together
# with a smoothing spline evaluated at the same x positions.
conv = [i['conv'] for i in d.values()]
clients = list(d.keys())
plt.plot(clients, conv, '-x')

# The spline is best-effort (the fit fails, for example, when there are too
# few runs). The failure is reported instead of being silently swallowed, and
# `Exception` is caught rather than a bare `except` so that interrupting the
# kernel still works. The previously computed but unused `cx` grid (which
# mixed an x value with a y value as its bounds) is dropped.
try:
    a = UnivariateSpline(clients, conv)
except Exception as error:
    print(f'spline fit skipped: {error}')
else:
    plt.plot(clients, a(clients), '-o')

plt.xlabel('min_available_clients')
plt.ylabel('conv')
plt.show()

In [None]:
# Collect the last loss value of every pickled history found two directory
# levels below SIMULATION_SERVER_HISTORY and plot each loss curve.
# This cell requires SIMULATION_SERVER_HISTORY to be defined by an earlier
# cell.
# `os.listdir` returns entries in arbitrary order; the listings are sorted
# (lexicographically) so that the order of `mi` -- which a later cell plots
# against position -- is the same on every run and machine.
# NOTE(review): `pickle.load` executes arbitrary code from the file; only use
# it on histories produced by this project.
mi = []

for i in sorted(os.listdir(SIMULATION_SERVER_HISTORY)):
    if i == '.gitkeep':
        continue
    print(f'###### {i} #######')
    path = os.path.join(SIMULATION_SERVER_HISTORY, i)
    for j in sorted(os.listdir(path)):
        run_dir = os.path.join(path, j)
        for k in sorted(os.listdir(run_dir)):
            with open(os.path.join(run_dir, k), 'rb') as inf:
                data = pickle.load(inf)
            mi.append(data[0]['loss'][-1])
            plt.plot(data[0]['loss'])
            plt.show()

In [None]:
# Plot the collected final losses against their 1-based position in `mi`.
x = np.arange(1, len(mi) + 1, dtype=float)
plt.plot(x, mi)
plt.show()

In [None]:
# Walk SIMULATION_HISTORY three levels deep (entry / sub-directory / file),
# announce every level and plot the loss curve stored in each pickled file.
top_level = [name for name in os.listdir(SIMULATION_HISTORY) if name != '.gitkeep']

for top_name in top_level:
    print(f'###### {top_name} #######')
    path = '/'.join((SIMULATION_HISTORY, top_name))

    for sub_name in os.listdir(path):
        print(f'###### {sub_name} #######')
        sub_path = '/'.join((path, sub_name))

        for file_name in os.listdir(sub_path):
            print(f'###### {file_name} #######')
            with open('/'.join((sub_path, file_name)), 'rb') as history_file:
                data = pickle.load(history_file)
            first_entry = data[0]
            plt.plot(first_entry['loss'])
            plt.show()

In [None]:
# Plot the loss curve of one specific stored run.
# The path is built from SIMULATION_HISTORY instead of repeating the directory
# name, and the handle is not named `input` so the builtin stays usable.
# NOTE(review): `pickle.load` executes arbitrary code from the file; only use
# it on histories produced by this project.
single_run_path = os.path.join(SIMULATION_HISTORY, 'seed_1002', 'clients_1', 'server')
with open(single_run_path, 'rb') as history_file:
    data = pickle.load(history_file)

plt.plot(data[0]['loss'])
# Shown explicitly, like every other plotting cell in this notebook.
plt.show()