In [5]:
import math
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from confeddi import FederatedSystem
from distribute_data import generate_data
# Hide every CUDA device from this process (value '-1').
# NOTE(review): this runs after `import tensorflow`; presumably it still takes
# effect because no GPU has been touched yet — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
# NOTE(review): PYTHONHASHSEED is documented as being read at interpreter
# start-up, so assigning it inside a running kernel presumably only affects
# child processes — confirm.
os.environ['PYTHONHASHSEED'] = str(50)

from collections import defaultdict
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam


In [6]:
# Load the RTT measurements and keep only the non-object columns. Columns that
# pandas parsed as `object` are dropped (the original note says these are the
# complex-valued columns).
data = pd.read_csv('RTT_data.csv').select_dtypes(exclude=['object'])

# Separate features (X) from the regression target (y). The target name is
# taken from `drop_labels` so it is spelled in exactly one place.
drop_labels = ['GroundTruthRange[m]']
X = data.drop(columns=drop_labels)
y = data[drop_labels[0]]

# Quick data stats
features = X.shape[1]
print(f'Number of samples: {data.shape[0]}')
print(f'Features per sample: {features}\n')

# List the feature columns from X itself, so the label column can never show
# up in the listing regardless of where it sits in the CSV.
print('Columns:')
print(', '.join(map(str, X.columns)), end='\n\n')


Number of samples: 29581
Features per sample: 10

Columns:
%Timestamp[s], GroundTruthPositionX[m], GroundTruthPositionY[m], GroundTruthPositionZ[m], ML_range_estimate[m], AP_positionX[m], AP_positionY[m], AP_positionZ[m], AP_index, ToD_factor[m]



In [7]:
# Build one client per access point (AP_index 1..12) and split each client's
# samples into train / validation / test partitions with a fixed seed.
seed = 11
clients = {'Client Data': [], 'Client Labels': [], 'Client Distances': []}
X_val, Y_val = [], []
X_test, Y_test = [], []
total = 0
for ap_index in range(1, 13):
    ap_mask = X['AP_index'] == ap_index
    ap_features = X[ap_mask].to_numpy()
    ap_targets = y[ap_mask].to_numpy()

    # First split: 20% of this client's samples become its validation part.
    x_rest, x_val, y_rest, y_val = train_test_split(
        ap_features, ap_targets, test_size=0.2, random_state=seed
    )
    # Second split: 25% of what is left becomes its test part; the remainder
    # is the client's training data.
    x_train, x_test, y_train, y_test = train_test_split(
        x_rest, y_rest, test_size=0.25, random_state=seed
    )

    # File each partition into its collection, keeping client order.
    for bucket, part in (
        (clients['Client Data'], x_train),
        (clients['Client Labels'], y_train),
        (X_val, x_val),
        (Y_val, y_val),
        (X_test, x_test),
        (Y_test, y_test),
    ):
        bucket.append(part)


In [8]:
# Merge the per-client validation and test splits into single arrays.
# np.concatenate preserves the client order of the input lists, so row k of the
# merged features still pairs with entry k of the merged labels.
# The float64 cast keeps the merged arrays floating-point whatever the source
# dtype is.
X_val_data = np.concatenate(X_val, axis=0).astype(np.float64)
y_val_labels = np.concatenate(Y_val, axis=0).astype(np.float64)
X_test_data = np.concatenate(X_test, axis=0).astype(np.float64)
y_test_labels = np.concatenate(Y_test, axis=0).astype(np.float64)

# Total number of validation / test samples across all clients.
xval_tot = len(X_val_data)
xtest_tot = len(X_test_data)

# Every feature column is kept here (no column subset is dropped), so the
# merged matrices must be `features` wide and aligned with their labels.
assert X_val_data.shape == (xval_tot, features)
assert X_test_data.shape == (xtest_tot, features)
assert y_val_labels.shape == (xval_tot,)
assert y_test_labels.shape == (xtest_tot,)


In [9]:
# Disabled feature-scaling experiment, kept as a bare string literal so none of
# it executes; the cell's only effect is to echo the string as its output.
# NOTE(review): as written, the disabled code drops columns [0, 5, 6, 7, 8]
# from the client data only, and calls fit_transform separately for every
# client and for the validation/test arrays — reconcile both points before
# re-enabling it.
'''
scaler = StandardScaler()
for i in range(len(clients['Client Data'])):
    clients['Client Data'][i] = scaler.fit_transform(np.delete(clients['Client Data'][i], [0, 5, 6, 7, 8], 1))

X_val_data = scaler.fit_transform(X_val_data)
X_test_data = scaler.fit_transform(X_test_data)
'''

"\nscaler = StandardScaler()\nfor i in range(len(clients['Client Data'])):\n    clients['Client Data'][i] = scaler.fit_transform(np.delete(clients['Client Data'][i], [0, 5, 6, 7, 8], 1))\n\nX_val_data = scaler.fit_transform(X_val_data)\nX_test_data = scaler.fit_transform(X_test_data)\n"

In [19]:
# Seed the random number generators and request deterministic TensorFlow ops so
# that re-running this cell is intended to reproduce the same errors.
tf.keras.utils.set_random_seed(50)
tf.config.experimental.enable_op_determinism()

# Local baseline: train one small, independent regression network per client
# on that client's own data (no federation) and report its mean absolute error
# on the client's held-out rows.
for client in range(len(clients['Client Data'])):
    client_x = clients['Client Data'][client]
    client_y = clients['Client Labels'][client]

    # The first 80% of the client's rows are used for fitting; the remaining
    # rows are held out for the error estimate below.
    samples = int(len(client_y) * 0.8)
    # Deliberately not named `X` / `y`: those names hold the full feature frame
    # and label series from the loading cell, and the per-client split cell
    # indexes them by column name, so rebinding them here would break a re-run.
    x_fit, y_fit = client_x[:samples], client_y[:samples]
    xtest, ytest = client_x[samples:], client_y[samples:]

    # Architecture: Dense(16) -> Dense(8) -> LayerNormalization -> Dense(1).
    # The input width is read from the data instead of being hard-coded.
    model = Sequential([
        layers.Dense(16, activation='relu', input_shape=(client_x.shape[1],)),
        layers.Dense(8, activation='relu'),
        layers.LayerNormalization(),
        layers.Dense(1),
    ])
    model.compile(optimizer=Adam(learning_rate=0.005), loss='mse')

    print(f'Client {client}')
    # `use_multiprocessing` is not passed: it only applies to generator /
    # Sequence inputs, and newer Keras versions no longer accept it in fit().
    history = model.fit(
        x_fit,
        y_fit,
        validation_split=0.2,
        epochs=20,
        verbose=0,
        shuffle=False,
    )

    # Absolute error per held-out sample. `diff` is left in the namespace on
    # purpose: the next cell reads the value from the last client.
    pred = model.predict(xtest, verbose=0)
    diff = np.absolute(pred.reshape(len(ytest),) - ytest)
    print(f'Average Error: {np.mean(diff)}')

Client 0
Average Error: 1.7145915978257553
Client 1
Average Error: 1.3795484950051913
Client 2
Average Error: 1.9103776499430336
Client 3
Average Error: 3.8179725723898
Client 4
Average Error: 3.7418863666917423
Client 5
Average Error: 2.3994828780070674
Client 6
Average Error: 1.822820002245916
Client 7
Average Error: 1.3948940674994332
Client 8
Average Error: 1.8034423221782103
Client 9
Average Error: 5.396556434851381
Client 10
Average Error: 4.851035280628205
Client 11
Average Error: 10.472738855189732


In [20]:
# Fraction of held-out samples whose absolute error is below 10. `diff` is
# whatever the per-client training loop left behind, i.e. its last iteration.
(diff < 10).sum() / len(diff)

0.5485714285714286

In [21]:
# Plot the error history (`mse`) against `log`, print the last error value and
# clear the history kept by `fed`.
# NOTE(review): `log`, `mse` and `fed` are not assigned in any cell visible in
# this notebook, and the recorded output of this cell is
# "NameError: name 'log' is not defined". The cell that produced them
# (presumably a FederatedSystem run — verify) has to be restored above this one
# for the notebook to survive Restart & Run All.
plt.plot(log, mse, color = 'blue', label = 'conf_mse', marker = 'o')
#plt.plot(fedavg_log, fedavg_mse, color = 'green', label = 'fedavg_mse', marker = 'o')
plt.title('Error')
#plt.ylim(10, 35)
plt.ylabel('Error')
plt.xlabel('Time (s)')
plt.grid()
plt.legend()

print(mse[-1])
fed.clear_history()

NameError: name 'log' is not defined

In [None]:
# NOTE(review): `fed`, `w` and `b` are not assigned in any cell visible here.
# This also rebinds `model`, replacing the last per-client Keras model.
model = fed.generate_model(w, b)

In [None]:
# Predict on the merged test features with whatever `model` currently is; this
# rebinds `pred`, which the per-client training loop also assigns.
pred = model.predict(X_test_data)



In [None]:
# Display the test labels merged across all clients.
y_test_labels

array([23.049 ,  7.4971, 14.987 , ...,  1.9869,  5.2789,  1.7645])

In [None]:
# Feature vector of the first merged test sample (raw values; the scaling cell
# is disabled).
X_test_data[0]

array([ 1.0488e+03, -1.8610e+01,  2.3301e-01,  4.4900e+01,  3.6373e+01,
       -1.1004e+01,  2.1952e+01,  4.6200e+01,  1.0000e+00,  1.7460e+01])

In [None]:
# Ground-truth label of the first merged test sample.
y_test_labels[0]

23.049

In [None]:
# Model prediction for the first merged test sample; compare with
# y_test_labels[0].
pred[0]

array([-92.58483], dtype=float32)