In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
import copy
import time
import random
import threading

2023-09-08 17:33:18.557051: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Mini-batch size handed to `.batch()` when each node builds its tf.data pipeline.
batch_size = 1024
# Step size given to the parameter server's Adam optimizer.
learning_rate = 1e-3

In [3]:
@tf.keras.saving.register_keras_serializable()
class MLP(tf.keras.Model):
    """Fully connected regressor: 128 -> 1024 -> 128 -> 1024 -> 8 units.

    The four hidden layers use leaky ReLU; the final 8-unit layer has no
    activation, so the network emits raw regression values.
    """

    def __init__(self):
        super().__init__()
        Dense = tf.keras.layers.Dense
        hidden_act = tf.nn.leaky_relu
        # Layers are created in the same order as they are applied in `call`.
        self.dense1 = Dense(units=128, activation=hidden_act)
        self.dense2 = Dense(units=1024, activation=hidden_act)
        self.dense3 = Dense(units=128, activation=hidden_act)
        self.dense4 = Dense(units=1024, activation=hidden_act)
        self.dense5 = Dense(units=8)

    def call(self, inputs):
        """Run `inputs` through the four hidden layers, then the linear head."""
        hidden = inputs
        for layer in (self.dense1, self.dense2, self.dense3, self.dense4):
            hidden = layer(hidden)
        return self.dense5(hidden)

In [4]:
class ParaServer:
    """Central parameter server: owns the shared model, its optimizer and a score table.

    `freqs` maps a node's frequency key to the last score that node uploaded,
    clamped so it is never below zero.
    """

    def __init__(self):
        self.model = MLP()
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        self.freqs = {}

    def download(self):
        """Return the live shared model and the live score table (no copies are made)."""
        return self.model, self.freqs

    def upload(self, grads, freq, score):
        """Record `score` for `freq`, apply `grads` to the shared model, return the new state.

        Args:
            grads: gradients, one per entry of `self.model.variables`, in that order.
            freq: key under which the uploader's score is stored.
            score: the uploader's score; negative values are stored as 0.

        Returns:
            The same `(model, freqs)` pair that `download()` returns.
        """
        self.freqs[freq] = max(0, score)
        grad_var_pairs = zip(grads, self.model.variables)
        self.optimizer.apply_gradients(grads_and_vars=grad_var_pairs)
        return self.download()

    def initModel(self, x):
        """Call the shared model once on `x`; the output is discarded."""
        self.model(x)

In [5]:
def valiAll(index_epoch):
    """Evaluate a copy of the server model on the validation set and log the metrics.

    Prints MSE, RMSE, MAE and R2 for `(X_v, y_v)` and stores the R2 value in the
    global `r2sv` under `index_epoch`.
    """
    server_model, _ = ps.download()
    snapshot = copy.deepcopy(server_model)
    predictions = snapshot(X_v)

    residual = predictions - y_v
    squared_error = tf.square(residual)

    va_mse = tf.reduce_mean(squared_error)
    va_rmse = tf.sqrt(va_mse)
    va_mae = tf.reduce_mean(tf.abs(residual))
    # R2 = 1 - (residual sum of squares) / (total sum of squares around the mean).
    total_ss = tf.reduce_sum(tf.square(y_v - tf.reduce_mean(y_v)))
    va_r2 = 1 - tf.reduce_sum(squared_error) / total_ss

    print("mse:{} rmse:{} mae:{} r2:{}".format(va_mse, va_rmse, va_mae, va_r2))
    r2sv[index_epoch] = va_r2.numpy()

In [6]:
class Node:
    """A training participant that owns the data of one frequency.

    Each node trains a local copy of the parameter-server model on its own CSV
    data and uploads gradients after every batch. To avoid drifting away from
    what the shared model already predicts for *other* frequencies, the node
    adds a second gradient term: for every other frequency with a non-zero
    score it asks the current model to match the predictions of `zeroModel`
    (a snapshot taken by `getZero`) on the same batch with the frequency
    column overwritten.

    Attributes:
        freq: frequency key of this node (also used as key in the global `r2s`).
        otfreqs: snapshot of the server's `{freq: score}` table taken by `getZero`.
        model: local working copy of the server model.
        zeroModel: frozen snapshot of the server model taken by `getZero`.
        dataset: shuffled DataFrame read from `dsName`.
        X, y: float32 feature / target arrays (columns 'freq'..'L4' and 'S11r'..'S41i').
        dataset_train: shuffled, batched, prefetched tf.data pipeline over (X, y).
    """

    def __init__(self, dsName, freq):
        """Load `dsName` as CSV and build the training pipeline for frequency `freq`."""
        self.freq = freq
        self.otfreqs = {}
        self.model = MLP()
        self.dataset = pd.read_csv(dsName, encoding='utf-8').sample(frac=1).reset_index(drop=True)
        self.X = self.dataset.loc[:, 'freq':'L4'].to_numpy(dtype=np.float32)
        self.y = self.dataset.loc[:, 'S11r':'S41i'].to_numpy(dtype=np.float32)
        # NOTE(review): a 23000-element buffer only shuffles fully if the CSV has
        # at most 23000 rows -- confirm against the dataset sizes.
        self.dataset_train = (
            tf.data.Dataset.from_tensor_slices((self.X, self.y))
            .shuffle(buffer_size=23000)
            .batch(batch_size)
            .prefetch(tf.data.AUTOTUNE)
        )
        self.zeroModel = MLP()

    @staticmethod
    def _with_freq(X, freq):
        """Return a copy of batch `X` whose first column is overwritten with `freq`."""
        freq_col = tf.fill([X.shape[0], 1], tf.cast(freq, X.dtype))
        return tf.concat([freq_col, X[:, 1:]], axis=1)

    def getZero(self):
        """Snapshot the server's model and score table as the reference for other frequencies."""
        m, freqs = ps.download()
        self.otfreqs = copy.deepcopy(freqs)
        self.zeroModel = copy.deepcopy(m)

    def train(self, index_epoch):
        """Run one pass over this node's data, uploading gradients after every batch.

        After the pass, the metrics of the last batch are printed and its R2 is
        stored in the global `r2s[self.freq][index_epoch]`. If the dataset yields
        no batches, a notice is printed and nothing is recorded.

        Args:
            index_epoch: epoch number, used for logging and as key in `r2s`.
        """
        m, _ = ps.download()
        self.model = copy.deepcopy(m)

        # Stay None when the dataset is empty so the logging below can be skipped
        # instead of failing on unbound names.
        tr_mse = tr_rmse = tr_mae = tr_r2 = None

        for X, y in self.dataset_train:
            with tf.GradientTape() as tape:
                y_pred = self.model(X)
                tr_mse = tf.reduce_mean(tf.square(y_pred - y))
            tr_rmse = tf.sqrt(tr_mse)
            tr_mae = tf.reduce_mean(tf.abs(y_pred - y))
            tr_r2 = 1 - tf.reduce_sum(tf.square(y_pred - y)) / tf.reduce_sum(tf.square(y - tf.reduce_mean(y)))
            grads = tape.gradient(tr_mse, self.model.variables)

            # The node's own gradient counts with weight 1; every other frequency
            # contributes with its score as weight.
            total_weight = 1
            for other_freq, weight in self.otfreqs.items():
                if other_freq == self.freq or weight == 0:
                    continue
                X_other = self._with_freq(X, other_freq)
                # Target is computed outside the tape: the snapshot is a fixed reference.
                y_other = self.zeroModel(X_other)
                with tf.GradientTape() as ref_tape:
                    y_pred_other = self.model(X_other)
                    ref_loss = tf.reduce_mean(tf.square(y_pred_other - y_other))
                ref_grads = ref_tape.gradient(ref_loss, self.model.variables)
                grads = [g + rg * weight for g, rg in zip(grads, ref_grads)]
                total_weight += weight

            m, _ = ps.upload([g / total_weight for g in grads], self.freq, tr_r2.numpy())
            self.model = copy.deepcopy(m)

        if tr_r2 is None:
            print("node:{} epoch:{} skipped: empty training dataset".format(self.freq, index_epoch))
            return

        print("node:{} epoch:{}".format(self.freq, index_epoch))
        print("train mse:{} rmse:{} mae:{} r2:{}".format(tr_mse, tr_rmse, tr_mae, tr_r2))
        # setdefault keeps this working for a frequency that was not pre-registered in r2s.
        r2s.setdefault(self.freq, {})[index_epoch] = tr_r2.numpy()

In [7]:
# Training R2 history: one {epoch: r2} dict per node frequency.
r2s = {node_freq: {} for node_freq in (2.4, 2.5, 2.6)}
# Validation R2 history: {epoch: r2}.
r2sv = dict()

In [8]:
# Validation set: read once, shuffle the rows, then split into features and targets.
test_dataset = (
    pd.read_csv("Test.csv", encoding='utf-8')
    .sample(frac=1)
    .reset_index(drop=True)
)
# Features are the columns 'freq'..'L4'; targets are the columns 'S11r'..'S41i'.
X_v, y_v = (
    test_dataset.loc[:, column_span].to_numpy(dtype=np.float32)
    for column_span in (slice('freq', 'L4'), slice('S11r', 'S41i'))
)

In [9]:
# Single shared parameter server; the functions and classes above refer to it
# through the global name `ps`.
ps = ParaServer()
# Call the server model once on the validation features -- presumably so its
# variables exist before the nodes deep-copy it (TODO confirm).
ps.initModel(X_v)

2023-09-08 17:33:20.647983: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9604 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:17:00.0, compute capability: 7.5
2023-09-08 17:33:20.648552: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 9621 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:65:00.0, compute capability: 7.5


In [10]:
# One node per training CSV, paired with the frequency key it trains on.
nodeList = [
    Node(csv_path, node_freq)
    for csv_path, node_freq in (
        ('./24Train.csv', 2.4),
        ('./25Train.csv', 2.5),
        ('./26Train.csv', 2.6),
    )
]

In [11]:
# Indices into nodeList; reshuffled every epoch to randomise which node goes first.
orders = [0, 1, 2]
# turn[j] holds the half-open [start, stop) epoch windows in which node j trains.
turn = [
    np.array([[26, 104], [178, 312], [344, 464], [520, 600]]),
    np.array([[0, 94], [149, 223], [319, 433], [464, 580]]),
    np.array([[32, 151], [155, 248], [270, 354], [378, 502]]),
]
for epoch in range(600):
    random.shuffle(orders)
    for node_idx in orders:
        worker = nodeList[node_idx]
        for start, stop in turn[node_idx]:
            if epoch < start or epoch >= stop:
                continue
            # On the first epoch of a window, refresh the node's reference snapshot.
            if epoch == start:
                worker.getZero()
            worker.train(epoch)
    # Validate the shared model once per epoch, whether or not any node trained.
    valiAll(epoch)

2023-09-08 17:33:21.581665: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5616e0df7130 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-09-08 17:33:21.581686: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 2080 Ti, Compute Capability 7.5
2023-09-08 17:33:21.581690: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (1): NVIDIA GeForce RTX 2080 Ti, Compute Capability 7.5
2023-09-08 17:33:21.584696: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-09-08 17:33:21.689721: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2023-09-08 17:33:21.797544: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


node:2.5 epoch:0
train mse:0.12580235302448273 rmse:0.35468626022338867 mae:0.28514188528060913 r2:-0.03928649425506592
mse:0.1078781858086586 rmse:0.3284481465816498 mae:0.2694229781627655 r2:0.1056249737739563
node:2.5 epoch:1
train mse:0.08941522240638733 rmse:0.2990237772464752 mae:0.24533699452877045 r2:0.2598140835762024
mse:0.09269672632217407 rmse:0.30446135997772217 mae:0.24845385551452637 r2:0.2314884066581726
node:2.5 epoch:2
train mse:0.08246558159589767 rmse:0.2871682047843933 mae:0.23420575261116028 r2:0.3158109188079834
mse:0.08772169053554535 rmse:0.2961784899234772 mae:0.2408897876739502 r2:0.27273446321487427
node:2.5 epoch:3
train mse:0.08275166898965836 rmse:0.2876659035682678 mae:0.23486657440662384 r2:0.31503456830978394
mse:0.08662708848714828 rmse:0.2943247854709625 mae:0.2378569394350052 r2:0.28180938959121704
node:2.5 epoch:4
train mse:0.08076130598783493 rmse:0.28418534994125366 mae:0.2303665578365326 r2:0.3290473222732544
mse:0.08357305079698563 rmse:0.28909

In [17]:
# Print the validation R2 history, one "epoch value" pair per line.
for epoch_idx, val_r2 in r2sv.items():
    print(epoch_idx, val_r2)

0 0.105624974
1 0.2314884
2 0.27273446
3 0.2818094
4 0.30712914
5 0.32614583
6 0.32382143
7 0.34921402
8 0.35023367
9 0.35484523
10 0.3446623
11 0.3799495
12 0.39420992
13 0.3866803
14 0.39536434
15 0.42120218
16 0.4119432
17 0.41113114
18 0.4231065
19 0.40022635
20 0.41630852
21 0.43001205
22 0.43018818
23 0.4363916
24 0.46036822
25 0.44642
26 0.43871075
27 0.47393906
28 0.4509619
29 0.48089147
30 0.4922188
31 0.48365885
32 0.49709773
33 0.5073036
34 0.51151
35 0.5108922
36 0.5255027
37 0.52867293
38 0.51911104
39 0.5383643
40 0.5597607
41 0.56280786
42 0.56912386
43 0.58369964
44 0.5647127
45 0.5710807
46 0.5814314
47 0.58186746
48 0.574474
49 0.5587739
50 0.6225122
51 0.60776424
52 0.60062873
53 0.5897071
54 0.5880201
55 0.58374226
56 0.60278296
57 0.59314644
58 0.58982486
59 0.5875259
60 0.5839905
61 0.6112983
62 0.6653269
63 0.55517983
64 0.59175235
65 0.6069933
66 0.6039334
67 0.58920443
68 0.58506644
69 0.6763774
70 0.66808534
71 0.5896965
72 0.55561006
73 0.6165236
74 0.6021598