In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
import copy
import random
import math
import os

2023-09-18 15:26:35.550330: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Expose only GPU 0 to CUDA-based libraries running in this process.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [3]:
# Training hyper-parameters read by the cells below:
#   batch_size - number of samples per mini-batch in each node's tf.data pipeline
#   l_rate     - learning rate the server's Adam optimizer starts with
batch_size, l_rate = 256, 0.001

In [4]:
@tf.keras.saving.register_keras_serializable()
class MLP(tf.keras.Model):
    """Fully connected network: four leaky-ReLU hidden layers and a linear 8-unit output."""

    def __init__(self):
        super().__init__()
        act = tf.nn.leaky_relu
        # Hidden widths: 512 -> 1024 -> 512 -> 256. Attribute names and creation
        # order are kept as-is so the order of `model.variables` does not change.
        self.dense1 = tf.keras.layers.Dense(512, activation=act)
        self.dense2 = tf.keras.layers.Dense(1024, activation=act)
        self.dense3 = tf.keras.layers.Dense(512, activation=act)
        self.dense4 = tf.keras.layers.Dense(256, activation=act)
        # Output layer has no activation.
        self.dense5 = tf.keras.layers.Dense(8)

    def call(self, inputs):
        """Pass `inputs` through dense1..dense5 in order and return the last layer's output."""
        hidden = inputs
        for layer in (self.dense1, self.dense2, self.dense3, self.dense4):
            hidden = layer(hidden)
        return self.dense5(hidden)

In [5]:
class ParaServer:
    """Owns the shared MLP, its Adam optimizer and a per-frequency score table."""

    def __init__(self):
        self.model = MLP()
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=l_rate)
        # freq -> last score uploaded for that frequency (negative scores stored as 0).
        self.freqs = {}

    def upload(self, grads, freq, score):
        """Record `score` for `freq`, apply `grads` to the shared model, return the new state.

        Args:
            grads: one gradient per entry of `self.model.variables`, in the same order.
            freq: key under which the score is stored in `self.freqs`.
            score: value stored for `freq`; anything below 0 is stored as 0.

        Returns:
            The same `(model, freqs)` pair that `download()` returns.
        """
        self.freqs[freq] = max(0, score)
        grads_and_vars = zip(grads, self.model.variables)
        self.optimizer.apply_gradients(grads_and_vars=grads_and_vars)
        return self.download()

    def download(self):
        """Return the live `(model, freqs)` objects (references, not copies)."""
        return self.model, self.freqs

    def initModel(self, x):
        """Call the model once on `x`, which lets Keras build the layer variables."""
        self.model(x)

    def lr_decay(self, ratio):
        """Multiply the optimizer's current learning rate by `ratio`."""
        # Read, scale, then assign through the property; `*=` is deliberately avoided.
        scaled = self.optimizer.learning_rate * ratio
        self.optimizer.learning_rate = scaled

In [6]:
def valiAll(index_epoch):
    """Evaluate the server's model on the validation arrays and record its R^2.

    Reads the module-level `ps`, `X_v` and `y_v`, prints MSE / RMSE / MAE / R^2,
    and stores the R^2 value in the module-level `r2sv` under `index_epoch`.
    """
    model, _ = ps.download()
    y_v_p = model(X_v)

    residual = y_v_p - y_v
    sq_err = tf.square(residual)

    va_mse = tf.reduce_mean(sq_err)
    va_rmse = tf.sqrt(va_mse)
    va_mae = tf.reduce_mean(tf.abs(residual))

    # R^2 = 1 - SS_res / SS_tot, with SS_tot taken around the mean of all targets.
    total_ss = tf.reduce_sum(tf.square(y_v - tf.reduce_mean(y_v)))
    va_r2 = 1 - tf.reduce_sum(sq_err) / total_ss

    print("mse:{} rmse:{} mae:{} r2:{}".format(va_mse, va_rmse, va_mae, va_r2))
    r2sv[index_epoch] = va_r2.numpy()

In [7]:
class Node:
    """One training client tied to a single frequency and its own CSV dataset.

    A node trains the model held by the module-level parameter server `ps`:
    for every local batch it computes gradients and uploads them. It also keeps
    `zeroModel`, a deep copy of the server model taken by `getZero()`, whose
    predictions serve as targets for the other frequencies known to the server.
    """

    def __init__(self, dsName, freq):
        """Load and shuffle the CSV at `dsName` and build the batched input pipeline.

        Args:
            dsName: path of the CSV file. Columns 'freq'..'L4' are used as inputs
                and columns 'S11r'..'S41i' as targets (both cast to float32).
            freq: frequency value this node reports its score under.
        """
        self.freq = freq
        # Copy of the server's freq -> score table, filled by getZero().
        self.otfreqs = {}
        self.model = MLP()
        self.zeroModel = MLP()
        self.dataset = pd.read_csv(dsName, encoding='utf-8').sample(frac=1).reset_index(drop=True)
        self.X = self.dataset.loc[:, 'freq':'L4'].to_numpy(dtype=np.float32)
        self.y = self.dataset.loc[:, 'S11r':'S41i'].to_numpy(dtype=np.float32)
        self.dataset_train = (
            tf.data.Dataset.from_tensor_slices((self.X, self.y))
            .shuffle(buffer_size=self.X.shape[0])
            .batch(batch_size)
            .prefetch(tf.data.AUTOTUNE)
        )

    def getZero(self):
        """Snapshot the server state: deep-copy its score table and its model."""
        m, freqs = ps.download()
        self.otfreqs = copy.deepcopy(freqs)
        self.zeroModel = copy.deepcopy(m)
        print(self.otfreqs)

    def train(self, index_epoch):
        """Run one pass over the local data, uploading one gradient step per batch.

        For each batch the uploaded gradient is a weighted average of:
          * the gradient of the MSE on the node's own data (weight 1), and
          * for every other frequency `k` with a non-zero score `v` in
            `self.otfreqs`, the gradient of the MSE between the current model
            and `zeroModel` on the same batch with input column 0 ('freq')
            replaced by `k` (weight `v`).

        After the pass, the metrics of the last batch are printed and its R^2
        is stored in the module-level `r2s[self.freq][index_epoch]`.

        Args:
            index_epoch: epoch number, used for logging and as the history key.
        """
        self.model, _ = ps.download()
        for X, y in self.dataset_train:
            with tf.GradientTape() as tape:
                y_pred = self.model(X)
                tr_mse = tf.reduce_mean(tf.square(y_pred - y))
            tr_rmse = tf.sqrt(tr_mse)
            tr_mae = tf.reduce_mean(tf.abs(y_pred - y))
            tr_r2 = 1 - tf.reduce_sum(tf.square(y_pred - y)) / tf.reduce_sum(tf.square(y - tf.reduce_mean(y)))
            if tr_r2 < 0 and index_epoch > 50:
                # Diagnostic dump for a batch that still has negative R^2 after epoch 50.
                print(self.model)
                print(X)
                print(self.otfreqs)
                print(y_pred)
                print(y)
            grads = tape.gradient(tr_mse, self.model.variables)

            # Indices of input column 0 for every row of this batch; identical for
            # each frequency below, so they are built once per batch.
            freq_col = [[row, 0] for row in range(X.shape[0])]
            sum_r2 = 1
            for k, v in self.otfreqs.items():
                # Skip this node's own frequency and frequencies whose score is 0.
                if math.isclose(k, self.freq) or math.isclose(v, 0):
                    continue
                X_i = tf.tensor_scatter_nd_update(X, freq_col, [k] * X.shape[0])
                # Targets come from the snapshot model and are computed outside the tape.
                y_i = self.zeroModel(X_i)
                with tf.GradientTape() as ref_tape:
                    y_pred_i = self.model(X_i)
                    loss = tf.reduce_mean(tf.square(y_pred_i - y_i))
                grad = ref_tape.gradient(loss, self.model.variables)
                grads = [g + g_ref * v for g, g_ref in zip(grads, grad)]
                sum_r2 += v
            # Normalise by the total weight (1 for the local loss plus the scores used).
            self.model, _ = ps.upload([g / sum_r2 for g in grads], self.freq, tr_r2.numpy())
        print("node:{} epoch:{}".format(self.freq, index_epoch))
        print("train mse:{} rmse:{} mae:{} r2:{}".format(tr_mse, tr_rmse, tr_mae, tr_r2))
        # setdefault: a frequency missing from the r2s literal no longer raises KeyError.
        r2s.setdefault(self.freq, {})[index_epoch] = tr_r2.numpy()

In [8]:
# Training history: r2s[freq][epoch] holds the R^2 recorded by the node with that frequency.
r2s = {freq: {} for freq in (2.4, 2.5, 2.6)}
# Validation history: r2sv[epoch] holds the R^2 recorded by valiAll().
r2sv = dict()

In [9]:
# Load the validation CSV, shuffle its rows and renumber the index from 0.
test_dataset = (
    pd.read_csv("Test.csv", encoding='utf-8')
    .sample(frac=1)
    .reset_index(drop=True)
)
# Inputs are columns 'freq'..'L4', targets are columns 'S11r'..'S41i'; both as float32 arrays.
X_v, y_v = (
    test_dataset.loc[:, first:last].to_numpy(dtype=np.float32)
    for first, last in (('freq', 'L4'), ('S11r', 'S41i'))
)

In [10]:
# Create the shared parameter server and call its model once on the validation
# inputs so the model's variables exist before any node trains.
ps = ParaServer()
ps.initModel(x=X_v)

2023-09-18 15:26:37.182896: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9604 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:17:00.0, compute capability: 7.5


In [11]:
# One node per training CSV, paired with the frequency it is responsible for.
nodeList = [
    Node(csv_path, freq)
    for csv_path, freq in (
        ('./24Train.csv', 2.4),
        ('./25Train.csv', 2.5),
        ('./26Train.csv', 2.6),
    )
]

In [12]:
# Indices into nodeList; reshuffled in place at the start of every epoch.
orders = [0, 1, 2]
# turn[j] lists the half-open epoch windows [l, r) during which node j trains.
turn = [
    np.array([[26, 104], [178, 312], [344, 464], [520, 600]]),
    np.array([[0, 94], [149, 223], [319, 433], [464, 580]]),
    np.array([[32, 151], [155, 248], [270, 354], [378, 502]]),
]
for i in range(600):
    random.shuffle(orders)
    for j in orders:
        for l, r in turn[j]:
            if not (l <= i < r):
                continue
            if l == i:
                # First epoch of a window: refresh the node's snapshot of the server.
                nodeList[j].getZero()
            nodeList[j].train(i)
    valiAll(i)
    # Scale the learning rate by 0.7 after epochs 100, 200, 300, 400 and 500.
    if i in range(100, 501, 100):
        ps.lr_decay(0.7)

{}


2023-09-18 15:26:38.054465: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x56090854d9c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-09-18 15:26:38.054485: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 2080 Ti, Compute Capability 7.5
2023-09-18 15:26:38.057675: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-09-18 15:26:38.173636: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2023-09-18 15:26:38.273193: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


node:2.5 epoch:0
train mse:0.09489993005990982 rmse:0.30805832147598267 mae:0.25430700182914734 r2:0.21647292375564575
mse:0.09129847586154938 rmse:0.30215635895729065 mae:0.24723972380161285 r2:0.24424201250076294
node:2.5 epoch:1
train mse:0.08360307663679123 rmse:0.2891419529914856 mae:0.22982649505138397 r2:0.2965449094772339
mse:0.08836647123098373 rmse:0.297264963388443 mae:0.2420448660850525 r2:0.2685127854347229
node:2.5 epoch:2
train mse:0.07988832145929337 rmse:0.28264522552490234 mae:0.2265326827764511 r2:0.33447039127349854
mse:0.08261458575725555 rmse:0.28742751479148865 mae:0.23318266868591309 r2:0.3161262273788452
node:2.5 epoch:3
train mse:0.0824272558093071 rmse:0.28710147738456726 mae:0.2252216637134552 r2:0.3115586042404175
mse:0.08332153409719467 rmse:0.2886546850204468 mae:0.23255713284015656 r2:0.3102741837501526
node:2.5 epoch:4
train mse:0.07597452402114868 rmse:0.275634765625 mae:0.2224964201450348 r2:0.372638463973999
mse:0.08220510929822922 rmse:0.28671431541

In [17]:
# Print the training R^2 values recorded for the 2.6 node, one per line, in insertion order.
for r2_value in r2s[2.6].values():
    print(r2_value)

0.7560033
0.7512615
0.7757746
0.7504951
0.74621177
0.79969776
0.7525082
0.75210196
0.7835443
0.7627089
0.7816271
0.7884103
0.8254702
0.8214363
0.80743766
0.79521877
0.7854225
0.7792633
0.78771126
0.8001785
0.83708364
0.8176398
0.8264098
0.75868946
0.83918273
0.7978143
0.79012835
0.8428598
0.830445
0.83721614
0.83375067
0.8331964
0.8410766
0.84121865
0.8522145
0.854155
0.84359825
0.82568
0.85143805
0.8519144
0.8695765
0.8676371
0.8395523
0.8716171
0.8322081
0.85481155
0.8243389
0.86531866
0.86199605
0.8737798
0.8727813
0.8637608
0.85982955
0.8434057
0.86029005
0.8722442
0.8694769
0.8617412
0.8571735
0.8486729
0.879977
0.8597673
0.8548844
0.8842608
0.8633818
0.8730767
0.86421096
0.86715794
0.8842847
0.8626379
0.9094311
0.8968838
0.8832976
0.9062068
0.87761897
0.9002114
0.8874401
0.8744755
0.8946881
0.9002939
0.9166082
0.9381303
0.92632365
0.8998256
0.9246022
0.83201206
0.8996158
0.9294959
0.9371114
0.9236022
0.9213905
0.8670683
0.93451715
0.9299067
0.88482165
0.915848
0.878025
0.9043995
