In [1]:
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from copy import deepcopy
import matplotlib.pyplot as plt
import pickle
import math
import random
import socket

import os
import optparse
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

from util.get_dataset import get_tr_test_data

# Transformer Definition (Dependencies)

# Multi-head attention with Q, K, V
class multiHeadAttention(tf.keras.layers.Layer):
    """Multi-head self-attention over the time axis of a 3-D input.

    Each head owns its own query, key and value projection matrix. The
    per-head results are concatenated along the feature axis and mapped back
    to the input feature size by one shared linear transform, so the output
    has the same shape as the input.

    Args:
        key_dim: Number of columns of every query/key projection matrix.
        num_heads: Number of attention heads.
    """

    def __init__(self, key_dim, num_heads):
        super(multiHeadAttention, self).__init__()
        self.key_dim = key_dim
        self.num_heads = num_heads

    def build(self, input_shape):
        """Create the projection matrices once the feature size is known.

        Args:
            input_shape: Shape of the layer input; only its last entry (the
                feature size ``dim``) is used.
        """
        dim = int(input_shape[-1])

        self.WqL = []
        self.WkL = []
        self.WvL = []

        # Variables are created in the order Wq, Wk, Wv for each head and the
        # output transform last; keep that order stable.
        for i in range(self.num_heads):
            Wq_init = tf.random_normal_initializer()
            Wq = tf.Variable(initial_value=Wq_init(shape=(dim, self.key_dim), dtype="float32"), trainable=True)
            self.WqL.append(Wq)

            Wk_init = tf.random_normal_initializer()
            Wk = tf.Variable(initial_value=Wk_init(shape=(dim, self.key_dim), dtype="float32"), trainable=True)
            self.WkL.append(Wk)

            # Values keep the full feature size (dv == dim)
            Wv_init = tf.random_normal_initializer()
            Wv = tf.Variable(initial_value=Wv_init(shape=(dim, dim), dtype="float32"), trainable=True)
            self.WvL.append(Wv)

        # Linear transform mapping the concatenated heads (num_heads * dim) back to dim
        Wlt_init = tf.random_normal_initializer()
        self.Wlt = tf.Variable(initial_value=Wlt_init(shape=((self.num_heads * dim), dim), dtype="float32"), trainable=True)

    def call(self, inputs):
        """Apply self-attention.

        Args:
            inputs: Tensor of shape (batch_size, time_steps, dim). Both
                ``time_steps`` and ``dim`` must be statically known because
                they are used as Python ints in the reshapes below.

        Returns:
            Tensor of shape (batch_size, time_steps, dim).
        """
        static_shape = inputs.shape.as_list()
        time_steps = int(static_shape[-2])
        dim = int(static_shape[-1])

        # Flatten to ((batch_size * time_steps), dim) so each projection is a single 2-D matmul
        x_tran = tf.reshape(inputs, [-1, dim])

        a_xL = []

        for i in range(self.num_heads):
            # Query : batch_size x time_steps x dq
            xq = tf.reshape(tf.matmul(x_tran, self.WqL[i]), [-1, time_steps, self.key_dim])

            # Key : batch_size x time_steps x dk
            xk = tf.reshape(tf.matmul(x_tran, self.WkL[i]), [-1, time_steps, self.key_dim])

            # Value : batch_size x time_steps x dv
            xv = tf.reshape(tf.matmul(x_tran, self.WvL[i]), [-1, time_steps, dim])

            # Attention scores, s_a[b, i, j] = <xk[b, i], xq[b, j]> / key_dim
            # (s_a : batch_size x time_steps x time_steps).
            # NOTE(review): the scale is 1/key_dim, not the 1/sqrt(key_dim) of the
            # usual scaled dot-product attention; kept as-is, confirm it is intended.
            s_a = tf.matmul(xk, xq, transpose_b=True) * (1 / self.key_dim)

            # Normalise each row of scores (sft_s_a : batch_size x time_steps x time_steps)
            sft_s_a = tf.nn.softmax(s_a, axis=-1)

            # Attention-weighted values, a_x[b, i] = sum_j sft_s_a[b, i, j] * xv[b, j]
            # (batch_size x time_steps x dim). The previous Dot(axes=(1, 2)) call
            # produced the transposed layout (batch_size x dim x time_steps), which
            # the flat reshape below then interpreted with the wrong axis order.
            a_xL.append(tf.matmul(sft_s_a, xv))

        # Concatenate heads along the feature axis : batch_size x time_steps x (num_heads * dim)
        a_x = tf.concat(a_xL, -1)

        # Flatten to ((batch_size * time_steps) x (num_heads * dim)) for the output transform
        a_x_tran = tf.reshape(a_x, [-1, (self.num_heads * dim)])

        # Project back to the input feature size and restore the time axis
        a_x_tran = tf.matmul(a_x_tran, self.Wlt)
        a_x_tran = tf.reshape(a_x_tran, [-1, time_steps, dim])

        return a_x_tran


# Transformer Block implemented as a Layer
class TransformerBlock(layers.Layer):
    """Self-attention followed by a two-layer feed-forward network.

    Both sub-layers are wrapped as dropout -> residual addition -> layer
    normalisation.

    Args:
        embed_dim: Passed to the attention layer as its key size and used as
            the output width of the feed-forward network. It should equal the
            feature size of the input so the residual additions are
            shape-compatible.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden (ReLU) layer of the feed-forward network.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Standard ``Layer`` keyword arguments (e.g. ``name``),
            forwarded to the base class.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super(TransformerBlock, self).__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = multiHeadAttention(embed_dim, num_heads)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Run the block.

        Args:
            inputs: Input tensor handed to the attention layer.
            training: Forwarded to both dropout layers. Defaults to ``None``
                so the block can also be called without the argument.

        Returns:
            The layer-normalised output of the feed-forward sub-layer.
        """
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from its config."""
        config = super(TransformerBlock, self).get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

class PositionEmbeddingLayer(layers.Layer):
    """Adds a trainable embedding of the position index to its input.

    Args:
        sequence_length: Number of positions; one embedding row is learned
            for each index in ``range(sequence_length)``.
        output_dim: Size of each position embedding vector.
        **kwargs: Standard ``Layer`` keyword arguments, forwarded to the base
            class.
    """

    def __init__(self, sequence_length, output_dim, **kwargs):
        super(PositionEmbeddingLayer, self).__init__(**kwargs)
        self.position_embedding_layer = layers.Embedding(
            input_dim=(sequence_length), output_dim=output_dim
        )
        self.sequence_length = sequence_length
        # Stored so get_config() can report it
        self.output_dim = output_dim

    def call(self, inputs):
        """Return ``inputs`` plus the embeddings of positions 0..sequence_length-1.

        The embeddings have shape (sequence_length, output_dim) and are added
        to ``inputs`` with ordinary broadcasting, so the trailing dimensions
        of ``inputs`` must be compatible with that shape.
        """
        position_indices = tf.range(self.sequence_length)
        embedded_indices = self.position_embedding_layer(position_indices)
        return inputs + embedded_indices

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from its config."""
        config = super(PositionEmbeddingLayer, self).get_config()
        config.update(
            {
                "sequence_length": self.sequence_length,
                "output_dim": self.output_dim,
            }
        )
        return config
    

# Creating the model
# Initializing the transformer model
def get_transformer_model(num_features, num_attn_heads, hidden_layer_dim, num_transformer_blocks, time_dim):
    """Build and compile the transformer regression model.

    Architecture: trainable position embedding -> ``num_transformer_blocks``
    transformer blocks -> global average pooling over time -> dropout ->
    Dense(32, relu) -> dropout -> Dense(1).

    Args:
        num_features: Feature size of every time step; also used as the
            embedding size of the transformer blocks.
        num_attn_heads: Number of attention heads in each block.
        hidden_layer_dim: Hidden width of each block's feed-forward network.
        num_transformer_blocks: Number of stacked transformer blocks.
        time_dim: Number of time steps of the model input.

    Returns:
        A ``keras.Model`` compiled with SGD (learning rate 0.0001), MSE loss
        and an MSE metric.
    """
    # Blocks are instantiated before the input layer, as before, so the order
    # in which layers are created does not change.
    transformer_blocks = [
        TransformerBlock(num_features, num_attn_heads, hidden_layer_dim)
        for _ in range(num_transformer_blocks)
    ]

    # Model
    inputs = layers.Input(shape=(time_dim, num_features,))

    # Trainable position embedding with one row per input time step. The length
    # used to be hard-coded to 50, which only worked for time_dim == 50.
    embedding_layer = PositionEmbeddingLayer(time_dim, num_features)
    x = embedding_layer(inputs)

    for block in transformer_blocks:
        x = block(x)

    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(32, activation="relu")(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(1)(x)

    model = keras.Model(inputs=inputs, outputs=outputs)

    optim = keras.optimizers.SGD(learning_rate=0.0001)
    model.compile(optimizer=optim, loss='mse', metrics=['mse'])

    return model

def aggregate_weights(client_weights):
    """Average corresponding weight tensors across clients.

    Args:
        client_weights (list): One entry per client; each entry is a list of
            weight arrays. The number of tensors is taken from the first
            client, and tensors at the same index must share a shape.

    Returns:
        list: One float32 array per weight tensor, holding the element-wise
        mean over all clients.
    """
    num_tensors = len(client_weights[0])
    num_clients = len(client_weights)

    averaged_weights = []
    for tensor_idx in range(num_tensors):
        # Gather the same tensor from every client and stack along a new leading axis
        per_client = [client_weights[client_idx][tensor_idx] for client_idx in range(num_clients)]
        stacked = np.stack(per_client, axis=0).astype(np.float32)

        # Collapse the client axis by averaging
        averaged_weights.append(np.average(stacked, axis=0))

    return averaged_weights

# Register the custom layers in Keras' global custom-object registry under their class names
keras.utils.get_custom_objects().update(
    {
        "PositionEmbeddingLayer": PositionEmbeddingLayer,
        "TransformerBlock": TransformerBlock,
    }
)


2024-10-30 09:59:52.247186: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-30 09:59:52.417500: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [59]:
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Load the pickled results of both runs. The files are opened with `with`
# so the handles are closed as soon as loading finishes.
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
with open("result/fl_loss_0.01_E_1_B_10.pkl", "rb") as tmp_file:
    coding_data = pickle.load(tmp_file)
with open("result/fl_base_loss_0.01_E_1_B_10.pkl", "rb") as tmp_file:
    base_data = pickle.load(tmp_file)

In [60]:
def _final_epoch_metric(client_histories, metric):
    """Return a 1-D array with the last entry of ``history[metric]`` of every item.

    ``client_histories`` is iterated and every item must expose a ``.history``
    mapping (presumably one keras History per federated round; confirm against
    the script that wrote the pickle).
    """
    return np.concatenate([[tmp_history.history[metric][-1]] for tmp_history in client_histories])


def _per_client_metric(run_data, metric, num_clients=4):
    """Return ``_final_epoch_metric`` for clients ``0..num_clients-1`` of ``run_data``, in order."""
    return [_final_epoch_metric(run_data[client_idx], metric) for client_idx in range(num_clients)]


# Training metrics
client0_loss, client1_loss, client2_loss, client3_loss = _per_client_metric(coding_data, 'loss')
client0_mse, client1_mse, client2_mse, client3_mse = _per_client_metric(coding_data, 'mse')

client0_loss_base, client1_loss_base, client2_loss_base, client3_loss_base = _per_client_metric(base_data, 'loss')
client0_mse_base, client1_mse_base, client2_mse_base, client3_mse_base = _per_client_metric(base_data, 'mse')

# Validation metrics
client0_val_loss, client1_val_loss, client2_val_loss, client3_val_loss = _per_client_metric(coding_data, 'val_loss')
client0_val_mse, client1_val_mse, client2_val_mse, client3_val_mse = _per_client_metric(coding_data, 'val_mse')

client0_val_loss_base, client1_val_loss_base, client2_val_loss_base, client3_val_loss_base = _per_client_metric(base_data, 'val_loss')
client0_val_mse_base, client1_val_mse_base, client2_val_mse_base, client3_val_mse_base = _per_client_metric(base_data, 'val_mse')

# Stack the per-client series into one array per metric (row index = client index)
coding_loss = np.array([client0_loss,client1_loss,client2_loss,client3_loss])
coding_mse = np.array([client0_mse,client1_mse,client2_mse,client3_mse])

base_loss = np.array([client0_loss_base,client1_loss_base,client2_loss_base,client3_loss_base])
base_mse = np.array([client0_mse_base,client1_mse_base,client2_mse_base,client3_mse_base])

coding_val_loss = np.array([client0_val_loss,client1_val_loss,client2_val_loss,client3_val_loss])
coding_val_mse = np.array([client0_val_mse,client1_val_mse,client2_val_mse,client3_val_mse])

base_val_loss = np.array([client0_val_loss_base,client1_val_loss_base,client2_val_loss_base,client3_val_loss_base])
base_val_mse = np.array([client0_val_mse_base,client1_val_mse_base,client2_val_mse_base,client3_val_mse_base])

In [61]:
# Bundle every metric array under the key it is stored with in the .npz archive
data = {
    'coding_loss':coding_loss,
    'coding_mse':coding_mse,
    'base_loss': base_loss,
    'base_mse': base_mse,
    'coding_val_loss': coding_val_loss,
    'coding_val_mse': coding_val_mse,
    'base_val_loss': base_val_loss,
    'base_val_mse': base_val_mse
}

# np.savez does not create missing directories, so make sure the target exists first
os.makedirs('figure_data', exist_ok=True)
np.savez('figure_data/l1e1b10.npz',**data)