In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import random
import torch
import torch.backends.cudnn as cudnn
from sklearn.neural_network import MLPRegressor

os.chdir('Resources/')

import warnings
warnings.filterwarnings("ignore")

In [2]:
def power(b, p, m):
    b %= m
    if p == 0:
        return 1
    j = power(b, p // 2, m)
    j = (j * j) % m
    if p % 2 == 1:
        j = (j * b) % m
    return j

def mod_inv(a, m):
    origin_m = m
    y, x = 0, 1
    if m == 1:
        return 0
    while a > 1:
        q = a // m
        t = m
        m = a % m
        a = t
        t = y
        y = x - q * y
        x = t
    if x < 0:
        x += origin_m
    return x

def encode_signed(m, p):
    return m + 9

def decode_signed(m_encoded, p):
    return m_encoded - (9 * 9)

def encrypt_additive(m, h, g, p, y=7):
    m_enc = encode_signed(m, p)
    c1 = power(g, y, p)
    s = power(h, y, p)
    c2 = (power(g, m_enc, p) * s) % p
    return c1, c2

def decrypt_additive(ciphertext, x, p, g):
    c1, c2 = ciphertext
    s = power(c1, x, p)
    s_inv = mod_inv(s, p)
    m_encoded = (c2 * s_inv) % p
    for m in range(p - 1):
        if power(g, m, p) == m_encoded:
            return decode_signed(m, p)
    return None

In [3]:
def reset_seeds(seed=42):
    import os
    import random
    import numpy as np
    import tensorflow as tf
    import torch
    import torch.backends.cudnn as cudnn

    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    torch.manual_seed(seed)
    cudnn.deterministic = True
    cudnn.benchmark = False

def preprocess_dataset(df, seed=42):
    reset_seeds()

    X = df[['Plant_ID', 'Machine_Type', 'Quality_Audit', 'Year', 'Month', 'Week']].values
    y = df['Weekly_Production'].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    
    return X, X_test, y, y_test


def build_mlp(input_dim):
    reset_seeds()
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1.0)
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(100, activation='relu', input_shape=(input_dim,)),
        tf.keras.layers.Dense(50, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    model.compile(optimizer=optimizer, loss='mse')
    return model

def train_local(X_train, X_test, y_train, y_test):
    reset_seeds()
    model = build_mlp(X_train.shape[1])
    model.fit(X_train, y_train, epochs=1000, batch_size=32, verbose=0)
    y_pred = model.predict(X_test)
    return model, r2_score(y_test, y_pred)

def get_gradients_and_flatten(model, X_train, y_train):
    mse_loss_fn = tf.keras.losses.MeanSquaredError()
    X_tensor = tf.convert_to_tensor(X_train, dtype=tf.float32)
    y_tensor = tf.convert_to_tensor(y_train.reshape(-1, 1), dtype=tf.float32)
    
    with tf.GradientTape() as tape:
        predictions = model(X_tensor, training=True)
        loss = mse_loss_fn(y_tensor, predictions)
    
    gradients = tape.gradient(loss, model.trainable_variables)
    flat_grads = []
    for g in gradients:
        flat = tf.reshape(g, [-1]).numpy()
        flat_grads.extend(flat)
    
    return flat_grads

def add_noise_to_gradients(flat_grads, noise_stddev=1.0):
    return [g + tf.random.normal(shape=g.shape, stddev=noise_stddev) for g in flat_grads]

def reconstruct_grads(flat_grads, model):
    reconstructed = []
    idx = 0
    for var in model.trainable_variables:
        shape = var.shape
        size = np.prod(shape)
        chunk = flat_grads[idx:idx + size]
        tensor = tf.convert_to_tensor(np.array(chunk, dtype=np.float32).reshape(shape))
        reconstructed.append(tensor)
        idx += size
    return reconstructed

def apply_gradients(model, avg_grads):
    reset_seeds()
    optimizer = tf.keras.optimizers.Adam()
    optimizer.apply_gradients(zip(avg_grads, model.trainable_variables))

def fine_tune(model, X_train, y_train):
    reset_seeds()
    model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=0)

In [4]:
p = 1009 # prime number
g = 11 # generator
x = 5 # private key
h = power(g, x, p) # public key (p, g, h)

clients = [f"2_{i}_Client_Data_" for i in range(1, 10)]
rounds = [
    "2010_2", "2010_3", "2010_4", "2010_5", "2010_6", "2010_7", "2010_8", "2010_9", "2010_10", "2010_11", "2010_12",
    "2011_1", "2011_2", "2011_3", "2011_4", "2011_5", "2011_6", "2011_7", "2011_8", "2011_9", "2011_10", "2011_11", "2011_12",
    "2012_1", "2012_2", "2012_3", "2012_4", "2012_5", "2012_6", "2012_7", "2012_8", 
    #"2012_9", "2012_10"
    ]

client_models = {}

client_r2_history = {i: [] for i in range(9)}

In [5]:
print("Round 2010_2")
train_data = {}
for idx, client in enumerate(clients):
    path = client + rounds[0] + ".csv"
    df = pd.read_csv(path)
    X_train, X_test, y_train, y_test = preprocess_dataset(df, seed=42)
    model, r2 = train_local(X_train, X_test, y_train, y_test)
    client_r2_history[idx].append(r2)
    print(f"R² of Client_{idx+1} ({rounds[0]}): {r2:.4f}")
    train_data[idx] = (X_train, X_test, y_train, y_test)
    client_models[idx] = model


Round 2010_2
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
R² of Client_1 (2010_2): 0.9999
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
R² of Client_2 (2010_2): 0.0200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
R² of Client_3 (2010_2): 0.9997
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
R² of Client_4 (2010_2): 0.9996
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
R² of Client_5 (2010_2): 0.7836
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
R² of Client_6 (2010_2): 0.9994
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
R² of Client_7 (2010_2): 0.9998
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
R² of Client_8 (2010_2): 0.9994
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
R² of Client_9 (2010_2): 0.9995


In [6]:
import time

start_time = time.time()

scaled_grads_list = []

for i, (idx, model) in enumerate(client_models.items()):
    X_train, _, y_train, _ = train_data[idx]
    grad = get_gradients_and_flatten(model, X_train, y_train)
    grad = [int(x * 1_000_000) for x in grad]  # scale to int
    print(f"Grad {i}:\n{grad}")
    scaled_grads_list.append(grad)

scaled_grads_splits = [[] for _ in range(16)]  # 16 digit positions

for grad in scaled_grads_list:
    for i in range(16):
        part = [int(str(abs(x)).zfill(16)[i]) * (-1 if x < 0 else 1) for x in grad]
        scaled_grads_splits[i].append(part) 

decrypted_sums_list = []

for j in range(16):  
    encrypted_grads = []

    for grad in scaled_grads_splits[j]: 
        enc = [encrypt_additive(m, h, g, p) for m in grad]
        encrypted_grads.append(enc)

    from collections import defaultdict
    enc_sums = defaultdict(lambda: (1, 1))

    for grad in encrypted_grads:
        for i, (c1, c2) in enumerate(grad):
            prev_c1, prev_c2 = enc_sums[i]
            enc_sums[i] = ((prev_c1 * c1) % p, (prev_c2 * c2) % p)

    decrypted_sums = []
    for i in sorted(enc_sums.keys()):
        decrypted = decrypt_additive(enc_sums[i], x, p, g)
        decrypted_sums.append(decrypted)

    decrypted_sums_list.append(decrypted_sums)

grad_sums = []

for i in range(len(decrypted_sums_list[0])):
    result = 0
    for j in range(16):
        digit = decrypted_sums_list[j][i]
        result += digit * (10 ** (15 - j))
    grad_sums.append(result / 1_000_000)

print("Sum of Gradients:", grad_sums)

grads_avg = [val / len(client_models) for val in grad_sums]
print("Average of Gradients:", grads_avg)

end_time = time.time()
print(f"Total time: {end_time - start_time:.2f} seconds")

Grad 0:
[-124043212, -12660293, -346210418, 0, 0, 0, -227913497, 199307601, -344809387, 0, -342722595, -350888732, -115159660, -9506560546, -565449707, -1623531, 211443099, -482686187, 0, 0, 0, 199359771, 0, 0, 0, 0, 0, -194833679, 160492156, 0, 219608413, 0, 0, 0, 0, 343291900, 0, 0, 0, -182670547, -496260406, 0, 0, 160821945, 0, -11707053710, 0, -4994146484, 0, -600630798, 0, 0, -5672741, 0, -5627257324, 0, 0, -186006484, 0, 0, 0, 258376129, 611496826, 0, 0, 105977020, 0, 330403442, 0, 0, 0, 0, 309689941, -448555541, 0, 0, 0, 0, 0, 225309875, 0, 334858825, 0, 473792419, -9771427734, -6460286132, 0, 0, 305209503, 430681213, 0, 0, -146387924, 0, 0, 148696990, -179765472, -5943419433, 291036804, 0, 4883692871, 683571655, 13549728515, 0, 0, 0, 8839670898, -7775488281, 13519633789, 0, 13377932617, 13658075195, 4486717773, -257281312500, 22043632812, 31583099, -8173721191, 18851343750, 0, 0, 0, -7791199218, 0, 0, 0, 0, 0, 7524835937, -6254746582, 0, -8546186523, 0, 0, 0, 0, -13380316406, 0

In [None]:
import time

start_time = time.time()

for round_id in rounds[1:]:
    print(f"\nRound ({round_id})")
    new_train_data = {}
    new_client_models = {}

    round_index = rounds.index(round_id)

    for idx, client in enumerate(clients):
        dfs = []
        for r in rounds[:round_index + 1]:  # all previous rounds including current
            path = f"{client}{r}.csv"      # fixed file naming convention
            df_ind = pd.read_csv(path)
            dfs.append(df_ind)

        df = pd.concat(dfs, ignore_index=True)

        X_train, X_test, y_train, y_test = preprocess_dataset(df, seed=42)

        model = build_mlp(X_train.shape[1])
        model(X_train[:1])
        avg_grads_tensor = reconstruct_grads(grads_avg, model)
        apply_gradients(model, avg_grads_tensor)

        fine_tune(model, X_train, y_train)
        r2 = r2_score(y_test, model.predict(X_test))
        client_r2_history[idx].append(r2)
        print(f"R² of Client_{idx+1} ({round_id}): {r2:.4f}")

        new_train_data[idx] = (X_train, X_test, y_train, y_test)
        new_client_models[idx] = model

    scaled_grads_list = []

    for i, (idx, model) in enumerate(client_models.items()):
        X_train, _, y_train, _ = train_data[idx]
        grad = get_gradients_and_flatten(model, X_train, y_train)
        grad = [int(x * 1_000_000) for x in grad]  # scale to int
        #print(f"Grad {i}:\n{grad}")
        scaled_grads_list.append(grad)

    scaled_grads_splits = [[] for _ in range(16)]  # 16 digit positions

    for grad in scaled_grads_list:
        for i in range(16):
            part = [int(str(abs(x)).zfill(16)[i]) * (-1 if x < 0 else 1) for x in grad]
            scaled_grads_splits[i].append(part) 

    decrypted_sums_list = []

    for j in range(16):  
        encrypted_grads = []

        for grad in scaled_grads_splits[j]: 
            enc = [encrypt_additive(m, h, g, p) for m in grad]
            encrypted_grads.append(enc)

        from collections import defaultdict
        enc_sums = defaultdict(lambda: (1, 1))

        for grad in encrypted_grads:
            for i, (c1, c2) in enumerate(grad):
                prev_c1, prev_c2 = enc_sums[i]
                enc_sums[i] = ((prev_c1 * c1) % p, (prev_c2 * c2) % p)

        decrypted_sums = []
        for i in sorted(enc_sums.keys()):
            decrypted = decrypt_additive(enc_sums[i], x, p, g)
            decrypted_sums.append(decrypted)

        decrypted_sums_list.append(decrypted_sums)

    grad_sums = []

    for i in range(len(decrypted_sums_list[0])):
        result = 0
        for j in range(16):
            digit = decrypted_sums_list[j][i]
            result += digit * (10 ** (15 - j))
        grad_sums.append(result / 1_000_000)

    #print("Sum of Gradients:", grad_sums)

    grads_avg = [val / len(client_models) for val in grad_sums]
    #print("Average of Gradients:", grads_avg)

    client_models = new_client_models
    train_data = new_train_data

end_time = time.time()
print(f"Total time: {end_time - start_time:.2f} seconds")


Round (2010_3)
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
R² of Client_1 (2010_3): 0.1490
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
R² of Client_2 (2010_3): 0.0025
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
R² of Client_3 (2010_3): 0.1501
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
R² of Client_4 (2010_3): 0.0658
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
R² of Client_5 (2010_3): 0.3544
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
R² of Client_6 (2010_3): 0.2250
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
R² of Client_7 (2010_3): 0.2080
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
R² of Client_8 (2010_3): 0.2589
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
R² of Client_9 (2010_3): 0.2145

Round (2010_4)
[1m29/29[0m [32m━━

In [None]:
import joblib

for idx, model in new_client_models.items():
    path = f"3_{idx+1}_Client_Model.pkl"
    joblib.dump(model, path)

In [None]:
for i in range (0, 9):
    model = client_models[i]

    df = pd.read_csv(f"2_{i+1}_Client_Data_2012_9.csv")
    X_new, _, y_new, _ = preprocess_dataset(df, seed=42)

    y_pred = model.predict(X_new)

    from sklearn.metrics import r2_score
    r2 = r2_score(y_new, y_pred)
    print(f"R² of Client_{i+1} model on new_data.csv: {r2:.4f}")

[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
R² of Client_1 model on new_data.csv: 0.9996
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
R² of Client_2 model on new_data.csv: 0.0376
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
R² of Client_3 model on new_data.csv: 1.0000
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
R² of Client_4 model on new_data.csv: 0.9972
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
R² of Client_5 model on new_data.csv: 0.9989
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
R² of Client_6 model on new_data.csv: 0.9997
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
R² of Client_7 model on new_data.csv: 0.9995
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
R² of Client_8 model on new_data.csv: 1.0000
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [None]:
for i in range (0, 9):
    model = client_models[i]

    df = pd.read_csv(f"2_{i+1}_Client_Data_2012_10.csv")
    X_new, _, y_new, _ = preprocess_dataset(df, seed=42)

    y_pred = model.predict(X_new)

    from sklearn.metrics import r2_score
    r2 = r2_score(y_new, y_pred)
    print(f"R² of Client_{i+1} model on new_data.csv: {r2:.4f}")

[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
R² of Client_1 model on new_data.csv: 0.9996
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
R² of Client_2 model on new_data.csv: 0.0355
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
R² of Client_3 model on new_data.csv: 1.0000
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
R² of Client_4 model on new_data.csv: 0.9971
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
R² of Client_5 model on new_data.csv: 0.9989
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
R² of Client_6 model on new_data.csv: 0.9997
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
R² of Client_7 model on new_data.csv: 0.9995
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
R² of Client_8 model on new_data.csv: 1.0000
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

data = []
for client_id, r2_list in client_r2_history.items():
    for round_idx, r2 in enumerate(r2_list):
        data.append({
            "Client": f"Client {client_id + 1}",
            "Round": rounds[round_idx],
            "R² Score": r2
        })

df_r2 = pd.DataFrame(data)
df_r2.to_csv("3_Client_r2_History.csv", index=False)