In [None]:
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score

class FederatedLearning:
    """Simulate federated averaging of linear SGD regressors in one process.

    The training set is cut into ``num_clients`` contiguous shards. In every
    round each client model receives one ``partial_fit`` call on its own
    shard, the resulting coefficients and intercepts are averaged into
    ``global_model``, and that average is handed back to every client as the
    starting point of the next round.
    """

    def __init__(self, num_clients=3):
        """Create the client models, the global model and the preprocessors.

        Args:
            num_clients: Number of simulated clients (one model and one data
                shard per client).
        """
        self.num_clients = num_clients
        self.client_models = [
            SGDRegressor(max_iter=100, tol=1e-3, random_state=42)
            for _ in range(num_clients)
        ]
        self.global_model = SGDRegressor(max_iter=100, tol=1e-3, random_state=42)
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()

    def prepare_data(self, data_path):
        """Load the CSV at ``data_path`` and return scaled train/test splits.

        ``transaction_type`` is label-encoded, ``transaction_id``, ``date`` and
        the target ``is_split`` are removed from the features, and the
        remaining columns are standardised.

        Args:
            data_path: Path of the CSV file to read.

        Returns:
            ``[X_train, X_test, y_train, y_test]`` with an 80/20 split.
        """
        data = pd.read_csv(data_path)
        data['transaction_type'] = self.label_encoder.fit_transform(data['transaction_type'])
        X = data.drop(columns=['transaction_id', 'date', 'is_split'])
        y = data['is_split']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        # Fit the scaler on the training rows only; fitting on the full data
        # would leak test-set statistics into training.
        X_train = self.scaler.fit_transform(X_train)
        X_test = self.scaler.transform(X_test)
        return [X_train, X_test, y_train, y_test]

    @staticmethod
    def _take_rows(rows, start, end):
        """Return ``rows[start:end]`` by position for pandas and array inputs."""
        # .iloc guarantees positional slicing whatever the pandas index holds.
        if hasattr(rows, "iloc"):
            return rows.iloc[start:end]
        return rows[start:end]

    def split_data(self, X, y):
        """Cut ``X`` and ``y`` into ``num_clients`` contiguous shards.

        Shard boundaries are ``i * len(X) // num_clients``, so shard sizes
        differ by at most one row and every row lands in exactly one shard.

        Returns:
            A list of ``(X_shard, y_shard)`` tuples, one per client.
        """
        n_samples = len(X)
        client_data = []
        for i in range(self.num_clients):
            start = i * n_samples // self.num_clients
            end = (i + 1) * n_samples // self.num_clients
            client_data.append(
                (self._take_rows(X, start, end), self._take_rows(y, start, end))
            )
        return client_data

    def train_client_model(self, client_id, X, y):
        """Run one ``partial_fit`` on a client and return its parameters.

        Args:
            client_id: Index into ``client_models``.
            X: Feature rows of this client's shard.
            y: Targets of this client's shard.

        Returns:
            ``(coef, intercept)`` as copies, so further training of the
            client cannot change values that were already collected.
        """
        model = self.client_models[client_id]
        model.partial_fit(X, y)
        return model.coef_.copy(), model.intercept_.copy()

    def aggregate_models(self, client_weights):
        """Store the unweighted mean of the client parameters in the global model.

        Args:
            client_weights: Sequence of ``(coef, intercept)`` pairs.

        Raises:
            ValueError: If ``client_weights`` is empty.
        """
        if not client_weights:
            raise ValueError("client_weights must contain at least one (coef, intercept) pair")
        avg_weights = np.mean([w[0] for w in client_weights], axis=0)
        avg_intercept = np.mean([w[1] for w in client_weights], axis=0)
        self.global_model.coef_ = avg_weights
        self.global_model.intercept_ = avg_intercept

    def train(self, X_train, y_train, num_rounds=5):
        """Run ``num_rounds`` rounds of local training followed by averaging.

        Args:
            X_train: Training features, sharded across the clients.
            y_train: Training targets, sharded the same way.
            num_rounds: Number of train/aggregate/broadcast rounds.
        """
        client_data = self.split_data(X_train, y_train)
        for _ in range(num_rounds):
            client_weights = [
                self.train_client_model(client_id, X_client, y_client)
                for client_id, (X_client, y_client) in enumerate(client_data)
            ]
            self.aggregate_models(client_weights)
            # Give every client its own copy of the global parameters. Sharing
            # one array would let a client's in-place update leak into the
            # other clients and into the global model.
            for client_model in self.client_models:
                client_model.coef_ = self.global_model.coef_.copy()
                client_model.intercept_ = self.global_model.intercept_.copy()

    def evaluate(self, X_test, y_test, tolerance=0.1):
        """Score the global model on held-out data.

        Args:
            X_test: Test features.
            y_test: Test targets.
            tolerance: Absolute error below which a prediction counts as a hit.

        Returns:
            ``(mse, r2, accuracy)`` where ``accuracy`` is the percentage of
            predictions whose absolute error is below ``tolerance``.
        """
        y_pred = self.global_model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        # Absolute (not relative) tolerance: |prediction - target| < tolerance.
        accuracy = np.mean(np.abs(y_pred - y_test) < tolerance) * 100
        return mse, r2, accuracy

    def save_model(self, model_path, scaler_path):
        """Pickle the global model to ``model_path`` and the scaler to ``scaler_path``."""
        with open(model_path, 'wb') as model_file:
            pickle.dump(self.global_model, model_file)
        with open(scaler_path, 'wb') as scaler_file:
            pickle.dump(self.scaler, scaler_file)

def main():
    """Train, evaluate and persist a three-client federated model.

    Reads ``split_transactions_new.csv``, runs five federated rounds, prints
    the evaluation metrics and writes the global model and scaler to
    ``federated_sgd_model.pkl`` and ``federated_scaler.pkl``.
    """
    federation = FederatedLearning(num_clients=3)
    splits = federation.prepare_data("split_transactions_new.csv")
    X_train, X_test, y_train, y_test = splits
    federation.train(X_train, y_train, num_rounds=5)

    mse, r2, accuracy = federation.evaluate(X_test, y_test)
    report_lines = (
        "Model Evaluation Results:\n",
        f"  - Mean Squared Error (MSE): {mse:.4f}",
        f"  - R-squared Score: {r2:.4f}",
        f"  - Accuracy: {accuracy:.2f}%",
    )
    for line in report_lines:
        print(line)

    federation.save_model('federated_sgd_model.pkl', 'federated_scaler.pkl')
    print("\nFederated model and scaler saved successfully.")

# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

def load_model_and_scaler(model_path, scaler_path):
    """Unpickle a saved model and scaler and return them as a pair.

    Args:
        model_path: Path of the pickled model file.
        scaler_path: Path of the pickled scaler file.

    Returns:
        ``(model, scaler)`` in that order.

    Note:
        ``pickle.load`` can execute arbitrary code; only pass paths to files
        that come from a trusted source.
    """
    artifacts = []
    # Model first, then scaler; each file is closed before the next is opened.
    for path in (model_path, scaler_path):
        with open(path, 'rb') as handle:
            artifacts.append(pickle.load(handle))
    model, scaler = artifacts
    return model, scaler


Model Evaluation Results:

  - Mean Squared Error (MSE): 0.0025
  - R-squared Score: 0.9900
  - Accuracy: 96.76%

Federated model and scaler saved successfully.
