Results saved to 'model_results_with_global_model_sent_back.csv'


In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score
from sklearn.model_selection import train_test_split
import numpy as np

# Read the raw records; every simulated node draws its rows from this table
df = pd.read_csv('ADSXLIST_07Sep2023.csv')

# Expand each categorical column into one indicator column per distinct value
categorical_features = ['VISCODE', 'VISCODE2', 'SITEID']
df = pd.get_dummies(df, columns=categorical_features)

# Feature set shared by all nodes: the listed AX* columns plus every column
# whose name starts with one of the encoded prefixes
symptom_columns = [
    'AXRASH', 'AXMUSCLE', 'AXURNFRQ', 'AXENERGY',
    'AXDROWSY', 'AXDIZZY', 'AXBREATH', 'AXCOUGH',
]
indicator_prefixes = ('VISCODE', 'VISCODE2', 'SITEID')
indicator_columns = [name for name in df.columns if name.startswith(indicator_prefixes)]
selected_features = symptom_columns + indicator_columns

# Experiment size: number of reshuffled repetitions and number of nodes
num_iterations = 10
num_splits = 6

# Per-node metric histories; each list gains one entry per iteration
node_labels = [f'Node {node + 1}' for node in range(num_splits)]
global_accuracy = {label: [] for label in node_labels}
global_precision = {label: [] for label in node_labels}

# Each iteration: reshuffle, partition into nodes, shift data between nodes,
# fit one local model per node, average the parameters into a global model,
# then score that global model on every node's held-out rows.
for iteration in range(num_iterations):
    # Shuffle the data with an iteration-dependent seed so runs are reproducible
    df_shuffled = df.sample(frac=1, random_state=iteration * 123)

    # Initial splitting into equally sized sections; rows beyond
    # num_splits * split_size are left out of this iteration
    split_size = len(df_shuffled) // num_splits
    data_splits = [df_shuffled.iloc[i * split_size: (i + 1) * split_size] for i in range(num_splits)]

    # Data shifting: move 25% of each node's rows to the next node (wrapping
    # around). The shifts are applied one after another, so rows a node just
    # received from its predecessor can be passed on again.
    for i in range(num_splits):
        next_index = (i + 1) % num_splits
        data_to_shift = data_splits[i].sample(frac=0.25, random_state=iteration)
        data_splits[i] = data_splits[i].drop(data_to_shift.index)
        data_splits[next_index] = pd.concat([data_splits[next_index], data_to_shift])

    # Per-node model parameters, class labels and held-out evaluation data
    coefficients_list = []
    intercepts_list = []
    classes_list = []
    held_out_sets = []

    # Train local models and collect their parameters
    for i in range(num_splits):
        split_data = data_splits[i]

        # Missing feature values are treated as 0
        X = split_data[selected_features].fillna(0)
        y = split_data['Phase']

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=359)

        model = LogisticRegression(max_iter=1000)
        model.fit(X_train, y_train)

        coefficients_list.append(model.coef_)
        intercepts_list.append(model.intercept_)
        classes_list.append(model.classes_)

        # Keep the 40% hold-out so the global model is never scored on rows
        # that a local model was fitted on
        held_out_sets.append((X_test, y_test))

    # Averaging coefficient rows is only meaningful when every local model
    # orders the same labels the same way; fail loudly otherwise
    reference_classes = classes_list[0]
    for node_number, node_classes in enumerate(classes_list[1:], start=2):
        if not np.array_equal(node_classes, reference_classes):
            raise ValueError(
                f"Node {node_number} was trained on classes {list(node_classes)}, "
                f"but Node 1 was trained on {list(reference_classes)}; "
                "parameters cannot be averaged"
            )

    # Federated Averaging: unweighted mean of coefficients and intercepts
    avg_coefficients = np.mean(coefficients_list, axis=0)
    avg_intercepts = np.mean(intercepts_list, axis=0)

    # Build the global model by setting the fitted attributes directly
    # instead of calling fit()
    global_model = LogisticRegression(max_iter=1000)
    global_model.coef_ = avg_coefficients
    global_model.intercept_ = avg_intercepts
    global_model.classes_ = reference_classes

    # Send the global model back to all nodes and test on each node's hold-out
    for i, (X_test, y_test) in enumerate(held_out_sets):
        # Pass a plain array because the global model was never fitted on a
        # DataFrame and therefore carries no feature names
        y_pred = global_model.predict(X_test.to_numpy())

        # Calculate accuracy and precision for the node
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)

        # Store the results for this node for this iteration
        global_accuracy[f'Node {i+1}'].append(accuracy)
        global_precision[f'Node {i+1}'].append(precision)

# Accuracy table: one row per iteration, one column per node; written to CSV
accuracy_df = pd.DataFrame(global_accuracy).rename_axis('Iteration')
accuracy_df.to_csv('model_accuracy_with_global_model_sent_back.csv')

# Precision table with the same layout; written to its own CSV
precision_df = pd.DataFrame(global_precision).rename_axis('Iteration')
precision_df.to_csv('model_precision_with_global_model_sent_back.csv')

# Report where each table was written
print("Accuracy results saved to 'model_accuracy_with_global_model_sent_back.csv'")
print("Precision results saved to 'model_precision_with_global_model_sent_back.csv'")


Accuracy results saved to 'model_accuracy_with_global_model_sent_back.csv'
Precision results saved to 'model_precision_with_global_model_sent_back.csv'
