In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score
from sklearn.model_selection import train_test_split
import numpy as np

# Load the dataset into a pandas dataframe
df = pd.read_csv('ADSXLIST_07Sep2023.csv')

# Symptom columns used as model inputs, and the categorical columns that have
# to be one-hot encoded before LogisticRegression can consume them.
symptom_features = ['AXRASH', 'AXMUSCLE', 'AXURNFRQ', 'AXENERGY', 'AXDROWSY', 'AXDIZZY', 'AXBREATH', 'AXCOUGH']
categorical_features = ['VISCODE', 'VISCODE2', 'SITEID']

# One-hot encode once, up front, so every node shares the same indicator
# columns. Encoding per node and then reindexing onto the raw column names
# would drop every indicator column and leave all-zero placeholders instead.
# dtype=int keeps the indicators numeric regardless of the pandas version.
df_encoded = pd.get_dummies(df, columns=categorical_features, dtype=int)
dummy_columns = [col for col in df_encoded.columns if col not in df.columns]

# Complete, fixed-order feature set that every node's design matrix is aligned to
full_features = symptom_features + dummy_columns

# Define the number of iterations and splits
num_iterations = 10
num_splits = 6

# Lists to store accuracy and precision for each iteration
# (one inner list of per-node scores per iteration)
local_accuracy_all_iterations = []
local_precision_all_iterations = []

for iteration in range(num_iterations):
    # Shuffle the data randomly for each iteration (seeded per iteration)
    df_shuffled = df_encoded.sample(frac=1, random_state=iteration * 123)

    # Initial splitting of the data into equally sized sections; any remainder
    # rows beyond num_splits * split_size are not assigned to a node
    split_size = len(df_shuffled) // num_splits
    data_splits = [df_shuffled.iloc[i * split_size: (i + 1) * split_size] for i in range(num_splits)]

    # Data Shifting: Move 25% data from each node to the next
    for i in range(num_splits):
        next_index = (i + 1) % num_splits  # Circular shift
        data_to_shift = data_splits[i].sample(frac=0.25, random_state=iteration)
        data_splits[i] = data_splits[i].drop(data_to_shift.index)
        data_splits[next_index] = pd.concat([data_splits[next_index], data_to_shift])

    # Lists for storing local accuracy and precision
    local_accuracy_iteration = []
    local_precision_iteration = []

    for i in range(num_splits):
        split_data = data_splits[i]

        # Nodes 1 and 2 only keep rows matching one symptom value and only
        # expose that single symptom column; the other nodes use everything.
        if i == 0:  # Node 1
            split_data = split_data[split_data['AXRASH'] == 2]
            features = ['AXRASH']
        elif i == 1:  # Node 2
            split_data = split_data[split_data['AXCOUGH'] == 1]
            features = ['AXCOUGH']
        else:  # Nodes 3 to 6
            features = full_features

        # Align every node to the same columns; features a node does not
        # expose, and missing values, are filled with 0.
        X = split_data[features].reindex(columns=full_features, fill_value=0).fillna(0)
        y = split_data['Phase'].copy()

        # A single, seeded split keeps the local metrics reproducible
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=359)

        model = LogisticRegression(max_iter=1000)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        local_accuracy_iteration.append(accuracy_score(y_test, y_pred))
        local_precision_iteration.append(precision_score(y_test, y_pred, average='weighted', zero_division=0))

    # Store results
    local_accuracy_all_iterations.append(local_accuracy_iteration)
    local_precision_all_iterations.append(local_precision_iteration)

# Display and save the results
# Organize the results into a DataFrame (one row per iteration, each cell a list of per-node scores)
results_df = pd.DataFrame({
    'Local Accuracy': local_accuracy_all_iterations,
    'Local Precision': local_precision_all_iterations
})

# Save the results to a CSV file
results_df.to_csv('model_resultsl.csv', index=False)

print("Results saved to 'model_resultsl.csv'")


Results saved to 'model_resultsl.csv'


In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score
from sklearn.model_selection import train_test_split
import numpy as np

# Load the dataset into a pandas dataframe
df = pd.read_csv('ADSXLIST_07Sep2023.csv')

common_features = ['AXRASH', 'AXMUSCLE', 'AXURNFRQ', 'AXENERGY', 'AXDROWSY', 'AXDIZZY', 'AXBREATH', 'AXCOUGH']

# Define categorical features for one-hot encoding
categorical_features = ['VISCODE', 'VISCODE2', 'SITEID']
# dtype=int keeps the indicator columns numeric. Recent pandas versions emit
# bool indicators by default, which the np.number filter below would drop.
df = pd.get_dummies(df, columns=categorical_features, dtype=int)

# Exclude any non-numeric columns (e.g., dates or other string columns)
numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()

# Ensure 'Phase' (target variable) is not in the features list
if 'Phase' in numeric_columns:
    numeric_columns.remove('Phase')

# Define the number of iterations and splits
num_iterations = 10
num_splits = 6

# Lists to store global accuracy and precision for each iteration
# (one inner list of per-node scores per iteration)
global_accuracy_all_iterations = []
global_precision_all_iterations = []

for iteration in range(num_iterations):

    iteration_global_accuracy = [0] * num_splits
    iteration_global_precision = [0] * num_splits

    # Shuffle the data randomly for each iteration (seeded per iteration)
    df_shuffled = df.sample(frac=1, random_state=iteration * 123)

    # Initial Splitting of the data into sections
    split_size = len(df_shuffled) // num_splits
    data_splits = [df_shuffled.iloc[i * split_size: (i + 1) * split_size] for i in range(num_splits)]

    # Data Shifting: Move 25% data from each node to the next
    for i in range(num_splits):
        next_index = (i + 1) % num_splits  # Circular shift
        data_to_shift = data_splits[i].sample(frac=0.25, random_state=iteration)
        data_splits[i] = data_splits[i].drop(data_to_shift.index)
        data_splits[next_index] = pd.concat([data_splits[next_index], data_to_shift])

    # Lists for storing the parameters and label sets of the local models
    coefficients_list = []
    intercepts_list = []
    classes_list = []

    # Train local models and collect their parameters
    for i in range(num_splits):
        split_data = data_splits[i]
        # Nodes 1 and 2 train only on rows matching one symptom value.
        # Every node still uses the same numeric_columns as inputs so that
        # the coefficient arrays line up and can be averaged.
        if i == 0:  # Node 1
            split_data = split_data[split_data['AXRASH'] == 2]
        elif i == 1:  # Node 2
            split_data = split_data[split_data['AXCOUGH'] == 1]

        X = split_data[numeric_columns].fillna(0)
        y = split_data['Phase'].copy()

        # Only the training part is needed here; the global model is scored below
        X_train, _, y_train, _ = train_test_split(X, y, test_size=0.4, random_state=359)

        model = LogisticRegression(max_iter=1000)
        model.fit(X_train, y_train)

        coefficients_list.append(model.coef_)
        intercepts_list.append(model.intercept_)
        classes_list.append(model.classes_)

    # Averaging is only meaningful when every local model learned the same
    # labels in the same order; otherwise coefficient rows would be mixed up.
    reference_classes = classes_list[0]
    if not all(np.array_equal(reference_classes, node_classes) for node_classes in classes_list[1:]):
        raise ValueError(
            "Local models were trained on different class sets; "
            "their coefficients cannot be averaged."
        )

    # Federated Averaging: Calculate mean of coefficients and intercepts
    avg_coefficients = np.mean(coefficients_list, axis=0)
    avg_intercepts = np.mean(intercepts_list, axis=0)

    # Build the global model directly from the averaged parameters. Calling
    # fit() here would re-train the model and overwrite the averages, so the
    # fitted attributes predict() relies on are assigned by hand instead.
    global_model = LogisticRegression(max_iter=1000)
    global_model.coef_ = avg_coefficients
    global_model.intercept_ = avg_intercepts
    global_model.classes_ = reference_classes
    global_model.n_features_in_ = len(numeric_columns)
    global_model.feature_names_in_ = np.asarray(numeric_columns, dtype=object)

    # Sending global model back to all nodes for testing
    for i in range(num_splits):
        split_data = data_splits[i]
        X_test = split_data[numeric_columns].fillna(0)
        y_test = split_data['Phase'].copy()

        y_pred = global_model.predict(X_test)
        node_accuracy = accuracy_score(y_test, y_pred)
        node_precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)

        # Store the results for each node
        iteration_global_accuracy[i] = node_accuracy
        iteration_global_precision[i] = node_precision

    # Store global accuracy and precision for this iteration
    global_accuracy_all_iterations.append(iteration_global_accuracy)
    global_precision_all_iterations.append(iteration_global_precision)

# Organize the results into a DataFrame (one row per iteration, each cell a list of per-node scores)
results_df = pd.DataFrame({
    'Global Accuracy': global_accuracy_all_iterations,
    'Global Precision': global_precision_all_iterations
})

# Save the results to a CSV file
results_df.to_csv('model_results_global.csv', index=False)

print("Results saved to 'model_results_global.csv'")


Results saved to 'model_results_global.csv'


In [None]:
#combined
import pandas as pd

# Load the results written by the local-training and federated-averaging cells.
# The global file name must match what the federated cell saves
# ('model_results_global.csv'), otherwise this cell reads a missing or stale file.
local_results_df = pd.read_csv('model_resultsl.csv')  # Replace with the actual path of the CSV file
global_results_df = pd.read_csv('model_results_global.csv')  # Replace with the actual path of the CSV file

# Extract local accuracy and precision from Code 1 results
local_accuracy = local_results_df['Local Accuracy'].tolist()
local_precision = local_results_df['Local Precision'].tolist()

# Extract global accuracy and precision from Code 2 results
global_accuracy = global_results_df['Global Accuracy'].tolist()
global_precision = global_results_df['Global Precision'].tolist()

# Combine the local and global results into a new DataFrame (one row per iteration)
combined_results_df = pd.DataFrame({
    'Local Accuracy': local_accuracy,
    'Global Accuracy': global_accuracy,
    'Local Precision': local_precision,
    'Global Precision': global_precision
})

# Save the combined results to a CSV file
combined_results_df.to_csv('model_results_combined.csv', index=False)

print("Combined results saved to 'model_results_combined.csv'")



Combined results saved to 'model_results_combined.csv'


In [None]:
import pandas as pd

# Load the dataset
df = pd.read_csv('ADSXLIST_07Sep2023.csv')

# Define the list of specific features.
# Each column appears exactly once so its value counts are printed only once.
features = [
    'AXNAUSEA', 'AXVOMIT', 'AXDIARRH', 'AXCONSTP', 'AXABDOMN', 'AXSWEATN',
    'AXDIZZY', 'AXENERGY', 'AXDROWSY', 'AXVISION', 'AXHDACHE', 'AXDRYMTH',
    'AXBREATH', 'AXCOUGH', 'AXPALPIT', 'AXCHEST', 'AXURNDIS', 'AXURNFRQ',
    'AXANKLE', 'AXMUSCLE', 'AXRASH', 'AXINSOMN', 'AXDPMOOD', 'AXCRYING',
    'AXELMOOD',
]

# Iterate through each feature and print value counts
for feature in features:
    value_counts = df[feature].value_counts()
    print(f"Value counts for '{feature}':")
    print(value_counts)
    print()  # Adds a blank line for better readability
