In [235]:
import h5py
import numpy as np
import pandas as pd
import os

## Maximal accuracy

### Each algorithm is run at least 10 times; the results are then averaged and stored in the `XXX_avg.h5` files

In [236]:
def simple_read_data(alg, folder=""):
    """
    Read training accuracy, training loss, and global accuracy from an HDF5 file.

    The file is looked up at ``./results_2/<folder>/<alg>.h5`` and must contain
    the datasets ``rs_glob_acc``, ``rs_train_acc`` and ``rs_train_loss``.

    Parameters:
    alg (str): File name without the ``.h5`` extension.
    folder (str): Sub-folder of ``./results_2`` where the HDF5 file is located.

    Returns:
    tuple: ``(rs_train_acc, rs_train_loss, rs_glob_acc)`` as NumPy arrays.
    """

    path = os.path.join("./results_2", folder, '{}.h5'.format(alg))
    # Open through a context manager so the file handle is released even when
    # one of the datasets is missing. The ``[:]`` slice copies each dataset
    # into memory, so the arrays stay valid after the file is closed.
    with h5py.File(path, 'r') as hf:
        rs_glob_acc = np.array(hf.get('rs_glob_acc')[:])
        rs_train_acc = np.array(hf.get('rs_train_acc')[:])
        rs_train_loss = np.array(hf.get('rs_train_loss')[:])
    return rs_train_acc, rs_train_loss, rs_glob_acc

def get_training_data_value(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[], beta=[], algorithms_list=[], batch_size=[], dataset="", k=[], personal_learning_rate=[], folder=""):
    """
    Load the averaged training/testing curves of several algorithms from HDF5 files.

    For every entry of ``algorithms_list`` a file name is assembled from the
    dataset name, the algorithm name and its hyper-parameters, with the suffix
    ``_avg``, and read through ``simple_read_data``.

    Parameters:
    num_users (int): Number of users (part of the file name).
    loc_ep1 (list): Local-epoch value per algorithm. It is indexed like
        ``algorithms_list``, so the scalar default only works when
        ``algorithms_list`` is empty.
    Numb_Glob_Iters (int): Number of leading entries kept from each stored series.
    lamb (list): Lambda value per algorithm.
    learning_rate (list): Learning rate per algorithm.
    beta (list): Beta value per algorithm.
    algorithms_list (list): Algorithm names. The list is not modified.
    batch_size (list): Batch size per algorithm.
    dataset (str): Name of the dataset (file-name prefix).
    k (list): k value per algorithm; only read for "pFedMe" and "pFedMe_p".
    personal_learning_rate (list): Personal learning rate per algorithm; only
        read for "pFedMe" and "pFedMe_p".
    folder (str): The folder name where the HDF5 files are located.

    Returns:
    tuple: ``(glob_acc, train_acc, train_loss)``, each an array of shape
    ``(len(algorithms_list), Numb_Glob_Iters)``.
    """

    # The mutable defaults in the signature are kept for interface
    # compatibility; none of them is ever mutated here.
    Numb_Algs = len(algorithms_list)
    train_acc = np.zeros((Numb_Algs, Numb_Glob_Iters))
    train_loss = np.zeros((Numb_Algs, Numb_Glob_Iters))
    glob_acc = np.zeros((Numb_Algs, Numb_Glob_Iters))

    for i in range(Numb_Algs):
        alg_name = algorithms_list[i]
        # Build the file name in a local variable instead of overwriting the
        # caller's list entry: a failed read can then no longer leave
        # ``algorithms_list`` holding a half-built file name.
        run_name = "{}_{}_{}_{}_{}u_{}b_{}".format(
            alg_name, learning_rate[i], beta[i], lamb[i],
            num_users, batch_size[i], loc_ep1[i])
        if alg_name in ("pFedMe", "pFedMe_p"):
            # pFedMe result files carry two extra hyper-parameters.
            run_name = "{}_{}_{}".format(run_name, k[i], personal_learning_rate[i])

        # Stack the three series and keep the first Numb_Glob_Iters entries.
        curves = np.array(
            simple_read_data(dataset + "_" + run_name + "_avg", folder))[:, :Numb_Glob_Iters]
        train_acc[i, :], train_loss[i, :], glob_acc[i, :] = curves
    return glob_acc, train_acc, train_loss

def get_max_value_index(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=[], learning_rate=[], beta=[], algorithms_list=[], batch_size=[], dataset="", k=[], personal_learning_rate=[], folder=""):
    """
    Return the maximum testing accuracy and its index for each algorithm.

    All arguments are forwarded unchanged to ``get_training_data_value``;
    only the global (testing) accuracy it returns is used.

    Parameters:
    num_users (int): Number of users.
    loc_ep1 (list): Local-epoch value per algorithm.
    Numb_Glob_Iters (int): Number of global iterations.
    lamb (list): List of lambda values.
    learning_rate (list): List of learning rates.
    beta (list): List of beta values.
    algorithms_list (list): List of algorithm names.
    batch_size (list): List of batch sizes.
    dataset (str): Name of the dataset.
    k (list): List of k values.
    personal_learning_rate (list): List of personal learning rates.
    folder (str): The folder name where the HDF5 files are located.

    Returns:
    pandas.DataFrame: One row per algorithm with the columns "Algorithm",
    "Folder", "Max testing Accuracy" and "Index" (position of the maximum
    within the loaded accuracy series).
    """

    # Training accuracy and loss are returned as well but are not needed here.
    glob_acc, _, _ = get_training_data_value(num_users, loc_ep1, Numb_Glob_Iters, lamb, learning_rate, beta, algorithms_list, batch_size, dataset, k, personal_learning_rate, folder)

    results = []
    for i in range(len(algorithms_list)):
        results.append({
            "Algorithm": algorithms_list[i],
            "Folder": folder,
            "Max testing Accuracy": glob_acc[i].max(),
            "Index": np.argmax(glob_acc[i]),
        })

    return pd.DataFrame(results)

In [237]:
# Experiment selection shared by both result folders.
algorithms = ["pFedMe_p", "pFedMe", "PerAvg_p", "FedAvg"]
dataset = "Mnist"
Numb_Glob_Iters = 800
folders = ["results_DNN", "results_MLR"]
num_users = 5

# Hyper-parameters per result folder; every list is aligned with `algorithms`.
params = {
    "results_DNN": dict(
        learning_rates=[0.01, 0.01, 0.02, 0.02],
        betas=[2.0, 2.0, 0.001, 1.0],
        lambdas=[30, 30, 15, 15],
        personal_learning_rate=[0.05, 0.05, 0.05, 0.05],
        local_epochs=[20, 20, 20, 20],
        K=[5, 5, 5, 5],
        batch_sizes=[20, 20, 20, 20],
    ),
    "results_MLR": dict(
        learning_rates=[0.01, 0.01, 0.03, 0.02],
        betas=[2.0, 2.0, 0.003, 1.0],
        lambdas=[15, 15, 15, 15],
        personal_learning_rate=[0.1, 0.1, 0.1, 0.1],
        local_epochs=[20, 20, 20, 20],
        K=[5, 5, 5, 5],
        batch_sizes=[20, 20, 20, 20],
    ),
}

# One summary DataFrame per folder, stacked into a single table afterwards.
results_all = []

for folder in folders:
    folder_params = params[folder]
    results_df = get_max_value_index(
        num_users=num_users,
        loc_ep1=folder_params["local_epochs"],
        Numb_Glob_Iters=Numb_Glob_Iters,
        lamb=folder_params["lambdas"],
        learning_rate=folder_params["learning_rates"],
        beta=folder_params["betas"],
        algorithms_list=algorithms,
        batch_size=folder_params["batch_sizes"],
        dataset=dataset,
        k=folder_params["K"],
        personal_learning_rate=folder_params["personal_learning_rate"],
        folder=folder,
    )
    results_all.append(results_df)

# Stack the per-folder tables and renumber the rows.
all_results_df = pd.concat(results_all, ignore_index=True)

all_results_df

Unnamed: 0,Algorithm,Folder,Max testing Accuracy,Index
0,pFedMe_p,results_DNN,0.967495,255
1,pFedMe,results_DNN,0.962392,768
2,PerAvg_p,results_DNN,0.94155,794
3,FedAvg,results_DNN,0.960448,570
4,pFedMe_p,results_MLR,0.93939,76
5,pFedMe,results_MLR,0.919438,250
6,PerAvg_p,results_MLR,0.933477,470
7,FedAvg,results_MLR,0.925,743


### Maximal accuracy of each individual run file, summarized per algorithm and folder (best run, mean, and variance)

In [238]:
# NOTE(review): this re-defines simple_read_data from an earlier cell with the
# same behavior; consider keeping a single definition in the notebook.
def simple_read_data(alg, folder=""):
    """
    Read training accuracy, training loss, and global accuracy from an HDF5 file.

    The file is looked up at ``./results_2/<folder>/<alg>.h5`` and must contain
    the datasets ``rs_glob_acc``, ``rs_train_acc`` and ``rs_train_loss``.

    Parameters:
    alg (str): File name without the ``.h5`` extension.
    folder (str): Sub-folder of ``./results_2`` where the HDF5 file is located.

    Returns:
    tuple: ``(rs_train_acc, rs_train_loss, rs_glob_acc)`` as NumPy arrays.
    """

    path = os.path.join("./results_2", folder, '{}.h5'.format(alg))
    # Open through a context manager so the file handle is released even when
    # one of the datasets is missing. The ``[:]`` slice copies each dataset
    # into memory, so the arrays stay valid after the file is closed.
    with h5py.File(path, 'r') as hf:
        rs_glob_acc = np.array(hf.get('rs_glob_acc')[:])
        rs_train_acc = np.array(hf.get('rs_train_acc')[:])
        rs_train_loss = np.array(hf.get('rs_train_loss')[:])
    return rs_train_acc, rs_train_loss, rs_glob_acc

def get_all_training_data_value(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=0, learning_rate=0, beta=0, algorithms="", batch_size=0, dataset="", k=0, personal_learning_rate=0, times=10, folder=""):
    """
    Load the curves of every individual run of one algorithm from HDF5 files.

    Run ``i`` (``0 <= i < times``) is read through ``simple_read_data`` from a
    file whose name is assembled from the dataset name, the algorithm name,
    its hyper-parameters and the run index ``i``.

    Parameters:
    num_users (int): Number of users.
    loc_ep1 (int): Number of local epochs.
    Numb_Glob_Iters (int): Number of leading entries kept from each stored series.
    lamb (float): Lambda value.
    learning_rate (float): Learning rate.
    beta (float): Beta value.
    algorithms (str): Name of the algorithm.
    batch_size (int): Batch size.
    dataset (str): Name of the dataset.
    k (int): k value; only used for "pFedMe" and "pFedMe_p".
    personal_learning_rate (float): Personal learning rate; only used for
        "pFedMe" and "pFedMe_p".
    times (int): Number of run files to read (run indices 0 .. times-1).
    folder (str): The folder name where the HDF5 files are located.

    Returns:
    tuple: ``(glob_acc, train_acc, train_loss)``, each an array of shape
    ``(times, Numb_Glob_Iters)``.
    """

    def _run_name(run):
        # File-name stem (without the dataset prefix) for one run index.
        name = algorithms + "_" + str(learning_rate) + "_" + str(beta) + "_" + str(lamb)
        name = name + "_" + str(num_users) + "u" + "_" + str(batch_size) + "b" + "_" + str(loc_ep1)
        if algorithms in ("pFedMe", "pFedMe_p"):
            # pFedMe result files carry two extra hyper-parameters.
            name = name + "_" + str(k) + "_" + str(personal_learning_rate)
        return name + "_" + str(run)

    shape = (times, Numb_Glob_Iters)
    train_acc = np.zeros(shape)
    train_loss = np.zeros(shape)
    glob_acc = np.zeros(shape)

    for run in range(times):
        # Stack the three series and keep the first Numb_Glob_Iters entries.
        curves = np.array(
            simple_read_data(dataset + "_" + _run_name(run), folder))[:, :Numb_Glob_Iters]
        train_acc[run, :], train_loss[run, :], glob_acc[run, :] = curves
    return glob_acc, train_acc, train_loss


def get_max_value_index_all(num_users=100, loc_ep1=5, Numb_Glob_Iters=10, lamb=0, learning_rate=0, beta=0, algorithms="", batch_size=0, dataset="", k=0, personal_learning_rate=0, times=10, folder=""):
    """
    Get the maximum testing accuracy and its index for every individual run.

    All arguments are forwarded to ``get_all_training_data_value``; only the
    global (testing) accuracy it returns is used.

    Parameters:
    num_users (int): Number of users.
    loc_ep1 (int): Number of local epochs.
    Numb_Glob_Iters (int): Number of global iterations.
    lamb (float): Lambda value.
    learning_rate (float): Learning rate.
    beta (float): Beta value.
    algorithms (str): Name of the algorithm.
    batch_size (int): Batch size.
    dataset (str): Name of the dataset.
    k (int): k value.
    personal_learning_rate (float): Personal learning rate.
    times (int): Number of runs to read.
    folder (str): The folder name where the HDF5 files are located.

    Returns:
    pandas.DataFrame: One row per run with the columns "Algorithm", "Folder",
    "Max testing Accuracy" and "Index".
    """

    # Training accuracy and loss are returned as well but are not needed here.
    glob_acc, _, _ = get_all_training_data_value(
        num_users=num_users,
        loc_ep1=loc_ep1,
        Numb_Glob_Iters=Numb_Glob_Iters,
        lamb=lamb,
        learning_rate=learning_rate,
        beta=beta,
        algorithms=algorithms,
        batch_size=batch_size,
        dataset=dataset,
        k=k,
        personal_learning_rate=personal_learning_rate,
        times=times,
        folder=folder,
    )

    rows = [
        {
            "Algorithm": algorithms,
            "Folder": folder,
            "Max testing Accuracy": glob_acc[run].max(),
            "Index": np.argmax(glob_acc[run]),
        }
        for run in range(times)
    ]

    return pd.DataFrame(rows)

In [239]:
algorithms = ["pFedMe_p", "pFedMe", "PerAvg_p", "FedAvg"]
dataset = "Mnist"
Numb_Glob_Iters = 800
folders = ["results_DNN", "results_MLR"]
# Set explicitly (same value as in the earlier summary cell) so this cell does
# not depend on a variable left over from another cell.
num_users = 5

# Hyper-parameters per result folder; every list is aligned with `algorithms`.
params = {
    "results_DNN": {
        "learning_rates": [0.01, 0.01, 0.02, 0.02],
        "betas": [2.0, 2.0, 0.001, 1.0],
        "lambdas": [30, 30, 15, 15],
        "personal_learning_rate": [0.05, 0.05, 0.05, 0.05],
        "local_epochs": [20, 20, 20, 20],
        "K": [5, 5, 5, 5],
        "batch_sizes": [20, 20, 20, 20]
    },
    "results_MLR": {
        "learning_rates": [0.01, 0.01, 0.03, 0.02],
        "betas": [2.0, 2.0, 0.003, 1.0],
        "lambdas": [15, 15, 15, 15],
        "personal_learning_rate": [0.1, 0.1, 0.1, 0.1],
        "local_epochs": [20, 20, 20, 20],
        "K": [5, 5, 5, 5],
        "batch_sizes": [20, 20, 20, 20]
    }
}

# One DataFrame per (folder, algorithm) pair, each holding one row per run.
results_all = []

for folder in folders:
    for i in range(len(algorithms)):
        results_df = get_max_value_index_all(
            num_users=num_users,
            loc_ep1=params[folder]["local_epochs"][i],
            Numb_Glob_Iters=Numb_Glob_Iters,
            lamb=params[folder]["lambdas"][i],
            learning_rate=params[folder]["learning_rates"][i],
            beta=params[folder]["betas"][i],
            algorithms=algorithms[i],
            batch_size=params[folder]["batch_sizes"][i],
            dataset=dataset,
            folder=folder,
            k=params[folder]["K"][i],
            personal_learning_rate=params[folder]["personal_learning_rate"][i]
        )
        results_all.append(results_df)

# Concatenate all DataFrames
all_results_df = pd.concat(results_all, ignore_index=True)

# Get only the rows with maximum accuracy for each algorithm and each folder
max_results_df = all_results_df.loc[all_results_df.groupby(["Algorithm", "Folder"])["Max testing Accuracy"].idxmax()]

# Get the mean and variance of the per-run maxima for each algorithm and each folder
mean_var_results_df = all_results_df.groupby(["Algorithm", "Folder"]).agg(
    {"Max testing Accuracy": ["mean", "var"]}
)

# Flatten the two-level column index produced by the aggregation
mean_var_results_df.columns = ["Mean Accuracy", "Variance"]

# Turn the group keys back into regular columns so they can be merged on
mean_var_results_df = mean_var_results_df.reset_index()

# Attach the mean and variance to the best run of each group
max_results_df = max_results_df.merge(mean_var_results_df, on=["Algorithm", "Folder"])

max_results_df

Unnamed: 0,Algorithm,Folder,Max testing Accuracy,Index,Mean Accuracy,Variance
0,FedAvg,results_DNN,0.962743,556,0.961744,2.43771e-07
1,FedAvg,results_MLR,0.929266,780,0.928483,4.284214e-07
2,PerAvg_p,results_DNN,0.942765,795,0.941712,6.551852e-07
3,PerAvg_p,results_MLR,0.935745,470,0.934746,5.353243e-07
4,pFedMe,results_DNN,0.965173,782,0.963688,4.413794e-07
5,pFedMe,results_MLR,0.924406,433,0.923785,3.733503e-07
6,pFedMe_p,results_DNN,0.970572,184,0.969546,5.636698e-07
7,pFedMe_p,results_MLR,0.944114,39,0.942414,1.620551e-06
