In [None]:
# Absolute path to the project root; `load_results` reads pickles from
# `<ROOT_DIR>/results/<DATASET>/`.
# NOTE(review): machine-specific path - adjust before running elsewhere.
ROOT_DIR = '/gpfs/commons/groups/gursoy_lab/aelhussein/ot_cost/otcost_fl_rebase'
import pandas as pd
import sys
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import bootstrap

In [None]:
def load_results(DATASET, grid):
    """Load the pickled hyperparameter-search results for one dataset.

    Parameters
    ----------
    DATASET : str
        Dataset name; used as both the results sub-directory and the
        file-name prefix.
    grid : bool
        Truthy selects ``<DATASET>_hyperparameter_search_personal.pkl``;
        falsy selects ``<DATASET>_hyperparameter_search.pkl``.

    Returns
    -------
    object
        Whatever object was pickled in the selected file.
    """
    if grid:
        pkl_path = f'{ROOT_DIR}/results/{DATASET}/{DATASET}_hyperparameter_search_personal.pkl'
    else:
        pkl_path = f'{ROOT_DIR}/results/{DATASET}/{DATASET}_hyperparameter_search.pkl'
    # NOTE: unpickling can execute arbitrary code - only read trusted files.
    with open(pkl_path, 'rb') as handle:
        return pickle.load(handle)
    
def bootstrap_ci(data, random_state=None):
    """Collapse every hyperparameter setting's scores into one estimate.

    ``data`` is walked as a four-level nested mapping
    (``data[c][arch][optim][lr]``) and the result mirrors that nesting.
    For each leaf the returned value is ``(3 * median + ci_low + ci_high) / 5``,
    where ``(ci_low, ci_high)`` is SciPy's bootstrap confidence interval for
    the mean (1000 resamples, SciPy's default method and confidence level).

    A leaf may be a scalar or any sequence/array of numbers (it is
    flattened). A leaf with a single observation, or with several identical
    observations, cannot be bootstrapped: SciPy rejects single observations
    and produces a NaN interval for a degenerate bootstrap distribution.
    For those leaves the interval is taken to be the value itself, so the
    estimate equals the value.

    Parameters
    ----------
    data : dict
        Nested mapping ``{c: {arch: {optim: {lr: scores}}}}``.
    random_state : None, int, numpy.random.Generator or RandomState, optional
        Forwarded to ``scipy.stats.bootstrap`` when not ``None`` so that the
        resampling is reproducible. ``None`` keeps SciPy's default behaviour.

    Returns
    -------
    dict
        Same nesting as ``data`` with one float per leaf.

    Raises
    ------
    ValueError
        If a leaf contains no observations.
    """
    # Forward the seed only when one was supplied, so the default call to
    # `bootstrap` is exactly the one made without this parameter.
    bootstrap_kwargs = {} if random_state is None else {'random_state': random_state}

    estimates = {}
    for c, archs in data.items():
        estimates[c] = {}
        for arch, optims in archs.items():
            estimates[c][arch] = {}
            for optim, lrs in optims.items():
                estimates[c][arch][optim] = {}
                for lr, res in lrs.items():
                    # Normalise scalars, lists, tuples and arrays to a flat 1-D sample.
                    sample = np.asarray(res, dtype=float).ravel()
                    if sample.size == 0:
                        raise ValueError(
                            f"no observations for data[{c!r}][{arch!r}][{optim!r}][{lr!r}]"
                        )

                    median = np.median(sample)
                    if sample.size == 1 or np.all(sample == sample[0]):
                        # Degenerate sample: the interval collapses onto the value.
                        ci_low = ci_high = median
                    else:
                        bs_reps = bootstrap(
                            (sample,),
                            statistic=np.mean,
                            n_resamples=1000,
                            **bootstrap_kwargs,
                        )
                        ci_low, ci_high = bs_reps.confidence_interval[0:2]

                    # The median is weighted 3x against the two interval bounds.
                    estimates[c][arch][optim][lr] = np.mean(
                        [median, median, median, ci_low, ci_high]
                    )
    return estimates

def best_parameters(results_estimates):
    """Pick the highest-valued (optimizer, lr) pair per cost and architecture.

    Parameters
    ----------
    results_estimates : dict
        Nested mapping ``{cost: {architecture: {optimizer: {lr: value}}}}``.

    Returns
    -------
    dict
        ``{cost: {architecture: "<optimizer>: <lr>: <value to 3 decimals>"}}``
        with the outer keys in sorted order. Larger values win; on ties the
        first pair encountered is kept. An architecture with no value greater
        than negative infinity maps to the empty string.
    """
    winners = {}
    for cost, architectures in results_estimates.items():
        for architecture, optimizers in architectures.items():
            top_value = float('-inf')
            top_label = ""
            # Flatten the optimizer -> lr nesting into a single stream.
            candidates = (
                (optimizer, lr, value)
                for optimizer, lrs in optimizers.items()
                for lr, value in lrs.items()
            )
            for optimizer, lr, value in candidates:
                if value > top_value:
                    top_value = value
                    top_label = f"{optimizer}: {lr}: {value:.3f}"
            winners.setdefault(cost, {})[architecture] = top_label
    return {cost: winners[cost] for cost in sorted(winners)}

def process_results(DATASET, grid=False):
    """Load a dataset's search results and select its best hyperparameters.

    Chains ``load_results`` -> ``bootstrap_ci`` -> ``best_parameters``.

    Parameters
    ----------
    DATASET : str
        Dataset name passed to ``load_results``.
    grid : bool, default False
        Passed to ``load_results`` to choose which results file is read.

    Returns
    -------
    tuple
        ``(results_estimates, best_hyperparams)``: the per-setting estimates
        from ``bootstrap_ci`` and the selection made by ``best_parameters``.
    """
    estimates = bootstrap_ci(load_results(DATASET, grid))
    return estimates, best_parameters(estimates)

# Learning rate (LR) search only

### Synthetic

In [None]:
# Read this dataset's `_hyperparameter_search.pkl` results (grid defaults to
# False) and pick the best-valued optimizer/lr per cost and architecture.
DATASET = 'Synthetic'
results_estimates, best_hyperparams = process_results(DATASET)
best_hyperparams  # shown as the cell output

### Credit

In [None]:
# Read this dataset's `_hyperparameter_search.pkl` results (grid defaults to
# False) and pick the best-valued optimizer/lr per cost and architecture.
DATASET = 'Credit'
results_estimates, best_hyperparams = process_results(DATASET)
best_hyperparams  # shown as the cell output

### Weather

In [None]:
# Read this dataset's `_hyperparameter_search.pkl` results (grid defaults to
# False) and pick the best-valued optimizer/lr per cost and architecture.
DATASET = 'Weather'
results_estimates, best_hyperparams = process_results(DATASET)
best_hyperparams  # shown as the cell output

### EMNIST

In [None]:
# Read this dataset's `_hyperparameter_search.pkl` results (grid defaults to
# False) and pick the best-valued optimizer/lr per cost and architecture.
DATASET = 'EMNIST'
results_estimates, best_hyperparams = process_results(DATASET)
best_hyperparams  # shown as the cell output

### CIFAR

In [None]:
# Read this dataset's `_hyperparameter_search.pkl` results (grid defaults to
# False) and pick the best-valued optimizer/lr per cost and architecture.
DATASET = 'CIFAR'
results_estimates, best_hyperparams = process_results(DATASET)
best_hyperparams  # shown as the cell output

### IXITiny

In [None]:
# Read this dataset's `_hyperparameter_search.pkl` results (grid defaults to
# False) and pick the best-valued optimizer/lr per cost and architecture.
DATASET = 'IXITiny'
results_estimates, best_hyperparams = process_results(DATASET)
best_hyperparams  # shown as the cell output

### ISIC

In [None]:
# Read this dataset's `_hyperparameter_search.pkl` results (grid defaults to
# False) and pick the best-valued optimizer/lr per cost and architecture.
DATASET = 'ISIC'
results_estimates, best_hyperparams = process_results(DATASET)
best_hyperparams  # shown as the cell output

# Grid search over learning rate and regularization parameter (Ditto, pFedMe)

In [None]:
# Passed as `grid` to `process_results` in the cells below; a truthy value makes
# `load_results` read the `*_hyperparameter_search_personal.pkl` files.
grid = True

### Synthetic

In [None]:
# Pick the best-valued setting per cost and architecture, passing the
# notebook-level `grid` flag (truthy -> `_hyperparameter_search_personal.pkl`).
DATASET = 'Synthetic'
results_estimates, best_hyperparams = process_results(DATASET, grid)
best_hyperparams  # shown as the cell output

### Credit

In [None]:
# Pick the best-valued setting per cost and architecture, passing the
# notebook-level `grid` flag (truthy -> `_hyperparameter_search_personal.pkl`).
DATASET = 'Credit'
results_estimates, best_hyperparams = process_results(DATASET, grid)
best_hyperparams  # shown as the cell output

### Weather

In [None]:
# Pick the best-valued setting per cost and architecture, passing the
# notebook-level `grid` flag (truthy -> `_hyperparameter_search_personal.pkl`).
DATASET = 'Weather'
results_estimates, best_hyperparams = process_results(DATASET, grid)
best_hyperparams  # shown as the cell output

### EMNIST

In [None]:
# Pick the best-valued setting per cost and architecture, passing the
# notebook-level `grid` flag (truthy -> `_hyperparameter_search_personal.pkl`).
DATASET = 'EMNIST'
results_estimates, best_hyperparams = process_results(DATASET, grid)
best_hyperparams  # shown as the cell output

### CIFAR

In [None]:
def load_results(DATASET, grid):
    """Return the unpickled hyperparameter-search results for ``DATASET``.

    NOTE(review): this cell redefines ``load_results`` with the same behaviour
    as the definition near the top of the notebook; it is a candidate for
    removal.

    Parameters
    ----------
    DATASET : str
        Dataset name; used as both the results sub-directory and the
        file-name prefix.
    grid : bool
        Truthy reads ``<DATASET>_hyperparameter_search_personal.pkl``;
        falsy reads ``<DATASET>_hyperparameter_search.pkl``.

    Returns
    -------
    object
        Whatever object was pickled in the selected file.
    """
    source_file = (
        f'{ROOT_DIR}/results/{DATASET}/{DATASET}_hyperparameter_search_personal.pkl'
        if grid
        else f'{ROOT_DIR}/results/{DATASET}/{DATASET}_hyperparameter_search.pkl'
    )
    # NOTE: unpickling can execute arbitrary code - only read trusted files.
    with open(source_file, 'rb') as stream:
        loaded = pickle.load(stream)
    return loaded

In [None]:
# Pick the best-valued setting per cost and architecture, passing the
# notebook-level `grid` flag (truthy -> `_hyperparameter_search_personal.pkl`).
DATASET = 'CIFAR'
results_estimates, best_hyperparams = process_results(DATASET, grid)
best_hyperparams  # shown as the cell output

### IXITiny

In [58]:
# Pick the best-valued setting per cost and architecture, passing the
# notebook-level `grid` flag (truthy -> `_hyperparameter_search_personal.pkl`).
DATASET = 'IXITiny'
results_estimates, best_hyperparams = process_results(DATASET, grid)
best_hyperparams  # shown as the cell output

{0.08: {'pfedme': 'ADM: (0.05, 0.5): 0.439',
  'ditto': 'ADM: (0.1, 0.1): 0.443'},
 0.28: {'pfedme': 'ADM: (0.05, 0.01): 0.441',
  'ditto': 'ADM: (0.05, 0.01): 0.395'},
 0.3: {'pfedme': 'ADM: (0.05, 0.001): 0.437',
  'ditto': 'ADM: (0.1, 0.01): 0.399'}}

### ISIC

In [59]:
# Pick the best-valued setting per cost and architecture, passing the
# notebook-level `grid` flag (truthy -> `_hyperparameter_search_personal.pkl`).
DATASET = 'ISIC'
results_estimates, best_hyperparams = process_results(DATASET, grid)
best_hyperparams  # shown as the cell output

{0.06: {'pfedme': 'ADM: (0.005, 0.5): 0.626',
  'ditto': 'ADM: (0.01, 0.5): 0.515'},
 0.15: {'pfedme': 'ADM: (0.005, 0.1): 0.582',
  'ditto': 'ADM: (0.01, 0.1): 0.514'},
 0.19: {'pfedme': 'ADM: (0.005, 0.1): 0.525',
  'ditto': 'ADM: (0.01, 0.1): 0.553'},
 0.25: {'pfedme': 'ADM: (0.005, 0.01): 0.506',
  'ditto': 'ADM: (0.01, 0.01): 0.485'},
 0.3: {'pfedme': 'ADM: (0.01, 0.001): 0.541',
  'ditto': 'ADM: (0.01, 0.01): 0.519'}}