# GCFL Experiment

### Import Packages

In [52]:
%load_ext autoreload
%autoreload 2

In [53]:
import argparse
import copy
import os
import random
import warnings
from pathlib import Path

import numpy as np
import torch

import setupGC
from training import *

warnings.filterwarnings("ignore")

### Pre-defined Arguments, Seeds, and Output Paths

In [54]:
# Experiment configuration, collected on an argparse.Namespace so it can be
# passed around as a single `args` object.
args = argparse.Namespace()
args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
args.local_epoch = 1
args.lr = 0.001
args.weight_decay = 5e-4
args.nlayer = 3
args.hidden = 64
args.dropout = 0.5
args.batch_size = 128
args.repeat = None  # when not None, process_gcfl writes its CSV under "repeats/" with this value as prefix
args.convert_x = False
args.overlap = False
args.standardize = False
args.epsilon1 = 0.05
args.epsilon2 = 0.1

args.num_repeat = 5
args.num_rounds = 200
args.seed = 123
args.datapath = './data'
args.outbase = './outputs'
args.dataset = 'PROTEINS'
args.num_clients = 10
args.seq_length = 5

print(args)
seed_dataSplit = 123  # seed handed to the data-splitting call; kept separate from args.seed
EPS_1 = args.epsilon1
EPS_2 = args.epsilon2

#################### set seeds and devices ####################
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

#################### set output paths ####################
outbase = os.path.join(args.outbase, f'seqLen{args.seq_length}')
# Derive the split-mode folder from args.overlap so that results of an
# overlapping split are never filed under the non-overlap folder.
split_mode = "oneDS-overlap" if args.overlap else "oneDS-nonOverlap"
outpath = os.path.join(
    outbase,
    split_mode,
    f'{args.dataset}-{args.num_clients}clients',
    f'eps_{EPS_1}_{EPS_2}',
)
Path(outpath).mkdir(parents=True, exist_ok=True)
print(f"Output Path: {outpath}")

Namespace(batch_size=128, convert_x=False, datapath='./data', dataset='PROTEINS', device=device(type='cpu'), dropout=0.5, epsilon1=0.05, epsilon2=0.1, hidden=64, local_epoch=1, lr=0.001, nlayer=3, num_clients=10, num_repeat=5, num_rounds=200, outbase='./outputs', overlap=False, repeat=None, seed=123, seq_length=5, standardize=False, weight_decay=0.0005)
Output Path: ./outputs/seqLen5/oneDS-nonOverlap/PROTEINS-10clients/eps_0.05_0.1


### Prepare Dataset

In [55]:
#################### distribute one dataset across multiple clients ####################
# The dataset's original node features are used.
print("Preparing data (original features) ...")
prepare_kwargs = dict(
    datapath=args.datapath,
    data=args.dataset,
    num_client=args.num_clients,
    batchSize=args.batch_size,
    convert_x=args.convert_x,
    seed=seed_dataSplit,
    overlap=args.overlap,
)
data_splitted, data_stats_df = setupGC.prepareData_oneDS(**prepare_kwargs)
print("Data prepared.")

Preparing data (original features) ...
  ** PROTEINS 1113
Data prepared.


Data Type:

`data_splitted`: dict with keys "0-PROTEINS", ..., "(x-1)-PROTEINS", where $x$ is the number of clients.

`data_splitted['i-PROTEINS']` (for a client index $i = 0, \dots, x-1$): tuple => `(data_dict, num_node_features, num_graph_labels, len(ds_train))`

`data_splitted['i-PROTEINS'][0]`: dict with keys `'train'`, `'val'`, `'test'`, each mapping to a `DataLoader`

In [42]:
# Inspect the entry of the first client: a tuple of
# (loaders dict, num_node_features, num_graph_labels, training-set size).
first_client_entry = data_splitted['0-PROTEINS']
first_client_entry

({'train': <torch_geometric.loader.dataloader.DataLoader at 0x7f89406e4550>,
  'val': <torch_geometric.loader.dataloader.DataLoader at 0x7f89406e4610>,
  'test': <torch_geometric.loader.dataloader.DataLoader at 0x7f89406e4650>},
 3,
 2,
 87)

### Write Statistical Data

In [39]:
#################### persist the per-client data statistics ####################
# The statistics table is stored as CSV inside this run's output directory.
out_data_stats = os.path.join(outpath, 'stats_trainData.csv')
data_stats_df.to_csv(out_data_stats)
print(f"Data statistics are written to {out_data_stats}")

Data statistics are written to ./outputs/seqLen5/oneDS-nonOverlap/PROTEINS-10clients/eps_0.05_0.1/stats_trainData.csv


### Initialize Clients and Server

In [None]:
# Build the initial client and server objects from the per-client data splits
# produced by setupGC.prepareData_oneDS (`data_splitted`).
# The third return value of setup_devices is not used in this notebook.
init_clients, init_server, _ = setupGC.setup_devices(data_splitted, args)
print("\nDone setting up devices.")

### Run GCFL Model

In [3]:
def process_gcfl(clients, server):
    """Run GCFL with the given clients and server and save the result as CSV.

    Reads the notebook-level globals ``args``, ``outpath``, ``EPS_1`` and
    ``EPS_2``. The frame returned by ``run_gcfl`` is written to
    ``<outpath>/accuracy_gcfl_GC.csv`` when ``args.repeat`` is None, otherwise
    to ``<outpath>/repeats/<args.repeat>_accuracy_gcfl_GC.csv``.

    Args:
        clients: client objects forwarded to ``run_gcfl``.
        server: server object forwarded to ``run_gcfl``.

    Returns:
        None. The result is written to disk and the file path is printed.
    """
    print("\nDone setting up GCFL devices.")
    print("Running GCFL ...")

    if args.repeat is None:
        outfile = os.path.join(outpath, 'accuracy_gcfl_GC.csv')
    else:
        outfile = os.path.join(outpath, "repeats", f'{args.repeat}_accuracy_gcfl_GC.csv')

    # Create the target directory before training starts: the "repeats"
    # sub-folder is not created anywhere else, and a missing directory would
    # otherwise only surface when writing the CSV after the whole run.
    os.makedirs(os.path.dirname(outfile), exist_ok=True)

    frame = run_gcfl(clients, server, args.num_rounds, args.local_epoch, EPS_1, EPS_2)
    frame.to_csv(outfile)
    print(f"Wrote to file: {outfile}")

In [None]:
#################### run GCFL ####################
# Deep copies are passed in so that init_clients / init_server are not the
# objects handed to the run.
process_gcfl(
    clients=copy.deepcopy(init_clients),
    server=copy.deepcopy(init_server),
)