In [None]:
# Mount Google Drive inside the Colab VM; later cells read project code and
# write model files under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


**Building the data**
Useful parameters:


*   dataset name, x_axis, y_axis
*   input and output length
*   train ratio begin and end are used for LocalTrainData and GlobalTrainData
*   test ratio begin and end are used for LocalTestData and GlobalTestData
*   predictionSampleRatio is used for the local and global prediction datasets?
*   trainAttackerBegin is used to train the attacker's model
*   train_normalization and test_normalization are for the normalization of the data
*   trainingInterval: the size of the batch in terms of time




In [None]:
# Install PyDrive (quietly, upgrading if present); it provides the
# GoogleAuth / GoogleDrive classes imported just below.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate the Colab user first, then hand the application-default
# credentials to PyDrive. The order of these statements matters.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE: this rebinds the name `drive`. From here on it is the PyDrive client,
# not the `google.colab.drive` module imported in the first cell.
drive = GoogleDrive(gauth)

import numpy as np 
import os
import sys
import time
import gc
import pandas as pd 
import torch
import datetime
import tensorflow as tf

# notebook_path = os.path.abspath("AdaptiveSingleLocalModel.ipynb")
sys.path.append('/content/drive/MyDrive/AdaVFL-GitHub')
from Data import MakeTrainingTimes, CheckLocalTrainData, CheckLocalPredictionData, GenerateRandomSamples, LocalSequentialDataset, LocalSampledDataset, GlobalSequentialDataset, GlobalSampledDataset, build_adjMatrix, MakeAttackTimes
from federationarguments import arguments

# Per-dataset settings. "link" is the Google Drive folder holding the input
# CSV files; every other key is copied verbatim onto `args`.
# trainingInterval is presumably expressed in milliseconds (1 day for bikeNYC,
# 8 days for Yelp) -- TODO confirm against the timestamp unit used in Data.py.
DATASET_SETTINGS = {
    "bikeNYC": {
        "link": "https://drive.google.com/drive/folders/1diJwebRNa5AQ16Jy6eHNGtYGmIGeqcrt",
        "trainingInterval": 24*60*60*1000,
        "batch_size": 97,
        "overall_size": 2870,
        "x_axis": 8,
        "y_axis": 16,
    },
    "Yelp": {
        "link": "https://drive.google.com/drive/folders/1K2Y_txKAda0TOEEDYvoa7sPPMYLCXI-U",
        "trainingInterval": 8*24*60*60*1000,
        "batch_size": 32,
        "overall_size": 1085,
        "x_axis": 8,
        "y_axis": 8,
    },
}

args = arguments()
if args.dataset not in DATASET_SETTINGS:
    raise SystemError('Invalid data folder')

# Work on a copy so the table above stays intact when the cell is re-run.
selected_settings = dict(DATASET_SETTINGS[args.dataset])
link = selected_settings.pop("link")
for option_name, option_value in selected_settings.items():
    setattr(args, option_name, option_value)

# Build the Drive query that lists every non-trashed file directly inside the
# dataset folder. The folder id is whatever follows "folders/" in the link.
DRIVE_QUERY_TEMPLATE = "'%s' in parents and trashed=false"

fluff, folder = link.split('folders/')
filePath = DRIVE_QUERY_TEMPLATE % folder
print(filePath)

drive_listing = drive.ListFile({'q': filePath})
downloaded = drive_listing.GetList()

# Choose the torch device. TensorFlow is only used here to probe for a GPU:
# the probe is compared against the name of the first GPU device.
#
# Fix: the original test was `!=`, which took the "Found GPU" / cuda branch
# exactly when NO GPU was reported (and then called torch.cuda.set_device on a
# machine without one), while falling back to the CPU when a GPU was present.
#
# NOTE(review): the training cells start their workers with the 'fork' start
# method. CUDA state generally cannot be re-initialised in forked children, so
# confirm the worker functions cope with args.device_name == 'cuda'.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
    args.device_name = 'cuda'
    torch.cuda.set_device(0)
else:
    args.device_name = 'cpu'

# Containers filled below, one entry per downloaded CSV file. The lists stay
# index-aligned: entry i of each list comes from the same file.
LocalTrainData = []
LocalValidationData = []
LocalTestData = []
# Prediction samples keyed by "<x>X<y>", the id also used by the global sets.
LocalPredictionSamples = {}
GlobalPredictionSamples = GlobalSampledDataset(args)
GlobalTestData = GlobalSequentialDataset(args,args.testRatioBegin,args.testRatioEnd)
GlobalTrainData = GlobalSequentialDataset(args,args.trainRatioBegin,args.trainRatioEnd)

sampled_list = []
# True until the shared sample list has been drawn from the first file.
sampledPrediction = True
for file in downloaded:
    try:
        # Fetch the CSV into the working directory. A separate name is used so
        # the `downloaded` listing being iterated is not overwritten.
        drive_file = drive.CreateFile({'id': file['id']})
        drive_file.GetContentFile(file['title'])
        timestamps = pd.read_csv(file['title'])

        # File titles look like "<x>X<y>.<ext>"; recover the two coordinates.
        grid = file['title'].split(".")
        axis = grid[0].split("X")
        x_axis = int(axis[0])
        y_axis = int(axis[1])
        sample_ID = str(x_axis)+"X"+str(y_axis)

        train = LocalSequentialDataset(timestamps, x_axis, y_axis, args.trainRatioBegin, args.trainRatioEnd, args)
        train.make_data()
        LocalTrainData.append(train)

        # The random sample list is generated once and reused for every file.
        if sampledPrediction:
            sampled_list = GenerateRandomSamples(timestamps, args)
            sampledPrediction = False

        predictsample = LocalSampledDataset(timestamps, x_axis, y_axis, args)
        predictsample.make_data(sampled_list)
        # Fix: append exactly once. The original appended twice per file, so
        # LocalValidationData[i] no longer matched LocalTrainData[i] when the
        # training loop indexed both lists with the same i.
        LocalValidationData.append(predictsample)

        test = LocalSequentialDataset(timestamps, x_axis, y_axis, args.testRatioBegin, args.testRatioEnd, args)
        test.make_data()
        LocalTestData.append(test)

        LocalPredictionSamples[sample_ID] = predictsample
        GlobalTestData.add_data(sample_ID, timestamps)
        GlobalTrainData.add_data(sample_ID, timestamps)
        GlobalPredictionSamples.add_data(sample_ID, predictsample)
    except Exception as e:
        # Still abort the cell, but re-raise instead of calling exit() so the
        # original traceback stays visible in the notebook.
        print("hit an exception when making data ",e)
        raise

# Build and then check each global split: test first, then train.
for global_split in (GlobalTestData, GlobalTrainData):
    global_split.make_data()
    global_split.check_data()

MakeTrainingTimes(LocalTrainData, args)
CheckLocalTrainData(LocalTrainData, args)

# Adjacency matrix later passed to the global model and to test_model.
adj = build_adjMatrix(args)

# Release the loading temporaries before training starts.
del sampled_list, timestamps
gc.collect()


**Privacy budget common initialization for all solutions**

In [None]:
from Data import generate_local_prediction, build_map,build_adjMatrix
from Models import GRU, MyGAT
import random
from WeightTools import (pertub_weights_process_conc,pertub_weights_process_val,pertub_weights_process_inc,
                         pertub_weights_process_ada, rho_to_sigma, sigma_to_rho,compute_advcomp_sigma, 
                         compute_advcomp_budget, rho_to_dp,compute_cumulated_budget, output_results, update_weights, 
                         pertub_weights, update_global_weights, Pair, test_model, update_budget_training, 
                         update_budget_accuracy , update_budget_increase, calculate_validation_accuracy,  
                         dp_to_zcdp, grad_func, noisyMax, perturb_gradients, compute_epsilon,
                         build_candidates, loss_score, override_model, grad_avg, sigma_to_epsilon, epsilon_to_sigma)
from tqdm import tqdm
import datetime

ratio = 1.
contribution = 1.
args.tracked_error = []
# Stays 0. when PrivacyMode is "None" or is not one of the modes handled below.
initial_budgets = 0.

beginningTime = args.beginTrainingTimestamp
endingTime = args.endTrainingTimestamp
# Total number of training rounds: intervals per epoch times number of epochs.
intervals_per_epoch = ((endingTime - beginningTime)//args.trainingInterval)+1
iterations = args.epochs*intervals_per_epoch

if args.PrivacyMode != "None":
    # Convert the overall (epsilon, delta) budget to rho with dp_to_zcdp, then
    # split rho and delta evenly over the rounds.
    total_epsilon = args.epsilon_0
    total_delta = args.delta_0
    total_rho = dp_to_zcdp(total_epsilon, total_delta)
    args.total_rho = total_rho
    rho_t = total_rho/iterations
    sigma_t = rho_to_sigma(rho_t)
    delta_t = total_delta/iterations
    epsilon_t = rho_to_dp(rho_t, delta_t)

    # Each mode starts from the per-round budget in the unit it works with.
    if args.PrivacyMode in ("Uniform", "Validation"):
        initial_budgets = sigma_t
    elif args.PrivacyMode == "Increase":
        initial_budgets = epsilon_t
    elif args.PrivacyMode in ("Concentrated", "Adaptive"):
        initial_budgets = rho_t

**Initialize common variables**

In [None]:
# Budget bookkeeping (each name gets its own fresh list).
assigned_epsilon, assigned_sigma, assigned_rho = [], [], []
cumulated_budget = 0.
number_of_training_rounds = 0

# Per-execution averages of the local metrics.
exec_average_local_training_loss, exec_average_local_RMSE = [], []
exec_average_local_WMAPE, exec_average_local_AE = [], []

# Local training histories.
local_training_accuracy_AE, local_training_accuracy_WMAPE = [], []
local_training_accuracy_RMSE, local_training_loss = [], []

local_loss_min = local_loss = 1.
Min_Loss_trashold = 0.016

device_model = GRU(args)

**Global model training and testing with fixed epochs**



In [None]:
from Models import  GRU, MyGAT
from tqdm import tqdm
import copy
import random 
from Data import generate_local_prediction, build_map,build_adjMatrix

import datetime
import ast
import torch.multiprocessing as mp

# Fresh containers for this run.
device_models = []
global_test_output = []
minimum_training_accuracy = []
Loop_accuracy = []
privacy_budgets = []

# One local model and one starting budget per entry of LocalTrainData.
for _train_set in LocalTrainData:
    if args.local_model != 'GRU':
        exit('Error: unrecognized local model')
    else:
        model = GRU(args)
        # for the parallelism: the model is later handed to worker processes
        model.share_memory()
        model.train()
    device_models.append(model)
    privacy_budgets.append(initial_budgets)

if args.PrivacyMode != "None":
    for local_model, budget in zip(device_models, privacy_budgets):
        local_model.InitializeBudget(budget)


# instantiate the global model to train:
if args.global_model != 'GNN':
    exit('Error: unrecognized global model')
else:
    globalModel = MyGAT(args, adj)
    globalModel.train()

# initialize variables for the training:
start_time = time.time()

# Histories of the global model, one entry appended per training round.
global_training_loss = []
global_training_accuracy_loss_RMSE = []
global_training_accuracy_loss_WMAPE = []
global_training_accuracy_loss_AE = []

# Zero-filled tracking arrays: one row (or slot) per local model.
per_device_shape = len(device_models)
per_round_shape = (len(device_models), iterations)
local_training_accuracy = np.full(per_round_shape, 0.)
local_training_loss = np.full(per_round_shape, 0.)
average_local_training_loss = np.full(per_device_shape, 0.)
## device side, training locally
weight_dict = {}
assigned_budget = np.full(per_round_shape, 0.)
minimum_training_accuracy = np.full(per_device_shape, 0.)
total_budget = np.full(per_device_shape, 0.)

# global_loss_epoch = 10

# Fix: force=True. Without it set_start_method() raises RuntimeError once a
# start method has already been chosen, i.e. whenever this cell is re-run or
# the other training cell of this notebook has been executed before.
# NOTE(review): 'fork' workers generally cannot re-initialise CUDA; confirm the
# worker functions behave when args.device_name is 'cuda'.
mp.set_start_method('fork', force=True)
number_of_iterations = ((endingTime - beginningTime)//args.trainingInterval)+1
number_of_training_rounds = 0
args.Halt = False

# Fail early on an unknown mode. The original fell through every branch and
# then hit an unbound (or stale, already-started) process object `p`.
SUPPORTED_PRIVACY_MODES = ("None", "Uniform", "Validation", "Increase", "Concentrated", "Adaptive")
if args.PrivacyMode not in SUPPORTED_PRIVACY_MODES:
    raise ValueError('Unrecognized PrivacyMode: {!r}'.format(args.PrivacyMode))

for epoch in tqdm(range(args.epochs)):
    for k in tqdm(range(number_of_iterations)):
        # Draw a random training window start in [beginningTime, endingTime),
        # aligned on trainingInterval.
        timestamp = random.randrange(beginningTime, endingTime, args.trainingInterval)
        # Position of this round inside the current epoch_period.
        quotient = number_of_training_rounds // args.epoch_period
        remainder = number_of_training_rounds % args.epoch_period
        processes = []

        weight_dict[timestamp] = []

        # Train every local model in its own process; the worker function and
        # its argument tuple depend on the privacy mode.
        for i in range(len(device_models)):
            local_data = LocalTrainData[i]
            local_model = device_models[i]
            local_model.train()

            if args.PrivacyMode == "None":
                worker = update_weights
                worker_args = (timestamp, local_data, local_model, args)
            elif args.PrivacyMode == "Uniform":
                worker = pertub_weights
                worker_args = (timestamp, local_data, local_model, privacy_budgets[i], args)
            elif args.PrivacyMode == "Validation":
                worker = pertub_weights_process_val
                worker_args = (timestamp, local_data, LocalValidationData[i], local_model, quotient, remainder, args)
            elif args.PrivacyMode == "Increase":
                worker = pertub_weights_process_inc
                worker_args = (timestamp, local_data, local_model, delta_t, quotient, remainder, args)
            elif args.PrivacyMode == "Concentrated":
                worker = pertub_weights_process_conc
                worker_args = (timestamp, local_data, LocalValidationData[i], local_model, delta_t, number_of_training_rounds, args)
            else:  # "Adaptive" (the mode was validated above)
                worker = pertub_weights_process_ada
                worker_args = (timestamp, local_data, local_model, quotient, remainder, number_of_training_rounds, args)

            p = mp.Process(target=worker, args=worker_args)
            p.start()
            processes.append(p)

        # Wait for every local update before touching the global model.
        for p in processes:
            p.join()

        for i in range(len(device_models)):
            device_models[i].eval()
            weight_dict[timestamp].append(Pair(LocalTrainData[i].x_axis, LocalTrainData[i].y_axis, device_models[i]))

        # train the global model on the predictions of the local models
        number_of_training_rounds += 1
        torch.cuda.empty_cache()
        weights = weight_dict[timestamp]
        predictiondata = {}
        with torch.no_grad():
            for weight_pair in weights:
                sample_ID = str(weight_pair.x_axis)+"X"+str(weight_pair.y_axis)
                predictiondataset = LocalPredictionSamples.get(sample_ID)
                predictiondata[sample_ID] = generate_local_prediction(weight_pair, predictiondataset, args)
            PredictionGlobalMap = build_map(predictiondata, args)
        global_acc_epoch, global_loss_epoch = update_global_weights(globalModel, GlobalPredictionSamples, PredictionGlobalMap, args, adj)
        global_training_loss.append(global_loss_epoch)
        print("global_loss_epoch:",global_loss_epoch)
        global_training_accuracy_loss_RMSE.append(global_acc_epoch["RMSE"])
        global_training_accuracy_loss_WMAPE.append(global_acc_epoch["WMAPE"])
        global_training_accuracy_loss_AE.append(global_acc_epoch["AE"])
        # Free the per-round prediction structures.
        del PredictionGlobalMap
        del predictiondata
        gc.collect()

# Average each accuracy metric over all global training rounds.
(global_acc_training_RMSE,
 global_acc_training_WMAPE,
 global_acc_training_AE) = (
    sum(metric_history) / len(metric_history)
    for metric_history in (
        global_training_accuracy_loss_RMSE,
        global_training_accuracy_loss_WMAPE,
        global_training_accuracy_loss_AE,
    )
)

### testing the global model:
globalModel.eval()
global_test_output, test_acc, test_loss = test_model(globalModel, adj, GlobalTestData, args)
print("global RMSE", test_acc["RMSE"],
      "global WMAPE", test_acc["WMAPE"],
      "global AE", test_acc["AE"])
print("global test_loss", test_loss)

**Global model training and testing with fixed budget**

In [None]:
from Models import  GRU, MyGAT
from tqdm import tqdm
import copy
import random 
from Data import generate_local_prediction, build_map,build_adjMatrix

import datetime
import ast
import torch.multiprocessing as mp

# Fresh containers for this run.
device_models = []
global_test_output = []
minimum_training_accuracy = []
Loop_accuracy = []
privacy_budgets = []

# One local model and one starting budget per entry of LocalTrainData.
for _train_set in LocalTrainData:
    if args.local_model != 'GRU':
        exit('Error: unrecognized local model')
    else:
        model = GRU(args)
        # for the parallelism: the model is later handed to worker processes
        model.share_memory()
        model.train()
    device_models.append(model)
    privacy_budgets.append(initial_budgets)

if args.PrivacyMode != "None":
    for local_model, budget in zip(device_models, privacy_budgets):
        local_model.InitializeBudget(budget)

# instantiate the global model to train:
if args.global_model != 'GNN':
    exit('Error: unrecognized global model')
else:
    globalModel = MyGAT(args, adj)
    globalModel.train()

# initialize variables for the training:
start_time = time.time()

# Histories of the global model, one entry appended per training round.
global_training_loss = []
global_training_accuracy_loss_RMSE = []
global_training_accuracy_loss_WMAPE = []
global_training_accuracy_loss_AE = []

# Zero-filled tracking arrays: one row (or slot) per local model.
per_device_shape = len(device_models)
per_round_shape = (len(device_models), iterations)
local_training_accuracy = np.full(per_round_shape, 0.)
local_training_loss = np.full(per_round_shape, 0.)
average_local_training_loss = np.full(per_device_shape, 0.)
## device side, training locally
weight_dict = {}
assigned_budget = np.full(per_round_shape, 0.)
minimum_training_accuracy = np.full(per_device_shape, 0.)
total_budget = np.full(per_device_shape, 0.)

# global_loss_epoch = 10

# Fix: force=True. Without it set_start_method() raises RuntimeError once a
# start method has already been chosen, i.e. whenever this cell is re-run or
# the other training cell of this notebook has been executed before.
# NOTE(review): 'fork' workers generally cannot re-initialise CUDA; confirm the
# worker functions behave when args.device_name is 'cuda'.
mp.set_start_method('fork', force=True)
number_of_iterations = ((endingTime - beginningTime)//args.trainingInterval)+1
number_of_training_rounds = 0

# Fail early on an unknown mode. The original fell through every branch and
# then hit an unbound (or stale, already-started) process object `p`.
SUPPORTED_PRIVACY_MODES = ("None", "Uniform", "Validation", "Increase", "Concentrated", "Adaptive")
if args.PrivacyMode not in SUPPORTED_PRIVACY_MODES:
    raise ValueError('Unrecognized PrivacyMode: {!r}'.format(args.PrivacyMode))

Halt = False
while not Halt:
    # NOTE(review): this test reads args.featureMode while every other branch
    # in this cell keys on args.PrivacyMode -- confirm which one is intended.
    # NOTE(review): for any other mode nothing visible in this cell ever sets
    # Halt, so the loop has no exit condition here -- confirm how it is
    # expected to stop.
    if args.featureMode == "None" or args.featureMode == "Uniform":
        if number_of_training_rounds >= (args.epoch_period*number_of_iterations):
            Halt = True
            # Fix: stop immediately. The original only re-tested Halt after
            # running one more full pass over the training windows.
            break

    # Fix: the pass counter is `k`; the original reused `i`, which the device
    # loop below then overwrote.
    for k in tqdm(range(number_of_iterations)):
        # Draw a random training window start in [beginningTime, endingTime),
        # aligned on trainingInterval.
        timestamp = random.randrange(beginningTime, endingTime, args.trainingInterval)
        # Position of this round inside the current epoch_period.
        quotient = number_of_training_rounds // args.epoch_period
        remainder = number_of_training_rounds % args.epoch_period
        processes = []

        weight_dict[timestamp] = []

        # Train every local model in its own process; the worker function and
        # its argument tuple depend on the privacy mode.
        for i in range(len(device_models)):
            local_data = LocalTrainData[i]
            local_model = device_models[i]
            local_model.train()

            if args.PrivacyMode == "None":
                worker = update_weights
                worker_args = (timestamp, local_data, local_model, args)
            elif args.PrivacyMode == "Uniform":
                worker = pertub_weights
                worker_args = (timestamp, local_data, local_model, privacy_budgets[i], args)
            elif args.PrivacyMode == "Validation":
                worker = pertub_weights_process_val
                worker_args = (timestamp, local_data, LocalValidationData[i], local_model, quotient, remainder, args)
            elif args.PrivacyMode == "Increase":
                worker = pertub_weights_process_inc
                worker_args = (timestamp, local_data, local_model, delta_t, quotient, remainder, args)
            elif args.PrivacyMode == "Concentrated":
                worker = pertub_weights_process_conc
                worker_args = (timestamp, local_data, LocalValidationData[i], local_model, delta_t, number_of_training_rounds, args)
            else:  # "Adaptive" (the mode was validated above)
                worker = pertub_weights_process_ada
                worker_args = (timestamp, local_data, local_model, quotient, remainder, number_of_training_rounds, args)

            p = mp.Process(target=worker, args=worker_args)
            p.start()
            processes.append(p)

        # Wait for every local update before touching the global model.
        for p in processes:
            p.join()

        for i in range(len(device_models)):
            device_models[i].eval()
            weight_dict[timestamp].append(Pair(LocalTrainData[i].x_axis, LocalTrainData[i].y_axis, device_models[i]))

        # train the global model on the predictions of the local models
        number_of_training_rounds += 1
        torch.cuda.empty_cache()
        weights = weight_dict[timestamp]
        predictiondata = {}
        with torch.no_grad():
            for weight_pair in weights:
                sample_ID = str(weight_pair.x_axis)+"X"+str(weight_pair.y_axis)
                predictiondataset = LocalPredictionSamples.get(sample_ID)
                predictiondata[sample_ID] = generate_local_prediction(weight_pair, predictiondataset, args)
            PredictionGlobalMap = build_map(predictiondata, args)
        global_acc_epoch, global_loss_epoch = update_global_weights(globalModel, GlobalPredictionSamples, PredictionGlobalMap, args, adj)
        global_training_loss.append(global_loss_epoch)
        print("global_loss_epoch:",global_loss_epoch)
        global_training_accuracy_loss_RMSE.append(global_acc_epoch["RMSE"])
        global_training_accuracy_loss_WMAPE.append(global_acc_epoch["WMAPE"])
        global_training_accuracy_loss_AE.append(global_acc_epoch["AE"])
        # Free the per-round prediction structures.
        del PredictionGlobalMap
        del predictiondata
        gc.collect()

# Average each accuracy metric over all global training rounds.
(global_acc_training_RMSE,
 global_acc_training_WMAPE,
 global_acc_training_AE) = (
    sum(metric_history) / len(metric_history)
    for metric_history in (
        global_training_accuracy_loss_RMSE,
        global_training_accuracy_loss_WMAPE,
        global_training_accuracy_loss_AE,
    )
)

### testing the global model:
globalModel.eval()
global_test_output, test_acc, test_loss = test_model(globalModel, adj, GlobalTestData, args)
print("global RMSE", test_acc["RMSE"],
      "global WMAPE", test_acc["WMAPE"],
      "global AE", test_acc["AE"])
print("global test_loss", test_loss)

**Save the global model**

In [None]:
import os
import sys
import torch

sys.path.append('/content/drive/MyDrive/AdaVFL-GitHub')

ratio = args.featureRatio

# Target directory on the mounted Drive: Yelp has its own folder, every other
# dataset value is stored under BikeNYC.
if args.dataset == "Yelp":
    directory_name = "/content/drive/MyDrive/Colab Notebooks/Models/Yelp/"
else:
    directory_name = "/content/drive/MyDrive/Colab Notebooks/Models/BikeNYC/"

# Checkpoint file name for each privacy mode (names kept exactly as before so
# existing checkpoints and loaders keep matching).
MODEL_FILE_NAMES = {
    "None": "NoPrivacyGAN.pth",
    "Uniform": "UniformPrivacyGAN.pth",
    "Concentrated": "ConPrivacyGAN.pth",
    "Adaptive": "AdaFVPrivacyGAN.pth",
    "Validation": "ValPrivacyGAN.pth",
    "Increase": "IncreasePrivacyGAN.pth",
}
# Fix: an unknown mode used to leave `file_name` unbound and fail later with a
# NameError inside torch.save(); report the real problem instead.
if args.PrivacyMode not in MODEL_FILE_NAMES:
    raise ValueError('Unrecognized PrivacyMode: {!r}'.format(args.PrivacyMode))
file_name = MODEL_FILE_NAMES[args.PrivacyMode]

# Fix: create the target folder if needed; torch.save() does not create
# missing parent directories.
os.makedirs(directory_name, exist_ok=True)

print('Saving global model...')
torch.save(globalModel.state_dict(), directory_name+file_name)
print('global saved successfully.')
