In [None]:
# Mount the user's Google Drive at /content/drive; later cells add a folder
# under this mount point to sys.path to import the project modules.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Building the data**
Useful parameters:


*   dataset name, x_axis, y_axis
*   input and output length
*   train ratio begin and end are used for LocalTrainData and GlobalTrainData
*   test ratio begin and end are used for LocalTestData and GlobalTestData
*   predictionSampleRatio is used for the local and global prediction datasets (TODO: confirm)
*   trainAttackerBegin is used to train the attacker's model
*   train_normalization and test_normalization are for the normalization of the data
*   trainingInterval: the time span covered by one training batch, in milliseconds (e.g. 24*60*60*1000 for one day)




In [None]:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate the Colab user and create the PyDrive client from the
# application-default credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE: this rebinds `drive`. From here on it is the PyDrive client (used below
# for ListFile/CreateFile), no longer the google.colab `drive` module that the
# mount cell imported under the same name.
drive = GoogleDrive(gauth)

import numpy as np # linear algebra
import os
import sys
import time
import gc
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import datetime
import tensorflow as tf

# notebook_path = os.path.abspath("AdaptiveSingleLocalModel.ipynb")
sys.path.append('/content/drive/MyDrive/AdaVFL-GitHub')
from Data import CheckLocalMembershipData, MakeTrainingTimes, CheckLocalTrainData, CheckLocalPredictionData, GenerateRandomSamples, LocalSequentialDataset, LocalSampledDataset, GlobalSequentialDataset, GlobalSampledDataset, build_adjMatrix, MakeAttackTimes, build_member_test_data, MakeMembershipAttackTimes
from federationarguments import arguments

# Load the run configuration, then apply the dataset-specific settings:
# Drive folder link, training/testing interval, batch and overall sizes,
# and the two grid dimensions.
args = arguments()

if args.dataset == "bikeNYC":
    link = "https://drive.google.com/drive/folders/1diJwebRNa5AQ16Jy6eHNGtYGmIGeqcrt"
    args.trainingInterval = 24*60*60*1000
    args.testingInterval = 24*60*60*1000
    args.batch_size, args.overall_size = 97, 2870
    args.x_axis, args.y_axis = 8, 16
elif args.dataset == "Yelp":
    link = "https://drive.google.com/drive/folders/1K2Y_txKAda0TOEEDYvoa7sPPMYLCXI-U"
    args.trainingInterval = 8*24*60*60*1000
    args.testingInterval = 8*24*60*60*1000
    args.batch_size, args.overall_size = 32, 1085
    args.x_axis, args.y_axis = 8, 8
else:
    # Only the two datasets above have a known Drive folder.
    raise SystemError('Invalid data folder')

# The folder id is the part of the share link that follows 'folders/'.
fluff, folder = link.split('folders/')
# Query string passed to ListFile: files whose parent is that folder and that
# are not trashed.
filePath = "'{}' in parents and trashed=false".format(folder)
print(filePath)
downloaded = drive.ListFile({'q': filePath}).GetList()

# Choose the device name stored on args. TensorFlow is only used to probe for
# a GPU: tf.test.gpu_device_name() returns '/device:GPU:0' when a GPU is
# visible and an empty string otherwise.
device_name = tf.test.gpu_device_name()
# The original test was inverted (`!=`): it selected 'cuda' and called
# torch.cuda.set_device(0) exactly when no GPU was reported, and fell back to
# 'cpu' when one was. torch.cuda.is_available() is checked as well because the
# CUDA calls below go through torch, not TensorFlow.
if device_name == '/device:GPU:0' and torch.cuda.is_available():
  print('Found GPU at: {}'.format(device_name))
  args.device_name = 'cuda'
  torch.cuda.set_device(0)
else:
  args.device_name = 'cpu'

# One entry per downloaded file in each list below, so that the same index
# addresses the same file across all of them.
LocalTrainData = []
AttackerTrainData = []
AttackerMemberTestData = []
AttackerNonMemberTestData = []
LocalValidationData = []
LocalTestData = []
# Maps "<x>X<y>" (as parsed from the file title) to that file's sampled dataset.
LocalPredictionSamples = {}

sampled_list = []
# True until the random sample has been drawn from the first file.
sampledPrediction = True


def _make_sequential_dataset(timestamps, x_axis, y_axis, begin, end):
    """Create a LocalSequentialDataset, call make_data() on it and return it.

    `begin` and `end` are forwarded unchanged as the dataset's two range
    arguments; the global `args` is passed as the last constructor argument.
    """
    dataset = LocalSequentialDataset(timestamps, x_axis, y_axis, begin, end, args)
    dataset.make_data()
    return dataset


if len(downloaded) == 0:
    # Without any file nothing below can be built (and `timestamps` would never
    # be defined), so fail here with a clear message.
    raise RuntimeError('No data files were listed for the Drive folder')

for file in downloaded:
    try:
        # Use a separate name for the per-file handle so the `downloaded`
        # listing being iterated is not rebound inside its own loop.
        remote_file = drive.CreateFile({'id': file['id']})
        remote_file.GetContentFile(file['title'])
        timestamps = pd.read_csv(file['title'])
        # The file title is parsed as "<x>X<y>.<extension>".
        grid = file['title'].split(".")
        axis = grid[0].split("X")
        x_axis = int(axis[0])
        y_axis = int(axis[1])

        LocalTrainData.append(_make_sequential_dataset(
            timestamps, x_axis, y_axis, args.trainRatioBegin, args.trainRatioEnd))
        AttackerTrainData.append(_make_sequential_dataset(
            timestamps, x_axis, y_axis, args.training_attacker_begin, args.training_attacker_end))
        AttackerNonMemberTestData.append(_make_sequential_dataset(
            timestamps, x_axis, y_axis, args.test_non_member_begin, args.test_non_member_end))
        AttackerMemberTestData.append(_make_sequential_dataset(
            timestamps, x_axis, y_axis, args.test_member_begin, args.test_member_end))

        # The random sample is generated once, from the first file, and the
        # same sampled_list is then reused for every file.
        if sampledPrediction:
            sampled_list = GenerateRandomSamples(timestamps, args)
            sampledPrediction = False

        predictsample = LocalSampledDataset(timestamps, x_axis, y_axis, args)
        predictsample.make_data(sampled_list)
        # Append exactly once per file. The original appended the same object
        # twice, leaving LocalValidationData twice as long as the other lists
        # and misaligned with them by index.
        LocalValidationData.append(predictsample)

        LocalTestData.append(_make_sequential_dataset(
            timestamps, x_axis, y_axis, args.testRatioBegin, args.testRatioEnd))

        sample_ID = str(x_axis)+"X"+str(y_axis)
        LocalPredictionSamples[sample_ID] = predictsample
    except Exception as e:
        print("hit an exception when making data ",e)
        # Re-raise so the cell stops with the full traceback of the failing
        # file instead of calling exit() from inside the notebook.
        raise

# Post-process the per-file datasets built above.
# NOTE(review): presumably these helpers record the training / membership
# time ranges on `args` and validate the data; confirm in Data.py.
MakeTrainingTimes(LocalTrainData, args)
CheckLocalTrainData(LocalTrainData, args)

MakeMembershipAttackTimes(
    AttackerTrainData,
    AttackerMemberTestData,
    AttackerNonMemberTestData,
    args,
)
CheckLocalMembershipData(args)

adj = build_adjMatrix(args)

# Drop the sample list and the last CSV frame read, then collect garbage.
del sampled_list, timestamps
gc.collect()


**Privacy budget common initialization for all solutions**

In [None]:
sys.path.append('/content/drive/MyDrive/AdaVFL-GitHub')
from Data import generate_local_prediction, build_map,build_adjMatrix
from Models import GRU, MyGAT, MembershipInferenceAttacker
import random
from WeightTools import (rho_to_sigma, sigma_to_rho,compute_advcomp_sigma, compute_advcomp_budget, 
                         rho_to_dp,compute_cumulated_budget, output_results, update_weights, pertub_weights, 
                         update_global_weights, Pair, test_model, update_budget_training, train_attacker,
                         update_budget_accuracy , update_budget_increase, calculate_validation_accuracy,  
                         dp_to_zcdp, grad_func, noisyMax, perturb_gradients, compute_epsilon,test_attacker,
                         build_candidates, loss_score, override_model, grad_avg, sigma_to_epsilon, epsilon_to_sigma)
from tqdm import tqdm
import datetime


# Training window and total number of local training rounds over all epochs
# (one round per trainingInterval step inside the window).
beginningTime = args.beginTrainingTimestamp
endingTime = args.endTrainingTimestamp
iterations = args.epochs*(((endingTime - beginningTime)//args.trainingInterval)+1)

if args.PrivacyMode != "None":
  total_epsilon = args.epsilon_0
  total_delta = args.delta_0
  total_rho = dp_to_zcdp(total_epsilon, total_delta)
  args.total_rho = total_rho

  # Split the total rho and delta evenly over all rounds, then express the
  # per-round rho as sigma and as epsilon.
  rho_t = total_rho/iterations
  delta_t = total_delta/iterations
  sigma_t = rho_to_sigma(rho_t)
  epsilon_t = rho_to_dp(rho_t, delta_t)

  # The initial per-round budget is stored in a different unit per mode.
  # NOTE(review): any other mode leaves privacy_budgets unassigned here.
  if args.PrivacyMode in ("Uniform", "Validation"):
    privacy_budgets = sigma_t
  elif args.PrivacyMode == "Increase":
    privacy_budgets = epsilon_t
  elif args.PrivacyMode in ("Concentrated", "Adaptive"):
    privacy_budgets = rho_t


**Initialize common variables**

In [None]:
# Histories of per-round budget values (epsilon, sigma, rho) and running totals.
assigned_epsilon, assigned_sigma, assigned_rho = [], [], []
cumulated_budget = 0.
number_of_training_rounds = 0

# Averages appended once per training run.
exec_average_local_training_loss, exec_average_local_RMSE = [], []
exec_average_local_WMAPE, exec_average_local_AE = [], []

# Per-round training metrics and losses.
local_training_accuracy_AE, local_training_accuracy_WMAPE = [], []
local_training_accuracy_RMSE, local_training_loss = [], []

# Latest and smallest loss seen so far; both start at 1.
local_loss_min = local_loss = 1.
Min_Loss_trashold = 0.016

# Index of the per-file dataset used by the training and attack cells.
local_model_index = 0
device_model = GRU(args)
if args.PrivacyMode != "None":
  device_model.InitializeBudget(privacy_budgets)

AttackerTestData = build_member_test_data(
    AttackerMemberTestData[local_model_index],
    AttackerNonMemberTestData[local_model_index],
)

**Non-private**

In [None]:
sys.path.append('/content/drive/MyDrive/AdaVFL-GitHub')
import math
import copy
# Snapshots of device_model taken at the epochs listed in args.observedEpochs;
# the membership-inference cell passes this list to train_attacker.
observed_captures = []

if args.PrivacyMode != "None":
  # This cell is the non-private baseline. In any other mode the original loop
  # skipped update_weights but still recorded local_training_acc / local_loss,
  # which are undefined on a fresh kernel (NameError) or stale otherwise. Make
  # the cell an explicit no-op instead.
  print('Skipping non-private training: args.PrivacyMode is', args.PrivacyMode)
else:
  start_time = time.time()
  number_of_iterations = ((endingTime - beginningTime)//args.trainingInterval)+1

  ## stop condition is the number of epochs
  for epoch in tqdm(range(args.epochs)):
    for iteration in tqdm(range(number_of_iterations)):
      # Draw a random interval start inside the training window.
      # NOTE(review): randrange never returns endingTime itself and raises
      # ValueError when beginningTime == endingTime, while number_of_iterations
      # counts the end point -- confirm whether endingTime should be included.
      timestamp = random.randrange(beginningTime, endingTime, args.trainingInterval)
      # One training round of the local model, without any privacy mechanism.
      local_training_acc, local_loss = update_weights(timestamp, LocalTrainData[local_model_index], device_model, args)
      if local_loss_min > local_loss:
        local_loss_min = local_loss
      local_training_loss.append(local_loss)
      print("local_loss ",local_loss)
      local_training_accuracy_AE.append(local_training_acc["AE"])
      local_training_accuracy_WMAPE.append(local_training_acc["WMAPE"])
      local_training_accuracy_RMSE.append(local_training_acc["RMSE"])
      number_of_training_rounds += 1
      torch.cuda.empty_cache()
      gc.collect()
    # capture snapshot of the model while training
    if epoch in args.observedEpochs:
      observed_captures.append(copy.deepcopy(device_model))

  # Averages over every round recorded so far in the shared metric lists.
  exec_average_local_training_loss.append(sum(local_training_loss)/ number_of_training_rounds)
  exec_average_local_RMSE.append(sum(local_training_accuracy_RMSE) / number_of_training_rounds)
  exec_average_local_WMAPE.append(sum(local_training_accuracy_WMAPE) / number_of_training_rounds)
  exec_average_local_AE.append(sum(local_training_accuracy_AE) / number_of_training_rounds)

  print("local_loss_min,",local_loss_min)

  print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))

  output_results(args,exec_average_local_RMSE,exec_average_local_WMAPE,exec_average_local_AE,
        device_model, LocalTestData, local_training_loss, assigned_sigma)


**Membership inference attack**

In [None]:
start_time = time.time()
test_acc = []
attacker_model = MembershipInferenceAttacker(args)

# Keep the attack window, iteration count and losses in cell-local names.
# The original reused beginningTime / endingTime / number_of_iterations /
# local_loss and appended to local_training_loss, which silently changed the
# training window, the last loss and the loss history that the local-model
# training cells read afterwards.
attack_begin = args.beginTrainMembershipTimestamp
attack_end = args.endTrainMembershipTimestamp
attack_iterations = ((attack_end - attack_begin)//args.testingInterval)+1
attacker_training_loss = []

#train the attacker model
for epoch in tqdm(range(args.attacker_epochs)):
  for iteration in tqdm(range(attack_iterations)):
    # Random interval start inside the attacker-training window.
    timestamp = random.randrange(attack_begin, attack_end, args.testingInterval)
    # The attacker is trained against the model snapshots in observed_captures.
    attacker_loss = train_attacker(timestamp, AttackerTrainData[local_model_index], attacker_model, observed_captures, args)
    attacker_training_loss.append(attacker_loss)
    print("local_loss ",attacker_loss)

#test the attacker model
attacker_model.eval()
# Evaluation uses the current device_model together with the combined
# member / non-member test data.
acc= test_attacker(AttackerTestData,attacker_model, device_model,args)
print("the accuracy is ", acc)

**Adaptive**

In [None]:
# Adaptive private training of device_model.
# Every round perturbs the update with a sigma derived from the current
# per-round budget `privacy_budgets` (a rho value). Once at least
# args.epoch_period rounds have been counted, update_budget_training is called
# every args.epoch_period rounds to produce the next budget.
#
# NOTE: number_of_training_rounds, local_loss, local_loss_min, cumulated_budget
# and the metric lists are NOT reset here; they continue from whatever the
# previously executed cells left in them.
#
# device_model = GRU(args)   # left disabled in the original

# initialize variables for the training:
start_time = time.time()
assigned_budget = []  # not read anywhere in this cell
assigned_sigma = []

minimum_training_accuracy = 0.08
Loop_accuracy = []
# Window of the last args.epoch_period losses, indexed by round % epoch_period.
Loop_accuracy =  np.full(args.epoch_period, 0.) 
Min_Loss_trashold = 0.06  # only referenced by the disabled stop conditions below
number_epochs = 0
# Min_AE_trashold = -0.1

## stop condition is the number of epochs
for epoch in tqdm(range(args.epochs)):

## Alternative stop conditions, currently disabled:
##   total budget:    while cumulated_budget < total_budget:
##   loss threshold:  while local_loss > Min_Loss_trashold:
  number_epochs+=1
  # if cumulated_budget >  total_budget:
  #   break
  # if local_loss <  Min_Loss_trashold:
  #   break
  number_of_iterations = ((endingTime - beginningTime)//args.trainingInterval)+1
  for iteration in tqdm(range(number_of_iterations)):
    # Random interval start inside [beginningTime, endingTime).
    timestamp = random.randrange(beginningTime, endingTime, args.trainingInterval)
    # if local_loss <  Min_Loss_trashold:
    #   break
    # if cumulated_budget >  total_budget:
    #   break
    # train the local model
    # apply privacy if specified on the local model
    # quotient: number of complete epoch_period windows counted so far;
    # remainder: position of this round inside the current window.
    quotient = number_of_training_rounds // args.epoch_period
    remainder = number_of_training_rounds % args.epoch_period
    if args.PrivacyMode ==  "Adaptive":
      if args.AdaptiveError == "Training":
        if quotient == 0:
          # First window: only fill Loop_accuracy, the budget is not adapted yet.
          if(number_of_training_rounds == 0):
            Loop_accuracy[remainder] = 0.
            # minimum_training_accuracy = 0.2 #loss
            privacy_budgets = rho_t
            # minimum_training_accuracy = 0.2 #RMSE
            # minimum_training_accuracy = 0.1 #MAE
          else:
            # Loop_accuracy[remainder] = local_training_acc[args.validation_accuracy_metric]
            # Store the loss returned by the previous round.
            Loop_accuracy[remainder] = local_loss
            # if minimum_training_accuracy > local_training_acc[args.trining_accuracy_metric]:
            #   minimum_training_accuracy = local_training_acc[args.trining_accuracy_metric]  
          args.tracked_error.append(0.)
        else:
          # if minimum_training_accuracy > local_training_acc[args.trining_accuracy_metric]:
          #     minimum_training_accuracy = local_training_acc[args.trining_accuracy_metric]
          if remainder == 0:
            # Start of a new window: compute the next budget from the previous
            # loss and the stored window.
            # NOTE(review): Loop_accuracy[0] is not overwritten in this branch;
            # confirm whether update_budget_training is expected to do that.
            error, budget = update_budget_training(privacy_budgets, local_loss, Loop_accuracy, minimum_training_accuracy, args)
            args.tracked_error.append(error)
            privacy_budgets = budget
          else:
            # args.tracked_error.append(local_training_acc[args.trining_accuracy_metric])
            # Loop_accuracy[remainder] = local_training_acc[args.trining_accuracy_metric]
            args.tracked_error.append(local_loss)
            Loop_accuracy[remainder] = local_loss
      # Convert the current rho budget to the sigma used for this round.
      sigma_t = rho_to_sigma(privacy_budgets)    
      # NOTE(review): index 0 is hard-coded here, whereas the non-private cell
      # uses LocalTrainData[local_model_index] (equal while that index is 0).
      local_training_acc,local_loss = pertub_weights(timestamp, LocalTrainData[0], device_model,sigma_t,args)  
      print("local_loss ",local_loss)
      # Record this round's budget as epsilon, sigma and rho.
      adjusted_budget=rho_to_dp(privacy_budgets,delta_t)   
      assigned_sigma.append(sigma_t)
      assigned_epsilon.append(adjusted_budget)  
      assigned_rho.append(privacy_budgets)
      local_ae = local_training_acc["AE"]  # not read anywhere in this cell
      cumulated_budget=compute_cumulated_budget(privacy_budgets,cumulated_budget)
      if (local_loss_min > local_loss):
        local_loss_min = local_loss 
    # NOTE(review): the bookkeeping below runs even when PrivacyMode is not
    # "Adaptive"; in that case it re-records the previous local_loss and
    # local_training_acc without any training having happened.
    local_training_loss.append(local_loss)
    local_training_accuracy_AE.append(local_training_acc["AE"])
    local_training_accuracy_WMAPE.append(local_training_acc["WMAPE"])
    local_training_accuracy_RMSE.append(local_training_acc["RMSE"])
    number_of_training_rounds += 1
    torch.cuda.empty_cache()
    gc.collect()

# Averages over every round recorded so far in the shared metric lists
# (including rounds recorded by previously executed cells).
exec_average_local_training_loss.append(sum(local_training_loss)/ number_of_training_rounds)
exec_average_local_RMSE.append(sum(local_training_accuracy_RMSE) / number_of_training_rounds)
exec_average_local_WMAPE.append(sum(local_training_accuracy_WMAPE) / number_of_training_rounds)
exec_average_local_AE.append(sum(local_training_accuracy_AE) / number_of_training_rounds)

output_results(args,exec_average_local_RMSE,exec_average_local_WMAPE,exec_average_local_AE,
      device_model, LocalTestData, local_training_loss, assigned_sigma)

