<a href="https://colab.research.google.com/github/Priyanka-Sachan/Complaint-Identification-using-FL/blob/master/Code_with_FL_IID.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf

# Fail fast when the runtime has no GPU attached: the training and evaluation
# helpers defined below move their models to CUDA.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

In [None]:
!pip install transformers
!pip install sentencepiece
!pip install datasets

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from fastai.text import *
from google.colab import files

import transformers
transformers.logging.set_verbosity_error()
from transformers import AdamW
from transformers import XLNetModel, XLNetTokenizer, XLNetForSequenceClassification
from transformers import get_scheduler
from datasets import load_metric

from tqdm.auto import tqdm
import math
import pandas as pd
import io
import numpy as np
import random

In [None]:
# Reproducibility: pin every random number generator used by the notebook.
SEED = 9

# Seed PyTorch, NumPy and the Python standard library with the same value.
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Make cuDNN pick deterministic kernels and switch off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience.

    Call the instance once per epoch with the validation loss and the model.
    Every improvement checkpoints ``model.state_dict()`` to ``path``; after
    ``patience`` consecutive calls without improvement ``early_stop`` is set
    to True (the caller is responsible for breaking out of its loop).
    """
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0          # consecutive calls without improvement
        self.best_score = None    # best (negated) validation loss seen so far
        self.early_stop = False
        # np.inf (lowercase): the np.Inf alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        """Record one validation result.

        Args:
            val_loss (float): Validation loss of the epoch that just finished.
            model: Object exposing ``state_dict()``; checkpointed on improvement.
        """
        # Work with a score where "higher is better" so the comparison below
        # reads as a plain improvement check.
        score = -val_loss

        if self.best_score is None:
            # First call: always counts as the best result so far.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            # Not better than the best score by at least `delta`.
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [None]:
# Prefer CUDA when it is available; `device` is where the training and
# evaluation loops move each batch.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
# Show the accelerator name as the cell output. get_device_name(0) raises when
# no CUDA device exists, so only query it when one is actually present.
torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"

In [None]:
from google.colab import drive
# Mount Google Drive: the main model / optimizer checkpoints used by this
# notebook live under /content/gdrive/My Drive/Innovation_Lab/.
drive.mount('/content/gdrive')

In [None]:
def create_dataloader(input_ids,masks,labels,batch_size=32,shuffle=False):
    """Wrap token ids, attention masks and labels in a batched DataLoader.

    Args:
        input_ids: Array-like of token ids, one row per example.
        masks: Array-like of attention masks, aligned with ``input_ids``.
        labels: Array-like of labels, one per example.
        batch_size (int): Examples per batch. Default: 32.
        shuffle (bool): If True, draw examples with a ``RandomSampler``;
            otherwise iterate in the given order (the default).

    Returns:
        DataLoader yielding ``(input_ids, masks, labels)`` tensor batches.
    """
    dataset = TensorDataset(torch.tensor(input_ids),
                            torch.tensor(masks),
                            torch.tensor(labels))
    sampler = RandomSampler(dataset) if shuffle else SequentialSampler(dataset)
    return DataLoader(dataset, sampler=sampler, batch_size=batch_size)

In [None]:
# To train main model initially with amazon reviews
def train_main_model(model,optimizer):
    """Fine-tune the main model on Amazon reviews and save it to Drive.

    Reads the first 30000 rows of ``train.csv`` from the dataset fetched by
    ``untar_data(URLs.AMAZON_REVIEWS)``, maps ratings to a binary label
    (rating > 2 -> 0, otherwise 1), tokenizes the review text, trains with
    ``train_and_validate_model`` on an 80/20 split and writes the model and
    optimizer state dicts to Google Drive.

    Args:
        model: Sequence-classification model to train (modified in place).
        optimizer: Optimizer bound to ``model``'s parameters.
    """
    MAX_LEN = 49

    data_dir = untar_data(URLs.AMAZON_REVIEWS, dest = "Data")
    reviews = pd.read_csv(data_dir/'train.csv', header=None, names=['rating', 'title', 'review'],  nrows=30000)

    # Binary target: ratings above 2 become 0, everything else becomes 1.
    reviews['label'] = [0 if rating > 2 else 1 for rating in reviews.rating.values]

    tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased', do_lower_case=True)
    encodings = [
        tokenizer(text, padding='max_length', truncation="only_first", max_length=MAX_LEN)
        for text in reviews.review.values
    ]

    input_ids = np.asarray([np.asarray(enc['input_ids']) for enc in encodings])
    attention_masks = np.asarray([np.asarray(enc['attention_mask']) for enc in encodings])
    labels = reviews.label.values

    (train_inputs, valid_inputs,
     train_masks, valid_masks,
     train_labels, valid_labels) = train_test_split(
        input_ids, attention_masks, labels, random_state=42, test_size=0.2)

    train_dataloader = create_dataloader(train_inputs, train_masks, train_labels)
    validation_dataloader = create_dataloader(valid_inputs, valid_masks, valid_labels)

    # The returned metrics are not needed here; the per-epoch summary is printed
    # by train_and_validate_model itself.
    train_and_validate_model(model, optimizer, train_dataloader, validation_dataloader)

    torch.save(model.state_dict(), "/content/gdrive/My Drive/Innovation_Lab/main_model.pt")
    torch.save(optimizer.state_dict(), "/content/gdrive/My Drive/Innovation_Lab/main_optimizer.pt")


In [None]:
# Extract features from complaints data
def get_features():
    """Upload and tokenize the complaints dataset.

    Opens the Colab upload dialog, then reads ``complaints-data.csv`` from the
    working directory (columns: id, tweet, y, industry) and tokenizes the
    ``tweet`` column with the XLNet tokenizer, padded/truncated to 49 tokens.

    Returns:
        Tuple ``(input_ids, attention_masks, labels)`` of NumPy arrays, where
        ``labels`` is the ``y`` column.
    """
    MAX_LEN = 49

    # Interactive step: the CSV read below is expected to be among the uploads.
    files.upload()

    complaints = pd.read_csv("complaints-data.csv", header=None, names=['id', 'tweet', 'y', 'industry'])

    tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased', do_lower_case=True)
    encodings = [
        tokenizer(tweet, padding='max_length', truncation="only_first", max_length=MAX_LEN)
        for tweet in complaints.tweet.values
    ]

    input_ids = np.asarray([np.asarray(enc['input_ids']) for enc in encodings])
    attention_masks = np.asarray([np.asarray(enc['attention_mask']) for enc in encodings])

    return input_ids, attention_masks, complaints.y.values

In [None]:
# Shuffling and dividing the data into one part per label
def split_and_shuffle_labels(labels, seed, num_labels=2):
    """Group example positions by label and shuffle each group.

    Args:
        labels: 1-D array-like of integer class labels.
        seed (int): Seed for the shuffle. The same seed is used for every
            label group, so the result is deterministic.
        num_labels (int): Number of classes; labels ``0 .. num_labels-1`` are
            looked up. Default: 2.

    Returns:
        dict mapping ``"label<k>"`` to a DataFrame with columns ``labels`` and
        ``i``, where ``i`` is the position of the example in ``labels``.
    """
    indexed = pd.DataFrame(labels, columns=["labels"])
    indexed["i"] = np.arange(len(indexed))

    label_dict = dict()
    for label in range(num_labels):
        rows = indexed[indexed["labels"] == label]
        # A local RandomState yields the same permutation as seeding the
        # global generator would, without clobbering the notebook-wide seed.
        rng = np.random.RandomState(seed)
        shuffled = rng.permutation(rows.to_numpy())
        label_dict["label" + str(label)] = pd.DataFrame(shuffled, columns=["labels", "i"])

    return label_dict

In [None]:
# Divides the indexes among the nodes with a proportionate number of each label
def get_iid_subsamples_indices(label_dict, number_of_samples, num_labels=2):
    """Give every node an equal slice of each label's shuffled rows.

    Args:
        label_dict (dict): Maps ``"label<k>"`` to a DataFrame of rows for
            label ``k`` (as built by ``split_and_shuffle_labels``).
        number_of_samples (int): Number of nodes to split the rows across.
        num_labels (int): Number of ``"label<k>"`` entries to read. Default: 2.

    Returns:
        dict mapping ``"sample<i>"`` to a DataFrame (fresh 0..n-1 index) that
        stacks node ``i``'s slice of every label. Each label contributes
        ``len(rows) // number_of_samples`` rows per node; leftover rows are
        not assigned to any node.
    """
    sample_dict = dict()
    for i in range(number_of_samples):
        pieces = []
        for j in range(num_labels):
            label_rows = label_dict["label" + str(j)]
            per_node = label_rows.shape[0] // number_of_samples
            pieces.append(label_rows.iloc[i * per_node:(i + 1) * per_node])

        # Concatenate once instead of growing from an empty DataFrame, so the
        # result dtypes come only from the real label frames.
        samples = pd.concat(pieces, axis=0, ignore_index=True) if pieces else pd.DataFrame()
        sample_dict["sample" + str(i)] = samples

    return sample_dict

In [None]:
# Distributes input ids, attention masks and labels to nodes in dictionary
def create_iid_subsamples(sample_dict, input_ids,attention_masks,labels,group):
    """Materialize each node's share of the data from its row indices.

    Args:
        sample_dict (dict): Maps ``"sample<i>"`` to a DataFrame whose ``i``
            column holds the row positions assigned to node ``i``.
        input_ids: 2-D NumPy array of token ids, indexed by row position.
        attention_masks: 2-D NumPy array of attention masks, same rows.
        labels: 1-D NumPy array of labels, same rows.
        group (str): Prefix for the generated keys (e.g. ``"train"``).

    Returns:
        Tuple of three dicts keyed ``<group>_input_id<i>``,
        ``<group>_attention_mask<i>`` and ``<group>_label<i>``.
    """
    input_ids_dict = dict()
    attention_masks_dict = dict()
    labels_dict = dict()

    # The number of nodes is taken from sample_dict itself rather than from a
    # notebook-level global, so the function works with whatever it is given.
    for i in range(len(sample_dict)):
        suffix = str(i)

        # Sorted so each node keeps its rows in their original dataset order.
        indices = np.sort(np.array(sample_dict["sample" + suffix]["i"]))

        input_ids_dict[group + "_input_id" + suffix] = input_ids[indices, :]
        attention_masks_dict[group + "_attention_mask" + suffix] = attention_masks[indices, :]
        labels_dict[group + "_label" + suffix] = labels[indices]

    return input_ids_dict,attention_masks_dict, labels_dict

In [None]:
# To train and validate local models
def train_and_validate_model(model,optimizer,train_dataloader,validation_dataloader):
    """Fine-tune ``model`` for up to 4 epochs with validation after each epoch.

    Uses a linear learning-rate schedule without warm-up and an
    ``EarlyStopping`` helper with patience 3 on the mean validation loss.
    The model is moved to ``device`` for the run and back to the CPU before
    returning.

    Args:
        model: Sequence-classification model whose forward call returns an
            object with ``loss`` and ``logits`` when given ``labels``.
        optimizer: Optimizer bound to ``model``'s parameters.
        train_dataloader: Yields ``(input_ids, attention_mask, labels)`` batches.
        validation_dataloader: Same batch layout, used for validation.

    Returns:
        Tuple ``(train_accuracy, train_loss, valid_accuracy, valid_loss)`` of
        the last completed epoch. When early stopping triggers, the values of
        the epoch before the triggering one are returned.
    """
    # NOTE(review): EarlyStopping checkpoints the best weights to its default
    # path ('checkpoint.pt'), but nothing here loads them back, so the model
    # returned to the caller holds the weights of the last epoch that ran.
    model.to(device)

    num_epochs = 4
    num_training_steps = num_epochs * len(train_dataloader)
    lr_scheduler = get_scheduler(
        "linear",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps
    )
    early_stopping = EarlyStopping(patience=3, verbose=False)

    train_accuracy_metric=load_metric("accuracy")
    valid_accuracy_metric=load_metric("accuracy")

    train_loss,valid_loss=0,0
    pr_train_loss,pr_valid_loss=0,0
    # Defined up front so the return statement is valid on every path.
    train_accuracy,valid_accuracy=0.0,0.0

    progress_bar = tqdm(range(num_training_steps))
    for epoch in range(num_epochs):

        train_losses = []
        valid_losses = []

        model.train()
        for batch in train_dataloader:
            # .to(device) already places the batch on the right device.
            b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)
            outputs = model(input_ids=b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
            loss = outputs.loss
            logits = outputs.logits
            loss.backward()
            optimizer.step()
            lr_scheduler.step()

            train_losses.append(loss.item())
            predictions = torch.argmax(logits, dim=-1)
            train_accuracy_metric.add_batch(predictions=predictions,references=b_labels)

            optimizer.zero_grad()
            progress_bar.update(1)

        model.eval()
        for batch in validation_dataloader:
            b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)
            with torch.no_grad():
                outputs = model(input_ids=b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
            loss = outputs.loss
            logits = outputs.logits

            valid_losses.append(loss.item())
            predictions = torch.argmax(logits, dim=-1)
            valid_accuracy_metric.add_batch(predictions=predictions,references=b_labels)

        # Keep the previous epoch's losses so they can be reported if this
        # epoch turns out to be the one that triggers early stopping.
        pr_train_loss=train_loss
        train_loss = np.average(train_losses)
        pr_valid_loss=valid_loss
        valid_loss = np.average(valid_losses)

        early_stopping(valid_loss, model)

        if early_stopping.early_stop:
            print("Early stopping")
            valid_loss=pr_valid_loss
            train_loss=pr_train_loss
            break

        train_accuracy=train_accuracy_metric.compute()['accuracy']
        valid_accuracy=valid_accuracy_metric.compute()['accuracy']

        print("EPOCH: {}".format(epoch+1),
              "| Train accuracy: {:7.5f}".format(train_accuracy),
              "| Train loss: {:7.5f}".format(train_loss),
              "| Validation accuracy: {:7.5f}".format(valid_accuracy),
              "| Validation loss: {:7.5f}".format(valid_loss))

    progress_bar.close()
    model.cpu()

    return train_accuracy,train_loss,valid_accuracy,valid_loss

In [None]:
# Evaluate models
def test_model(model,test_dataloader):
    """Evaluate ``model`` on a test set.

    The model is moved to ``device`` for the evaluation and back to the CPU
    before returning.

    Args:
        model: Sequence-classification model whose forward call returns an
            object with ``loss`` and ``logits`` when given ``labels``.
        test_dataloader: Yields ``(input_ids, attention_mask, labels)`` batches.

    Returns:
        Tuple ``(accuracy, mean_loss, precision, recall, f1)``.
    """
    # Same device the batches are sent to below (model.cuda() would fail when
    # `device` has fallen back to the CPU).
    model.to(device)

    test_losses=[]
    metric_names=("accuracy","precision","recall","f1")
    metrics={name: load_metric(name) for name in metric_names}

    model.eval()
    for batch in test_dataloader:
        b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            outputs = model(input_ids=b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)

        test_losses.append(outputs.loss.item())
        predictions = torch.argmax(outputs.logits, dim=-1)
        for name in metric_names:
            metrics[name].add_batch(predictions=predictions,references=b_labels)

    test_loss = np.average(test_losses)
    # Each metric reports its value under its own name.
    scores={name: metrics[name].compute()[name] for name in metric_names}

    model.cpu()

    return scores["accuracy"],test_loss,scores["precision"],scores["recall"],scores["f1"]

In [None]:
# Creates a model and an optimizer for each node
def create_model_optimizer_dict(number_of_samples):
    """Build one pretrained XLNet classifier and one AdamW optimizer per node.

    Args:
        number_of_samples (int): Number of nodes.

    Returns:
        Tuple ``(model_dict, optimizer_dict)`` keyed ``"model<i>"`` and
        ``"optimizer<i>"``; each optimizer (lr 2e-5) is bound to the
        parameters of the model with the same index.
    """
    model_dict, optimizer_dict = dict(), dict()

    for node in range(number_of_samples):
        node_model = XLNetForSequenceClassification.from_pretrained(
            pretrained_model_name_or_path="xlnet-base-cased", num_labels=2)
        model_dict["model" + str(node)] = node_model
        optimizer_dict["optimizer" + str(node)] = AdamW(node_model.parameters(), lr=2e-5)

    return model_dict, optimizer_dict

In [None]:
# Trains individual local models in nodes
def start_train_end_node_process(number_of_samples):
    """Train every node's local model on that node's own data.

    Reads the per-node data dicts, the key lists (``name_of_*``) and
    ``model_dict`` / ``optimizer_dict`` from the notebook's global scope, and
    prints one summary line per node.

    Args:
        number_of_samples (int): Number of nodes to train.
    """
    for node in range(number_of_samples):

        node_train_inputs = train_input_ids_dict[name_of_x_train_sets[node]]
        node_train_masks = train_attention_masks_dict[name_of_x_masks_train_sets[node]]
        node_train_labels = train_labels_dict[name_of_y_train_sets[node]]
        train_dataloader = create_dataloader(node_train_inputs, node_train_masks, node_train_labels)

        node_valid_inputs = valid_input_ids_dict[name_of_x_valid_sets[node]]
        node_valid_masks = valid_attention_masks_dict[name_of_x_masks_valid_sets[node]]
        node_valid_labels = valid_labels_dict[name_of_y_valid_sets[node]]
        validation_dataloader = create_dataloader(node_valid_inputs, node_valid_masks, node_valid_labels)

        node_model = model_dict[name_of_models[node]]
        node_optimizer = optimizer_dict[name_of_optimizers[node]]

        train_accuracy, train_loss, valid_accuracy, valid_loss = train_and_validate_model(
            node_model, node_optimizer, train_dataloader, validation_dataloader)

        # Clients are reported 1-based.
        summary = ("CLIENT: {}".format(node+1) +
                   " | Train accuracy: {:7.5f}".format(train_accuracy) +
                   " | Train loss: {:7.5f}".format(train_loss) +
                   " | Validation accuracy: {:7.5f}".format(valid_accuracy) +
                   " | Validation loss: {:7.5f}".format(valid_loss))
        print(summary)

In [None]:
# Averages the weights of the individual nodes and sets the result as the new weights of the main model
def get_averaged_weights_and_update_main_model(model_dict, number_of_samples):
    """Overwrite the global ``main_model``'s parameters with the client mean.

    Each parameter of ``main_model`` becomes the sum over clients of
    ``client_parameter / number_of_samples``. Models are moved to CUDA while
    they are being read or written and returned to the CPU afterwards.

    Args:
        model_dict (dict): Client models keyed ``"model<i>"``.
        number_of_samples (int): Number of client models to average.
    """
    with torch.no_grad():
        main_model.cuda()

        for client_idx in range(number_of_samples):
            client_model = model_dict["model" + str(client_idx)]
            client_model.cuda()

            parameter_pairs = zip(client_model.parameters(), main_model.parameters())
            for client_param, main_param in parameter_pairs:
                if client_idx:
                    # Later clients add their share to the running mean.
                    main_param.data += client_param.data/number_of_samples
                else:
                    # The first client replaces whatever the main model held.
                    main_param.data = client_param.data.clone().detach()
                    main_param.data /= number_of_samples

            client_model.cpu()

        main_model.cpu()

In [None]:
# Compares the accuracy of the main model and the local model running on each node
def compare_local_and_merged_model_performance(number_of_samples):
    """Evaluate the main model and every client model on the test split.

    Reads ``test_inputs``, ``test_masks``, ``test_labels``, ``main_model``,
    ``model_dict`` and ``name_of_models`` from the notebook's global scope.

    Args:
        number_of_samples (int): Number of client models to evaluate.

    Returns:
        DataFrame with columns Model, Accuracy, Precision, Recall, F1; row 0
        is the main model, followed by one row per client ("Client <i>").
    """
    test_dataloader=create_dataloader(test_inputs, test_masks, test_labels)

    candidates=[("Main Model", main_model)]
    candidates+=[("Client "+str(i), model_dict[name_of_models[i]]) for i in range(number_of_samples)]

    # Collect plain records and build the frame once: filling a zero-initialised
    # float frame would write strings into a float64 "Model" column.
    rows=[]
    for model_label, model in candidates:
        accuracy,_loss,precision,recall,f1 = test_model(model, test_dataloader)
        rows.append({"Model": model_label,
                     "Accuracy": accuracy,
                     "Precision": precision,
                     "Recall": recall,
                     "F1": f1})

    return pd.DataFrame(rows, columns=["Model", "Accuracy", "Precision","Recall","F1"])

In [None]:
# To send the merged parameters of the main model to local models
def send_main_model_to_nodes_and_update_model_dict(main_model, model_dict, number_of_samples):
    """Copy the main model's parameters into every client model.

    Models are moved to CUDA while being read or written and returned to the
    CPU afterwards. Each client receives its own copy of the tensors.

    Args:
        main_model: Model whose parameters are broadcast.
        model_dict (dict): Client models keyed ``"model<i>"``.
        number_of_samples (int): Number of client models to update.
    """
    with torch.no_grad():
        main_model.cuda()

        for client_idx in range(number_of_samples):
            client_model = model_dict["model" + str(client_idx)]
            client_model.cuda()

            parameter_pairs = zip(client_model.parameters(), main_model.parameters())
            for target_param, source_param in parameter_pairs:
                # clone() so the client does not share storage with the main model
                target_param.data = source_param.data.clone().detach()

            client_model.cpu()

        main_model.cpu()

### EXPERIMENTATION

In [None]:
# Creating the main model
# Load XLNetForSequenceClassification, the pretrained XLNet model with a single linear classification layer on top.
main_model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=2)
# Optimizer for the main model, with the same learning rate as the client optimizers.
main_optimizer = AdamW(main_model.parameters(),lr=2e-5)

In [None]:
# The main model is trained once on Amazon reviews with train_main_model
# (call left commented out below); on later runs its saved state is loaded
# from Drive instead.
# train_main_model(main_model,main_optimizer)
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
main_model.load_state_dict(torch.load( F"/content/gdrive/My Drive/Innovation_Lab/main_model.pt"))
main_optimizer.load_state_dict(torch.load( F"/content/gdrive/My Drive/Innovation_Lab/main_optimizer.pt"))

In [None]:
# Getting input ids, attention masks and labels from the complaints dataset
# (get_features opens an upload dialog, so this cell is interactive).
input_ids,attention_masks,labels=get_features()

In [None]:
# Number of federated clients (nodes): one local model, optimizer and data shard is created per client.
number_of_samples=4

In [None]:
# Use train_test_split twice: first hold out 20% of the data, then split that
# held-out part in half, giving train / validation / test sets of 80% / 10% / 10%.

train_inputs, val_test_inputs, train_masks, val_test_masks,train_labels, val_test_labels = train_test_split(input_ids,attention_masks, labels,random_state=42, test_size=0.2)
validation_inputs, test_inputs,validation_masks, test_masks, validation_labels, test_labels = train_test_split(val_test_inputs,val_test_masks, val_test_labels,random_state=42, test_size=0.5)

In [None]:
# Creating clients with train and validation dataset.
# For each split: group row positions by label and shuffle them, give every
# client an equal slice of each label, then gather the matching rows.

train_id_dict=split_and_shuffle_labels(train_labels, seed=1) 
train_sample_dict=get_iid_subsamples_indices(label_dict=train_id_dict, number_of_samples=number_of_samples)
train_input_ids_dict,train_attention_masks_dict, train_labels_dict = create_iid_subsamples(train_sample_dict, train_inputs,train_masks,train_labels,"train")

valid_id_dict = split_and_shuffle_labels(validation_labels, seed=1) 
valid_sample_dict = get_iid_subsamples_indices(valid_id_dict, number_of_samples=number_of_samples)
valid_input_ids_dict,valid_attention_masks_dict, valid_labels_dict = create_iid_subsamples(valid_sample_dict, validation_inputs,validation_masks,validation_labels,"valid")

In [None]:
# One model and one optimizer are created for every node
model_dict, optimizer_dict = create_model_optimizer_dict(number_of_samples)

In [None]:
# Dict keys are copied into lists so each node's entry can be looked up by
# its position (index i -> key of node i).

name_of_x_train_sets=list(train_input_ids_dict.keys())
name_of_x_masks_train_sets=list(train_attention_masks_dict.keys())
name_of_y_train_sets=list(train_labels_dict.keys())

name_of_x_valid_sets=list(valid_input_ids_dict.keys())
name_of_x_masks_valid_sets=list(valid_attention_masks_dict.keys())
name_of_y_valid_sets=list(valid_labels_dict.keys())

name_of_models=list(model_dict.keys())
name_of_optimizers=list(optimizer_dict.keys())

In [None]:
# Updating client models: every client starts from a copy of the main model's parameters
send_main_model_to_nodes_and_update_model_dict(main_model, model_dict, number_of_samples)

In [None]:
# Models in the nodes are trained, each on its own train/validation shard
start_train_end_node_process(number_of_samples)

In [None]:
# Compares the main model (not yet aggregated) with the freshly trained local models on the test set
compare_local_and_merged_model_performance(number_of_samples)

In [None]:
# Update the main model to the federated average of the client models' parameters
get_averaged_weights_and_update_main_model(model_dict, number_of_samples)

In [None]:
# Compares the main model (now holding the averaged weights) with the local models on the test set
compare_local_and_merged_model_performance(number_of_samples)

In [None]:
# Updating client models: copy the averaged main-model parameters back to every client
send_main_model_to_nodes_and_update_model_dict(main_model, model_dict, number_of_samples)

## Result
```
	  Model                   Accuracy	Precision	  Recall	F1
1	Main Model (before FL)   0.759420	0.646739	0.868613	0.741433
2    Main Model (after FL)    0.855072	0.804196	0.839416	0.821429
3	Client 0	             0.843478	0.771242	0.861314	0.813793
4	Client 1	             0.872464	0.825175	0.861314	0.842857
5	Client 2	             0.843478	0.790210	0.824818	0.807143
6	Client 3	             0.872464	0.849624	0.824818	0.837037
```