In [1]:
import pandas as pd
import numpy as np
import tqdm

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import spacy
from torch.utils.data import TensorDataset, DataLoader
from sklearn.utils import resample
from torch.nn.utils.rnn import pad_sequence

import torch
from torchtext import data
from sklearn.metrics import f1_score
import pandas as pd 
import re
import random

import torch.nn as nn
import torch.nn.functional as F
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
import torch.optim as optim

In [7]:
!python -m spacy download en_core_web_md

Collecting en-core-web-md==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.5.0/en_core_web_md-3.5.0-py3-none-any.whl (42.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m27.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-md
Successfully installed en-core-web-md-3.5.0
[0m[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')


In [15]:
class model(nn.Module):
    def __init__(self, args):
        super(model, self).__init__()
        self.num_layers = args.n_hidden
        self.hidden_size = args.hidden_dims
        self.rnn = nn.RNN(args.input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, args.n_categories)
        
    def forward(self, x):
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.rnn(x, h_0)
        out = out[:, -1, :]
        out = self.fc(out)
        return out

In [16]:
class FedClient:
    def __init__(self, id, train_data, test_data, args, global_model):
        self.client_id = id
        self.train_data_loader = train_data
        self.test_data_loader = test_data
        self.model = model(args)
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=args.learning_rate)
        self.criterion = nn.CrossEntropyLoss()
        self.global_model = global_model

    #train client's model
    def train(self, num_epochs):
        for epoch in range(num_epochs):
            iteration = 0
            n_correct = 0
            n_samples = 0
            for batch_X, batch_Y in self.train_data_loader:
                # Zero the gradients
                self.optimizer.zero_grad()
                # Forward pass
                output = self.model(batch_X)
                # Compute the loss
                loss = self.criterion(output, batch_Y)

                _, predictions = torch.max(output, 1)
                n_samples  += batch_Y.shape[0]
                n_correct += (predictions == batch_Y).sum().item()
                acc = 100 * n_correct / n_samples


                # Backward pass
                loss.backward()
                self.optimizer.step()
                iteration += 1
                if iteration%10==0:
                    print(f'Training accuracy ={acc:.4f}')
                    print(f'epoch: {epoch+1}, step {iteration+1}, loss = {loss.item():.4f}')

    #To evaluate local model
    def evaluate(self):
        self.model.eval()  # set model to evaluation mode
        n_correct = 0
        n_samples = 0
        test_loss = 0
        with torch.no_grad():  # disable gradient calculation to save memory
            for batch_X, batch_Y in self.test_data_loader:
                # Forward pass
                output = self.model(batch_X)
                # Compute the loss
                loss = self.criterion(output, batch_Y)

                _, predictions = torch.max(output, 1)
                n_samples += batch_Y.shape[0]
                n_correct += (predictions == batch_Y).sum().item()
                test_loss += loss
        acc = 100 * n_correct / n_samples
        print(f'Evaluation accuracy = {acc:.4f}')
        return test_loss,acc

    #updates from the global model/aggregator's model param
    def update_to_local(self):
        self.model.load_state_dict(self.global_model.state_dict())

In [17]:
#Centralised server
class FedAggregator:
    def __init__(self, num_clients,args):
        self.num_clients = num_clients
        self.clients = []
        self.global_model = model(args)
        self.global_loss = float('inf')
        
    def text_cleaner(self,text):
        text = text.lower() # convert to lowercase
        text = re.sub(r'[^a-zA-Z0-9\s]', '', text) # remove punctuation and special characters
        text = re.sub(r'\s+', ' ', text) # remove extra whitespace
        text = re.sub(r'https?://\S+', '', text) # remove URLs
        text = re.sub(r"#", "", text)
        return text
    
    def embed(self,docs):
        nlp = spacy.load('en_core_web_md')
        docs_tensor = []
        pbar = tqdm.trange(docs.shape[0])
        for t in pbar:
            doc = nlp(docs[t])
            sentence_embeddings = [token.vector for token in doc]
            docs_tensor.append(sentence_embeddings)

        docs_tensor = [torch.tensor(np.array(d)) for d in docs_tensor]
        docs_tensor = pad_sequence(docs_tensor, batch_first=True)

        print(docs_tensor.shape)
        return docs_tensor

    def load_data(self):
        raw_train_df = pd.read_csv("/kaggle/input/nlp-getting-started/train.csv")
        raw_test_df = pd.read_csv("/kaggle/input/nlp-getting-started/test.csv")
        
        del raw_train_df['keyword']
        del raw_train_df['location']
        del raw_train_df['id']

        train_data = raw_train_df.where((pd.notnull(raw_train_df)),'')
        test_data = raw_test_df.where((pd.notnull(raw_test_df)),'')

        train_data['text'] = train_data['text'].apply(self.text_cleaner)
        test_data['text'] = test_data['text'].apply(self.text_cleaner)
        
        df_majority = train_data[train_data['target'] == 0]
        df_minority = train_data[train_data['target'] == 1]

        # Upsample the minority class
        df_minority_upsampled = df_minority.sample(replace=True, n=len(df_majority), random_state=123)

        # Combine the majority class and the upsampled minority class
        df_upsampled = pd.concat([df_majority, df_minority_upsampled])

        # Shuffle the rows of the new dataframe
        train_data = df_upsampled.sample(frac=1).reset_index(drop=True)

        X, Y = train_data['text'], train_data['target']

        X_test= test_data['text']
        
        X_tensor = self.embed(X)
        X_test_tensor = self.embed(X_test)

        X_train, X_val, Y_train, Y_val = train_test_split(X_tensor, Y, test_size=0.2, random_state= 3)


        Y_train = torch.from_numpy(Y_train.values)
        Y_val = torch.from_numpy(Y_val.values)


        train_dataset = TensorDataset(X_train, Y_train)

        batch_size = 32

        val_dataset = TensorDataset(X_val, Y_val)
        val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
        test_dataset = TensorDataset(X_test_tensor)
        test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
        
        # split train_dataset across clients
        num_samples = len(train_dataset) // self.num_clients
        split_data = [num_samples] * self.num_clients
        split_data[-1] += len(train_dataset) - sum(split_data)

        train_data_splits = torch.utils.data.random_split(train_dataset, split_data)


        # create clients
        for i in range(self.num_clients):
            client_train_dataset = train_data_splits[i]
            client_train_loader = DataLoader(client_train_dataset, batch_size=32, shuffle=True)


            client = FedClient(i, client_train_loader, val_dataloader, args, self.global_model)
            self.clients.append(client)


    #Number of communication rounds        
    def run_rounds(self, num_rounds, epochs):
        for round in range(num_rounds):
            print("***********************************Round: ", round)
            client_models = []
            client_losses = []
            client_accs = []
            for client in self.clients:
                client.train(epochs)
                client_loss, client_acc = client.evaluate()
                client_losses.append(client_loss)
                client_accs.append(client_acc)
                client_model = client.model.state_dict()
                client_models.append(client_model)


            # update global model using federated averaging
            acc_loss_ratios = []
            alpha = 0.3 # top alpha% of the models wrt loss is chosen
            for i, client_model in enumerate(client_models):
                acc = client_accs[i]
                loss = client_losses[i]
                ratio = acc / loss
                acc_loss_ratios.append((ratio, i))
            acc_loss_ratios = sorted(acc_loss_ratios, reverse=True)  # sort in descending order
            ft_size = max(int(np.ceil(alpha * self.num_clients)), 1)  # ensure ft_size is at least 1

            # update global model using weighted average of models from above
            global_model_dict = {}
            total_weight = 0
            for _, i in acc_loss_ratios[:ft_size]:
                weight = 1 / client_losses[i]  # to use inverse loss as weight
                total_weight += weight
                client_model = client_models[i]
                for k in client_model.keys():
                    if k in global_model_dict:
                        global_model_dict[k] += weight * client_model[k]
                    else:
                        global_model_dict[k] = weight * client_model[k]
            for k in global_model_dict.keys():
                global_model_dict[k] /= total_weight
                self.global_model.state_dict()[k].copy_(global_model_dict[k])

            # update clients' local models with the global model
            for client in self.clients:
                client.update_to_local()

            # evaluate global model
            test_loss, test_acc = self.evaluate_global()
            print(f"Test loss: {test_loss}, Test accuracy: {test_acc}")


    def evaluate_global(self):
        self.global_model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in self.clients[0].test_data_loader:
                output = self.global_model(data)
                test_loss += F.cross_entropy(output, target, reduction='sum').item()
                # sum up batch loss
                pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
                correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(self.clients[0].test_data_loader.dataset)
        accuracy = correct / len(self.clients[0].test_data_loader.dataset)
        print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(self.clients[0].test_data_loader.dataset),
            100. * correct / len(self.clients[0].test_data_loader.dataset)))
        return test_loss, accuracy



In [18]:
input_size = 300
n_hidden = 2
hidden_dims = 128
n_categories = 2
num_epochs = 30
learning_rate = 0.0007

class Args:
    def __init__(self, input_size, n_hidden, hidden_dims, n_categories, num_epochs, learning_rate):
        self.input_size = input_size
        self.n_hidden = n_hidden
        self.hidden_dims = hidden_dims
        self.n_categories = n_categories
        self.num_epochs = num_epochs
        self.learning_rate = learning_rate

args = Args(input_size, n_hidden, hidden_dims, n_categories, num_epochs, learning_rate)

In [19]:
num_clients = 5
num_rounds = 30
epochs = 60

aggregator = FedAggregator(num_clients,args)
aggregator.load_data()

aggregator.run_rounds(num_rounds, epochs)

100%|██████████| 8684/8684 [01:13<00:00, 118.56it/s]


torch.Size([8684, 32, 300])


100%|██████████| 3263/3263 [00:28<00:00, 116.34it/s]


torch.Size([3263, 33, 300])
***********************************Round:  0
Training accuracy =50.0000
epoch: 1, step 11, loss = 0.6858
Training accuracy =50.6250
epoch: 1, step 21, loss = 0.7165
Training accuracy =49.2708
epoch: 1, step 31, loss = 0.6924
Training accuracy =49.9219
epoch: 1, step 41, loss = 0.6937
Training accuracy =52.8125
epoch: 2, step 11, loss = 0.6819
Training accuracy =52.9688
epoch: 2, step 21, loss = 0.8336
Training accuracy =51.9792
epoch: 2, step 31, loss = 0.6892
Training accuracy =53.2031
epoch: 2, step 41, loss = 0.6892
Training accuracy =55.3125
epoch: 3, step 11, loss = 0.6599
Training accuracy =56.5625
epoch: 3, step 21, loss = 0.6638
Training accuracy =56.9792
epoch: 3, step 31, loss = 0.6103
Training accuracy =56.4844
epoch: 3, step 41, loss = 0.6642
Training accuracy =64.0625
epoch: 4, step 11, loss = 0.6802
Training accuracy =63.1250
epoch: 4, step 21, loss = 0.5609
Training accuracy =63.1250
epoch: 4, step 31, loss = 0.6441
Training accuracy =62.8125
