In [None]:
# ======================
# 🚀 SETUP --- global accuracy with precision, recall, and F1-score 
# ======================
!pip install flwr --quiet

import os
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import flwr as fl

def set_seed(seed=42):
    """Seed the torch, NumPy and stdlib ``random`` generators with *seed*.

    Args:
        seed: Integer seed applied to all three generators (default 42).

    Returns:
        A three-element list holding the return values of
        ``torch.manual_seed``, ``np.random.seed`` and ``random.seed``, in
        that order. Kept only for backward compatibility with the previous
        lambda-based definition; callers normally ignore it.
    """
    # Same call order as before: torch first, then NumPy, then stdlib random.
    seeders = (torch.manual_seed, np.random.seed, random.seed)
    return [seeder(seed) for seeder in seeders]


set_seed()

# Paths
# Directory the client CSVs, test_data.csv and the GloVe file are read from.
DATASET_PATH = "/kaggle/input/data70k/"
GLOVE_PATH = os.path.join(DATASET_PATH, "glove.6B.100d.txt")

# Global config
# MAX_WORDS is passed to the tokenizer as num_words and caps the number of
# rows in the embedding matrix built by load_glove_embeddings().
MAX_WORDS = 3000
# SEQ_LEN is the maxlen every token sequence is padded/truncated to.
SEQ_LEN = 100
# EMBED_DIM is the column count of the embedding matrix; presumably it must
# match the vector size of the GloVe file above (100d) -- TODO confirm.
EMBED_DIM = 100

# Tokenizer and encoder
# One tokenizer shared by all loaders; load_client_data() fits it on the first
# call and sets fitted_tokenizer so later calls reuse the same vocabulary.
global_tokenizer = Tokenizer(num_words=MAX_WORDS, oov_token="<OOV>")
# NOTE(review): global_label_encoder and fitted_label_encoder are not read or
# written anywhere else in the visible code; labels are used as-is.
global_label_encoder = LabelEncoder()
fitted_tokenizer = False
fitted_label_encoder = False

# ======================
# 📅 Load Client Data
# ======================
def load_client_data(path):
    """Build a shuffled training DataLoader from one client's CSV file.

    The CSV must provide ``text`` and ``target`` columns (header names are
    stripped and lower-cased before lookup). On the very first call the
    shared ``global_tokenizer`` is fitted on this file's texts and the
    module-level ``fitted_tokenizer`` flag is set; later calls only reuse
    the existing vocabulary.

    Args:
        path: Location of the client's CSV file.

    Returns:
        A ``DataLoader`` (batch size 32, shuffled) over ``(token_ids, label)``
        pairs, with token ids padded/truncated to ``SEQ_LEN``.
    """
    global fitted_tokenizer

    frame = pd.read_csv(path)
    frame.columns = frame.columns.str.strip().str.lower()

    corpus = frame['text'].astype(str).tolist()
    raw_targets = frame['target'].values

    # Fit the vocabulary exactly once, on whichever client is loaded first.
    if not fitted_tokenizer:
        global_tokenizer.fit_on_texts(corpus)
        fitted_tokenizer = True

    token_ids = pad_sequences(
        global_tokenizer.texts_to_sequences(corpus),
        maxlen=SEQ_LEN,
        padding='post',
    )
    dataset = TensorDataset(
        torch.tensor(token_ids, dtype=torch.long),
        torch.tensor(raw_targets, dtype=torch.long),
    )
    return DataLoader(dataset, batch_size=32, shuffle=True)

def load_test_data():
    """Build the evaluation DataLoader from ``test_data.csv``.

    The file is read from ``DATASET_PATH`` and must provide ``text`` and
    ``target`` columns (header names are stripped and lower-cased before
    lookup). Texts are encoded with the shared ``global_tokenizer``; this
    function never fits the tokenizer itself.

    Returns:
        A ``DataLoader`` (batch size 32, no shuffling requested) over
        ``(token_ids, label)`` pairs, with token ids padded/truncated to
        ``SEQ_LEN``.
    """
    test_csv = os.path.join(DATASET_PATH, "test_data.csv")
    frame = pd.read_csv(test_csv)
    frame.columns = frame.columns.str.strip().str.lower()

    corpus = frame["text"].astype(str).tolist()
    raw_targets = frame["target"].values

    token_ids = pad_sequences(
        global_tokenizer.texts_to_sequences(corpus),
        maxlen=SEQ_LEN,
        padding='post',
    )
    dataset = TensorDataset(
        torch.tensor(token_ids, dtype=torch.long),
        torch.tensor(raw_targets, dtype=torch.long),
    )
    return DataLoader(dataset, batch_size=32)

# ======================
# 🔠 Load GloVe
# ======================
def load_glove_embeddings():
    """Build the initial embedding matrix for the tokenizer's vocabulary.

    Reads the text file at ``GLOVE_PATH`` (one entry per line: a word
    followed by whitespace-separated coefficients) and copies the vector of
    every word whose tokenizer index fits in the matrix into the row with
    that index. Rows for words missing from the file stay all-zero, as does
    row 0.

    The file is streamed and only the needed vectors are parsed, so the full
    pretrained table is never held in memory. ``global_tokenizer`` must be
    fitted before this is called, otherwise its ``word_index`` is empty.

    Returns:
        A float32 tensor of shape ``(vocab_size, EMBED_DIM)`` where
        ``vocab_size = min(MAX_WORDS, len(word_index) + 1)``.

    Raises:
        OSError: If the GloVe file cannot be opened.
        ValueError: If a needed vector does not have ``EMBED_DIM`` numeric
            coefficients.
    """
    vocab_size = min(MAX_WORDS, len(global_tokenizer.word_index) + 1)

    # Map only the words whose row exists in the matrix; everything else in
    # the GloVe file can be skipped without parsing its coefficients.
    wanted = {
        word: idx
        for word, idx in global_tokenizer.word_index.items()
        if idx < vocab_size
    }

    embedding_matrix = np.zeros((vocab_size, EMBED_DIM), dtype='float32')
    with open(GLOVE_PATH, encoding='utf8') as f:
        for line in f:
            parts = line.split(maxsplit=1)
            if len(parts) < 2:
                # Blank line or a word without coefficients: nothing to load.
                continue
            word, coeffs = parts
            idx = wanted.get(word)
            if idx is not None:
                embedding_matrix[idx] = np.asarray(coeffs.split(), dtype='float32')
    return torch.tensor(embedding_matrix, dtype=torch.float)

# ======================
# 🧐 Model with Attention
# ======================
class Attention(nn.Module):
    """Additive-score pooling over dimension 1 of a feature tensor.

    A single linear layer maps each feature vector of size
    ``hidden_dim * 2`` to one score; scores are softmax-normalised along
    dimension 1 and used to form a weighted sum of the inputs.
    """

    def __init__(self, hidden_dim):
        """Create the scoring layer for inputs of width ``hidden_dim * 2``."""
        super().__init__()
        feature_dim = hidden_dim * 2
        self.attn = nn.Linear(feature_dim, 1)

    def forward(self, x):
        """Return the attention-weighted sum of ``x`` along dimension 1."""
        scores = self.attn(x)
        # Normalise across dimension 1 so the weights there sum to one.
        weights = scores.softmax(dim=1)
        return (weights * x).sum(dim=1)

class CNN_BiGRU_Attn(nn.Module):
    """Text classifier: embedding -> Conv1d -> bidirectional GRU -> attention -> linear.

    Args:
        embedding_matrix: 2-D float tensor ``(vocab_size, embed_dim)`` used to
            initialise the trainable embedding layer. The tensor is copied,
            so the caller's tensor is never modified by training or by
            loading a state dict, and several models can be built from the
            same matrix without sharing weights.
        hidden_dim: Hidden size of each GRU direction.
        output_dim: Number of output logits.
    """

    def __init__(self, embedding_matrix, hidden_dim, output_dim):
        super().__init__()
        embed_dim = embedding_matrix.shape[1]
        # from_pretrained() wraps the given tensor directly as the weight
        # parameter; clone first so fine-tuning (freeze=False) does not write
        # through to the shared module-level matrix.
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix.clone(), freeze=False)
        self.conv = nn.Conv1d(embed_dim, embed_dim, kernel_size=3, padding=1)
        self.bigru = nn.GRU(embed_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.attn = Attention(hidden_dim)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)

    def forward(self, x):
        """Return logits for a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids; the permutes below require the
                embedded tensor to be 3-D, i.e. ``x`` is ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch, output_dim)`` with unnormalised scores.
        """
        x = self.embedding(x)
        # Conv1d convolves over the last dimension, so move the embedding
        # dimension into the channel position and back again afterwards.
        x = x.permute(0, 2, 1)
        x = torch.relu(self.conv(x))
        x = x.permute(0, 2, 1)
        gru_out, _ = self.bigru(x)
        x = self.attn(gru_out)
        return self.fc(self.dropout(x))

# ======================
# 🌸 Flower Client
# ======================
class SentimentClient(fl.client.NumPyClient):
    """Flower NumPy client that trains locally and evaluates on the shared test set.

    Args:
        model: The ``nn.Module`` to train; its ``state_dict`` order defines the
            order of the parameter arrays exchanged with the server.
        trainloader: DataLoader yielding ``(inputs, labels)`` training batches.
    """

    def __init__(self, model, trainloader):
        self.model = model
        self.trainloader = trainloader
        # Every client evaluates on the same file returned by load_test_data().
        self.testloader = load_test_data()
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(model.parameters(), lr=0.001)

    def get_parameters(self, config):
        """Return all state-dict tensors as NumPy arrays, in state-dict order."""
        return [val.cpu().numpy() for val in self.model.state_dict().values()]

    def set_parameters(self, parameters):
        """Load *parameters* (arrays in state-dict order) into the model."""
        state_dict = self.model.state_dict()
        for k, v in zip(state_dict.keys(), parameters):
            state_dict[k] = torch.tensor(v)
        self.model.load_state_dict(state_dict)

    def fit(self, parameters, config):
        """Run one pass over the local training data starting from *parameters*.

        Returns:
            ``(updated_parameters, num_training_examples, {})``.
        """
        self.set_parameters(parameters)
        self.model.train()
        for x, y in self.trainloader:
            self.optimizer.zero_grad()
            out = self.model(x)
            loss = self.criterion(out, y)
            loss.backward()
            self.optimizer.step()
        return self.get_parameters({}), len(self.trainloader.dataset), {}

    def evaluate(self, parameters, config):
        """Evaluate *parameters* on the test loader.

        Returns:
            ``(mean_loss, num_examples, metrics)`` where ``mean_loss`` is the
            per-example cross-entropy and ``metrics`` holds ``accuracy``,
            ``precision``, ``recall`` and ``f1_score`` as percentages
            (precision/recall/F1 are class-weighted averages).
        """
        self.set_parameters(parameters)
        self.model.eval()
        total, correct, total_loss = 0, 0, 0.0
        all_preds, all_labels = [], []
        with torch.no_grad():
            for x, y in self.testloader:
                out = self.model(x)
                preds = out.argmax(1)
                batch_size = y.size(0)
                # The criterion returns the batch mean; scale by the batch
                # size so a smaller final batch is not over-weighted when the
                # overall per-example loss is computed below.
                total_loss += self.criterion(out, y).item() * batch_size
                total += batch_size
                correct += (preds == y).sum().item()
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(y.cpu().numpy())
        acc = 100 * correct / total
        avg_loss = total_loss / total
        precision = precision_score(all_labels, all_preds, average="weighted", zero_division=0)
        recall = recall_score(all_labels, all_preds, average="weighted", zero_division=0)
        f1 = f1_score(all_labels, all_preds, average="weighted", zero_division=0)
        return avg_loss, total, {
            "accuracy": acc,
            "precision": precision * 100,
            "recall": recall * 100,
            "f1_score": f1 * 100
        }

# ======================
# 🚀 Run FL Simulation
# ======================
from flwr.common import Context

def client_fn(cid: str):
    """Create the Flower client for simulated client number *cid*.

    Client ids are zero-based while the data files are one-based, so client
    ``"0"`` trains on ``client_1_data.csv`` inside ``DATASET_PATH``. Each call
    builds a fresh model from the module-level ``embedding_matrix``.

    Args:
        cid: Client id as a string holding an integer.

    Returns:
        The result of ``SentimentClient(...).to_client()``.
    """
    file_number = int(cid) + 1
    csv_path = os.path.join(DATASET_PATH, f"client_{file_number}_data.csv")
    # Load the data before building the model, as the original flow did.
    loader = load_client_data(csv_path)
    net = CNN_BiGRU_Attn(embedding_matrix, hidden_dim=128, output_dim=2)
    return SentimentClient(net, loader).to_client()

# Called only for its side effect: the first load_client_data() call fits the
# shared tokenizer, whose word_index load_glove_embeddings() reads below. The
# returned DataLoader is discarded.
_ = load_client_data(os.path.join(DATASET_PATH, "client_1_data.csv"))
# Module-level matrix that client_fn() passes to every model it builds.
embedding_matrix = load_glove_embeddings()

def weighted_average(metrics):
    """Aggregate per-client metrics into example-weighted averages.

    Args:
        metrics: Sequence of ``(num_examples, metrics_dict)`` pairs, one per
            client. Every dict must contain ``accuracy``, ``precision``,
            ``recall`` and ``f1_score``.

    Returns:
        A dict with the same four keys, each the average of the client values
        weighted by ``num_examples``. If there are no results or the total
        example count is zero, every metric is reported as ``0.0`` instead of
        raising ``ZeroDivisionError``.
    """
    keys = ("accuracy", "precision", "recall", "f1_score")
    total_examples = sum(num_examples for num_examples, _ in metrics)
    if total_examples == 0:
        # Nothing was evaluated, so the averages are undefined; fall back to
        # zero, mirroring the zero_division=0 choice used for the per-client
        # precision/recall/F1 scores.
        return {key: 0.0 for key in keys}
    return {
        key: sum(num_examples * m[key] for num_examples, m in metrics) / total_examples
        for key in keys
    }

# FedAvg with only the evaluation-metric aggregation overridden, so the
# per-client accuracy/precision/recall/F1 are combined by weighted_average().
strategy = fl.server.strategy.FedAvg(
    evaluate_metrics_aggregation_fn=weighted_average,
)

# Simulate 3 clients (client_fn receives their ids) for 5 federated rounds.
history = fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=3,
    config=fl.server.ServerConfig(num_rounds=5),
    strategy=strategy,
)

# Each metric is recorded as a list of (round, value) pairs. Look the values
# up by round number rather than by list position, so the report stays aligned
# even if some round recorded no metrics.
distributed_metrics = history.metrics_distributed
precision_by_round = dict(distributed_metrics["precision"])
recall_by_round = dict(distributed_metrics["recall"])
f1_by_round = dict(distributed_metrics["f1_score"])

print("\n📊 Global Metrics After Each Round:")
for round_num, acc in distributed_metrics["accuracy"]:
    prec = precision_by_round[round_num]
    rec = recall_by_round[round_num]
    f1 = f1_by_round[round_num]
    print(f"Round {round_num} => Accuracy: {acc:.2f}%, Precision: {prec:.2f}%, Recall: {rec:.2f}%, F1-score: {f1:.2f}%")

# The last recorded entry of each metric is the final global result.
final_acc = distributed_metrics["accuracy"][-1][1]
final_precision = distributed_metrics["precision"][-1][1]
final_recall = distributed_metrics["recall"][-1][1]
final_f1 = distributed_metrics["f1_score"][-1][1]
print(f"\nFinal Metrics — Accuracy: {final_acc:.2f}%, Precision: {final_precision:.2f}%, Recall: {final_recall:.2f}%, F1-score: {final_f1:.2f}%")