In [None]:
# ======================
#  SETUP --- global accuracy with round names (final, correct version)
# ======================
!pip install flwr --quiet

import os
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import flwr as fl

def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed applied to all three generators (default 42).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)


# Seed once at import time so the rest of the script starts from a fixed state.
set_seed()

# Paths
# Directory holding the client CSVs, the test CSV and the GloVe vectors.
DATASET_PATH = "/kaggle/input/dataset-sentiment140/"
GLOVE_PATH = os.path.join(DATASET_PATH, "glove.6B.100d.txt")

# Global config
# MAX_WORDS is passed to the tokenizer as `num_words` and also caps the number
# of rows in the embedding matrix built by load_glove_embeddings().
MAX_WORDS = 3000
# Every tokenized text is padded/truncated to this many token ids.
SEQ_LEN = 100
# Number of columns in the embedding matrix.
# NOTE(review): presumably must match the vector size in GLOVE_PATH ("100d") - confirm.
EMBED_DIM = 100

# Tokenizer and encoder
# A single tokenizer is shared by every loader so all data uses one vocabulary.
global_tokenizer = Tokenizer(num_words=MAX_WORDS, oov_token="<OOV>")
# NOTE(review): global_label_encoder and fitted_label_encoder are not referenced
# anywhere else in the visible code; labels are read directly as integers.
global_label_encoder = LabelEncoder()
# Flipped to True by load_client_data() the first time it fits global_tokenizer.
fitted_tokenizer = False
fitted_label_encoder = False

# ======================
#  Load Client Data
# ======================
def load_client_data(path):
    """Read one client's CSV and wrap it in a shuffling training DataLoader.

    Column names are stripped and lower-cased, then the ``text`` and
    ``target`` columns are used. The first call ever made fits the shared
    ``global_tokenizer`` on that file's texts; later calls reuse the fitted
    vocabulary.

    Args:
        path: Location of the client's CSV file.

    Returns:
        A ``DataLoader`` (batch size 32, shuffled) over a ``TensorDataset``
        of padded token ids and integer labels.
    """
    global fitted_tokenizer

    frame = pd.read_csv(path)
    frame.columns = frame.columns.str.strip().str.lower()

    tweet_texts = frame['text'].astype(str).tolist()
    targets = frame['target'].values  # labels are used as-is, no re-encoding

    # Build the vocabulary only once, from whichever file arrives first.
    if not fitted_tokenizer:
        global_tokenizer.fit_on_texts(tweet_texts)
        fitted_tokenizer = True

    token_ids = pad_sequences(
        global_tokenizer.texts_to_sequences(tweet_texts),
        maxlen=SEQ_LEN,
        padding='post',
    )
    features = torch.tensor(token_ids, dtype=torch.long)
    label_tensor = torch.tensor(targets, dtype=torch.long)

    dataset = TensorDataset(features, label_tensor)
    return DataLoader(dataset, batch_size=32, shuffle=True)



def load_test_data():
    """Read ``test_data.csv`` from ``DATASET_PATH`` into an evaluation DataLoader.

    Texts are tokenized with the shared ``global_tokenizer`` exactly as it
    currently stands (this function never fits it) and padded to ``SEQ_LEN``.

    Returns:
        A ``DataLoader`` (batch size 32, not shuffled) over a
        ``TensorDataset`` of padded token ids and integer labels.
    """
    test_csv = os.path.join(DATASET_PATH, "test_data.csv")
    frame = pd.read_csv(test_csv)
    frame.columns = frame.columns.str.strip().str.lower()

    tweet_texts = frame["text"].astype(str).tolist()
    targets = frame["target"].values  # labels are used as-is, no re-encoding

    token_ids = pad_sequences(
        global_tokenizer.texts_to_sequences(tweet_texts),
        maxlen=SEQ_LEN,
        padding='post',
    )
    features = torch.tensor(token_ids, dtype=torch.long)
    label_tensor = torch.tensor(targets, dtype=torch.long)

    dataset = TensorDataset(features, label_tensor)
    return DataLoader(dataset, batch_size=32)

# ======================
#  Load GloVe
# ======================
def load_glove_embeddings(glove_path=None, word_index=None, max_words=None, embed_dim=None):
    """Build an embedding matrix for the tokenizer vocabulary from a GloVe file.

    The file is streamed line by line and only vectors for words that actually
    get a row in the matrix are parsed, so the full GloVe table is never held
    in memory. Rows for words missing from the file (and row 0) stay zero.

    Args:
        glove_path: Text file with one ``word v1 v2 ...`` entry per line.
            Defaults to the module-level ``GLOVE_PATH``.
        word_index: Mapping of word -> integer row index. Defaults to
            ``global_tokenizer.word_index``.
        max_words: Upper bound on the number of rows. Defaults to ``MAX_WORDS``.
        embed_dim: Number of columns. Defaults to ``EMBED_DIM``.

    Returns:
        A float32 ``torch.Tensor`` of shape
        ``(min(max_words, len(word_index) + 1), embed_dim)``.

    Raises:
        ValueError: If a needed line does not hold exactly ``embed_dim``
            numeric values (raised by NumPy on parse or assignment).
    """
    # Resolve defaults at call time so a tokenizer fitted after import is seen.
    if glove_path is None:
        glove_path = GLOVE_PATH
    if word_index is None:
        word_index = global_tokenizer.word_index
    if max_words is None:
        max_words = MAX_WORDS
    if embed_dim is None:
        embed_dim = EMBED_DIM

    vocab_size = min(max_words, len(word_index) + 1)
    embedding_matrix = np.zeros((vocab_size, embed_dim), dtype=np.float32)

    # Words that own a row in the matrix; everything else in the file is skipped.
    wanted_rows = {word: idx for word, idx in word_index.items() if idx < vocab_size}

    with open(glove_path, encoding='utf8') as f:
        for line in f:
            # Split off the word only; the (long) vector is parsed lazily below.
            parts = line.split(None, 1)
            if len(parts) < 2:
                continue
            row = wanted_rows.get(parts[0])
            if row is None:
                continue
            embedding_matrix[row] = np.asarray(parts[1].split(), dtype='float32')

    return torch.tensor(embedding_matrix, dtype=torch.float)

# ======================
#  Model with Attention
# ======================
class Attention(nn.Module):
    """Attention pooling over dimension 1 of the input.

    A single linear layer maps each position's ``2 * hidden_dim`` features to
    one score; scores are soft-maxed along dimension 1 and used to form a
    weighted sum of the input along that same dimension.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        # Input width is doubled: the layer is fed bidirectional GRU output.
        self.attn = nn.Linear(2 * hidden_dim, 1)

    def forward(self, x):
        scores = self.attn(x)
        alpha = scores.softmax(dim=1)
        pooled = (alpha * x).sum(dim=1)
        return pooled

class CNN_BiGRU_Attn(nn.Module):
    """Text classifier: embedding -> Conv1d -> bidirectional GRU -> attention -> linear.

    Args:
        embedding_matrix: 2-D float tensor ``(vocab_size, embed_dim)`` used to
            initialise the (trainable) embedding layer.
        hidden_dim: Hidden size of each GRU direction.
        output_dim: Number of output logits.
    """

    def __init__(self, embedding_matrix, hidden_dim, output_dim):
        super().__init__()
        _, embed_dim = embedding_matrix.shape
        # from_pretrained wraps the given tensor itself as the parameter, so
        # with freeze=False training would update the caller's tensor in place.
        # Clone it so every model owns its weights and the shared matrix that
        # is passed to each new client model stays at its initial values.
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix.clone(), freeze=False)
        self.conv = nn.Conv1d(embed_dim, embed_dim, kernel_size=3, padding=1)
        self.bigru = nn.GRU(embed_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.attn = Attention(hidden_dim)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)

    def forward(self, x):
        """Map a batch of token-id sequences to ``output_dim`` logits per sample."""
        x = self.embedding(x)
        # Conv1d convolves over the last axis, so move the embedding axis to
        # position 1 for the convolution and back afterwards for the GRU.
        x = x.permute(0, 2, 1)
        x = torch.relu(self.conv(x))
        x = x.permute(0, 2, 1)
        gru_out, _ = self.bigru(x)
        x = self.attn(gru_out)
        return self.fc(self.dropout(x))

# ======================
#  Flower Client
# ======================
class SentimentClient(fl.client.NumPyClient):
    """Flower NumPy client that trains and evaluates a sentiment model.

    Training runs on the client's own ``trainloader``; evaluation runs on the
    loader returned by ``load_test_data()``, which takes no client argument and
    is therefore the same test file for every client.

    Args:
        model: The ``nn.Module`` to train.
        trainloader: DataLoader yielding ``(inputs, labels)`` batches.
    """

    def __init__(self, model, trainloader):
        self.model = model
        self.trainloader = trainloader
        self.testloader = load_test_data()
        # Default reduction is the batch mean; evaluate() relies on that.
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(model.parameters(), lr=0.001)

    def get_parameters(self, config):
        """Return every ``state_dict`` entry as a NumPy array, in ``state_dict`` order."""
        return [val.cpu().numpy() for val in self.model.state_dict().values()]

    def set_parameters(self, parameters):
        """Load ``parameters`` (arrays in ``state_dict`` order) into the model.

        Raises:
            ValueError: If the number of arrays differs from the number of
                ``state_dict`` entries. Without this check ``zip`` would
                silently truncate and leave the model partially updated.
        """
        state_dict = self.model.state_dict()
        if len(parameters) != len(state_dict):
            raise ValueError(
                f"Expected {len(state_dict)} parameter arrays, got {len(parameters)}"
            )
        for key, value in zip(state_dict.keys(), parameters):
            state_dict[key] = torch.tensor(value)
        self.model.load_state_dict(state_dict)

    def fit(self, parameters, config):
        """Load the given weights, train for one pass over ``trainloader``.

        Returns:
            ``(updated_parameters, num_training_examples, {})``.
        """
        self.set_parameters(parameters)
        self.model.train()
        for x, y in self.trainloader:
            self.optimizer.zero_grad()
            out = self.model(x)
            loss = self.criterion(out, y)
            loss.backward()
            self.optimizer.step()
        return self.get_parameters({}), len(self.trainloader.dataset), {}

    def evaluate(self, parameters, config):
        """Load the given weights and score them on the test loader.

        Returns:
            ``(mean_loss_per_example, num_examples, {"accuracy": percent})``.

        Raises:
            ValueError: If the test loader yields no examples.
        """
        self.set_parameters(parameters)
        self.model.eval()
        total, correct, loss_sum = 0, 0, 0.0
        with torch.no_grad():
            for x, y in self.testloader:
                out = self.model(x)
                batch_size = y.size(0)
                # The criterion returns a batch mean; scale it back to a sum so
                # a short final batch is not over-weighted in the average.
                loss_sum += self.criterion(out, y).item() * batch_size
                total += batch_size
                correct += (out.argmax(1) == y).sum().item()
        if total == 0:
            raise ValueError("Cannot evaluate: the test loader yielded no examples")
        acc = 100 * correct / total
        avg_loss = loss_sum / total
        return avg_loss, total, {"accuracy": acc}

# ======================
#  Run FL Simulation with 3 Rounds
# ======================
from flwr.common import Context  # Make sure this import is included

def client_fn(cid: str):
    """Create the Flower client for simulated client id ``cid``.

    Client ids are zero-based while the data files are one-based, so id ``"0"``
    reads ``client_1_data.csv``. Each call builds a new model initialised from
    the module-level ``embedding_matrix``.
    """
    csv_path = os.path.join(DATASET_PATH, f"client_{int(cid)+1}_data.csv")
    loader = load_client_data(csv_path)
    net = CNN_BiGRU_Attn(embedding_matrix, hidden_dim=128, output_dim=2)
    # to_client() converts the NumPyClient into the client object Flower 1.x expects.
    return SentimentClient(net, loader).to_client()


# Tokenizer fitting (required before loading GloVe)
# The first load_client_data() call fits global_tokenizer on client 1's texts as
# a side effect; the returned DataLoader itself is not needed here.
_ = load_client_data(os.path.join(DATASET_PATH, "client_1_data.csv"))
# Module-level matrix read by client_fn() when it builds each client's model,
# so it must exist before the simulation starts.
embedding_matrix = load_glove_embeddings()

#  Aggregate client accuracies, weighted by each client's example count
def weighted_average(metrics):
    """Average the ``"accuracy"`` metric across clients, weighted by example count.

    Args:
        metrics: Sequence of ``(num_examples, metrics_dict)`` pairs, where each
            ``metrics_dict`` has an ``"accuracy"`` entry.

    Returns:
        ``{"accuracy": weighted_mean}``. When there are no pairs, or every
        ``num_examples`` is 0, the accuracy is reported as ``0.0`` instead of
        dividing by zero.
    """
    total_examples = sum(num_examples for num_examples, _ in metrics)
    if total_examples == 0:
        return {"accuracy": 0.0}
    weighted_sum = sum(num_examples * m["accuracy"] for num_examples, m in metrics)
    return {"accuracy": weighted_sum / total_examples}

# FedAvg with default settings, plus the example-weighted accuracy aggregator
# so per-round distributed accuracy is recorded in the returned history.
strategy = fl.server.strategy.FedAvg(evaluate_metrics_aggregation_fn=weighted_average)

# Three clients, three federated rounds; keep the returned history for reporting.
server_config = fl.server.ServerConfig(num_rounds=3)
history = fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=3,
    config=server_config,
    strategy=strategy,
)

# (round number, aggregated accuracy) pairs, as iterated below.
accuracy_by_round = history.metrics_distributed["accuracy"]

#  Print global accuracy after each round
print("\n📊 Global Accuracy After Each Round:")
for round_num, acc in accuracy_by_round:
    print(f"Round {round_num}: {acc:.2f}%")

#  Final accuracy: the value recorded for the last round
final_acc = accuracy_by_round[-1][1]
print(f"\n✅ Final Global Accuracy: {final_acc:.2f}%")



	Instead, use the `flwr run` CLI command to start a local simulation in your Flower app, as shown for example below:

		$ flwr new  # Create a new Flower app from a template

		$ flwr run  # Run the Flower app in Simulation Mode

	Using `start_simulation()` is deprecated.

            This is a deprecated feature. It will be removed
            entirely in future versions of Flower.
        
[92mINFO [0m:      Starting Flower simulation, config: num_rounds=3, no round_timeout
2025-04-13 20:10:51,956	INFO worker.py:1852 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'node:172.19.2.2': 1.0, 'node:__internal_head__': 1.0, 'accelerator_type:P100': 1.0, 'CPU': 4.0, 'memory': 20105474458.0, 'object_store_memory': 8616631910.0, 'GPU': 1.0}
[92mINFO [0m:      Optimize your simulation with Flower VCE: https://flower.ai/docs/framework/how-to-run-simulations.html
[92mINFO [0m:      No `client_resources` specified. Using minimal resources f


📊 Global Accuracy After Each Round:
Round 1: 72.20%
Round 2: 81.02%
Round 3: 81.57%

✅ Final Global Accuracy: 81.57%
