# Fedration learning on url dataset


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from copy import deepcopy
import plotly.graph_objects as go

# === Hyperparameters ===
num_clients = 3
num_rounds = 10
local_epochs = 2
batch_size = 32
learning_rate = 0.01


In [2]:
from datasets import load_dataset
import pandas as pd
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [3]:
#dataset1

# Load the dataset from Hugging Face Hub
train_dataset = load_dataset("kmack/Phishing_urls", split="train")
test_dataset = load_dataset("kmack/Phishing_urls", split="test")
valid_dataset = load_dataset("kmack/Phishing_urls", split="valid")

# Convert to pandas DataFrame
train_df = train_dataset.to_pandas()
test_df = test_dataset.to_pandas()
valid_df = valid_dataset.to_pandas()


all_df = [train_df, test_df,valid_df]
for i, df in enumerate(all_df):
    df['url'] = df['text']
    df.drop('text',axis=1,inplace=True)
print(train_df.head())

   label                                                url
0      0             xenophongroup.com/montjoie/compgns.htm
1      1    www.azzali.eu/&usg=AOvVaw2phVSb_ENMrkATGNx5LQ0l
2      1                     guildmusic.edu.au/js/index.htm
3      1  memo.unexpectedrunner.com/ezxgytw4et\nholotili...
4      0  en.wikipedia.org/wiki/Category:American_televi...


In [4]:
# Special tokens
special_tokens = ['<PAD>', '<UNK>', '<START>', '<END>']

# ASCII printable characters
ascii_chars = [chr(i) for i in range(32, 127)]  # ' ' (space) to '~'

# Full vocabulary
vocab = special_tokens + ascii_chars

# Create mappings
char2idx = {ch: idx for idx, ch in enumerate(vocab)}
idx2char = {idx: ch for ch, idx in char2idx.items()}
vocab_size = len(vocab)
print("Vocabulary size:", vocab_size)
print("Sample:", vocab[:50])


Vocabulary size: 99
Sample: ['<PAD>', '<UNK>', '<START>', '<END>', ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M']


In [5]:
train_df = train_df.drop_duplicates(subset =['url'])

In [6]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn.functional as F
tqdm.pandas()

max_url_length = 50


def encode_url(url):
    indices = torch.tensor([0]*max_url_length)
    for i,c in enumerate(url[:max_url_length]):
        indices[i] = char2idx.get(c, char2idx['<UNK>'])
    #print(indices)  


    return indices

# Apply encoding to your URL column
train_df['encode'] = train_df['url'].progress_apply(encode_url)
test_df['encode'] = test_df['url'].progress_apply(encode_url)
valid_df['encode'] = valid_df['url'].progress_apply(encode_url)



100%|██████████| 535838/535838 [01:10<00:00, 7643.80it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['encode'] = train_df['url'].progress_apply(encode_url)
100%|██████████| 70882/70882 [00:09<00:00, 7661.89it/s]
100%|██████████| 70882/70882 [00:09<00:00, 7415.40it/s]


In [7]:
from torchvision import datasets, transforms
from torch.utils.data import TensorDataset, DataLoader
import torch

# === Min-Max Normalization ===
# Since ToTensor() already scales pixels from [0,255] to [0,1], we don't need further scaling
# Optionally, you can just ensure the range stays [0,1] explicitly
# Convert encoded URLs to proper tensors
url_tensor = torch.tensor(np.stack(train_df['encode'].values), dtype=torch.long)
labels_tensor = torch.tensor(train_df['label'].values, dtype=torch.long)

valid_url_tensor = torch.tensor(np.stack(valid_df['encode'].values), dtype=torch.long)
valid_labels_tensor = torch.tensor(valid_df['label'].values, dtype=torch.long)

test_url_tensor = torch.tensor(np.stack(test_df['encode'].values), dtype=torch.long)
test_labels_tensor = torch.tensor(test_df['label'].values, dtype=torch.long)

# Take only a fraction for faster runs
train_fraction = 0.7
test_fraction = 0.7

train_len = int(len(url_tensor) * train_fraction)
test_len = int(len(valid_url_tensor) * test_fraction)

train_subset = torch.utils.data.Subset(TensorDataset(url_tensor, labels_tensor), range(train_len))
test_subset = torch.utils.data.Subset(TensorDataset(valid_url_tensor, valid_labels_tensor), range(test_len))

# Split among clients
client_data_sizes = ([len(train_subset)//num_clients] * (num_clients - 1)) + [
    len(train_subset) - (len(train_subset)//num_clients)*(num_clients - 1)
]

client_datasets = [
    torch.utils.data.Subset(train_subset, range(sum(client_data_sizes[:i]), sum(client_data_sizes[:i+1])))
    for i in range(num_clients)
]

client_loaders = [torch.utils.data.DataLoader(ds, batch_size=batch_size, shuffle=True)
                  for ds in client_datasets]

test_loader = torch.utils.data.DataLoader(test_subset, batch_size=batch_size, shuffle=False)


In [8]:
# === Local Training Function ===
def train_local(model, train_loader, epochs):
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    
    for epoch in range(epochs):
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
    return model.state_dict()

In [9]:
# === Weighted Federated Averaging Function ===
def weighted_federated_average(state_dicts, data_sizes):
    total_size = sum(data_sizes)
    avg_state_dict = deepcopy(state_dicts[0])
    
    # Initialize weighted contribution from first client
    for key in avg_state_dict.keys():
        avg_state_dict[key] = avg_state_dict[key] * (data_sizes[0]/total_size)
    
    # Add weighted contributions from other clients
    for i in range(1, len(state_dicts)):
        for key in avg_state_dict.keys():
            avg_state_dict[key] += state_dicts[i][key] * (data_sizes[i]/total_size)
    
    return avg_state_dict

# === Aggregation Algorithms ===
def fedavg_aggregate(local_state_dicts, client_data_sizes, global_state):
    total_data = sum(client_data_sizes)
    new_state = deepcopy(global_state)
    for key in global_state.keys():
        if 'fc' in key or 'classifier' in key:
            new_state[key] = sum([
                (client_data_sizes[i] / total_data) * local_state_dicts[i][key]
                for i in range(len(local_state_dicts))
            ])
    return new_state

def fedavg_momentum_aggregate(local_state_dicts, client_data_sizes, global_state, v_prev, momentum=0.9):
    total_data = sum(client_data_sizes)
    new_state = deepcopy(global_state)
    delta_w = {}
    for key in global_state.keys():
        if 'fc' in key or 'classifier' in key:
            delta_w[key] = sum([
                (client_data_sizes[i] / total_data) *
                (local_state_dicts[i][key] - global_state[key])
                for i in range(len(local_state_dicts))
            ])
            delta_w[key] += momentum * v_prev[key]
        else:
            delta_w[key] = torch.zeros_like(global_state[key])
    for key in global_state.keys():
        new_state[key] += delta_w[key]
    return new_state, delta_w

# === Evaluation Function ===
def test_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()
            total += target.size(0)
    return correct / total

In [10]:
class URLBinaryCNN(nn.Module):
    def __init__(self, vocab_size):
        super(URLBinaryCNN, self).__init__()

        # ----- Embedding -----
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=128)

        # ----- Conv Block 1 -----
        self.conv1 = nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(128)
        self.pool1 = nn.MaxPool1d(kernel_size=2)

        # ----- Conv Block 2 -----
        self.conv2 = nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(128)
        self.pool2 = nn.MaxPool1d(kernel_size=2)

        # ----- Conv Block 3 -----
        self.conv3 = nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(128)
        self.pool3 = nn.MaxPool1d(kernel_size=2)

        # ----- Conv Block 4 -----
        self.conv4 = nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm1d(128)
        self.pool4 = nn.MaxPool1d(kernel_size=2)

        # ----- Conv Block 5 -----
        self.conv5 = nn.Conv1d(in_channels=128, out_channels=16, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm1d(16)

        # ----- Global Pooling -----
        self.global_pool = nn.AdaptiveMaxPool1d(1)

        # ----- Fully Connected -----
        self.fc1 = nn.Linear(16, 256)
        self.dropout1 = nn.Dropout(0.4582219325663578)
        self.fc2 = nn.Linear(256, 256)
        self.dropout2 = nn.Dropout(0.13788391792099114)
        self.sigmoid = nn.Sigmoid()
        self.fc3 = nn.Linear(256, 1)

    def forward(self, x):
        x = self.embedding(x)          # (batch, seq_len, embed_dim)
        x = x.permute(0, 2, 1)         # (batch, embed_dim, seq_len)

        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool1(x)

        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool2(x)

        x = F.relu(self.bn3(self.conv3(x)))
        x = self.pool3(x)

        x = F.relu(self.bn4(self.conv4(x)))
        x = self.pool4(x)

        x = F.relu(self.bn5(self.conv5(x)))
        x = self.global_pool(x)
        x = x.squeeze(-1)

        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.sigmoid(self.fc3(x))

        return x

In [11]:
# === Initialize Models ===
global_fedavg = URLBinaryCNN(vocab_size).to(device)
global_fedavgM = deepcopy(global_fedavg)
v_prev = {k: torch.zeros_like(v) for k, v in global_fedavgM.state_dict().items()}
momentum = 0.9

# New models for additional aggregations
global_fedAWA = deepcopy(global_fedavg)
global_moss = deepcopy(global_fedavg)
global_fedADP = deepcopy(global_fedavg)

# === Trackers ===
epochs_history = []

# Global accuracies
fedavg_accs, fedavgM_accs = [], []
fedAWA_accs, moss_accs, fedADP_accs = [], [], []

# Local accuracies per client
local_accs_fedavg = [[] for _ in range(num_clients)]
local_accs_fedavgM = [[] for _ in range(num_clients)]
local_accs_fedAWA = [[] for _ in range(num_clients)]
local_accs_moss = [[] for _ in range(num_clients)]
local_accs_fedADP = [[] for _ in range(num_clients)]

# === Helper aggregation functions ===

def fedAWA_aggregate(state_dicts, prev_global):
    """Adaptive weighting by cosine similarity to global model"""
    new_state = deepcopy(prev_global)
    for key in new_state.keys():
        # compute similarity weights (simulated here by sum of updates)
        updates = torch.stack([sd[key] - prev_global[key] for sd in state_dicts], dim=0)
        weights = torch.softmax(torch.sum(updates, dim=list(range(1, updates.ndim))), dim=0)
        new_state[key] = prev_global[key] + torch.sum(weights.view(-1, *[1]*(updates.ndim-1)) * updates, dim=0)
    return new_state

def moss_aggregate(state_dicts):
    """Full-weight aggregation (simple mean simulated)"""
    avg_state = deepcopy(state_dicts[0])
    for key in avg_state.keys():
        for i in range(1, len(state_dicts)):
            avg_state[key] += state_dicts[i][key]
        avg_state[key] /= len(state_dicts)
    return avg_state

def fedADP_aggregate(state_dicts, data_sizes):
    """Simulated dynamic aggregation (weighted avg by data sizes)"""
    total_size = sum(data_sizes)
    avg_state = deepcopy(state_dicts[0])
    for key in avg_state.keys():
        avg_state[key] = avg_state[key] * (data_sizes[0]/total_size)
    for i in range(1, len(state_dicts)):
        for key in avg_state.keys():
            avg_state[key] += state_dicts[i][key] * (data_sizes[i]/total_size)
    return avg_state

# === Federated Training Loop ===
for r in range(num_rounds):
    print(f"\n--- Round {r+1} ---")

    # --- FedAvg ---
    local_states_avg = []
    for c in range(num_clients):
        local_model = deepcopy(global_fedavg)
        local_state = train_local(local_model, client_loaders[c], local_epochs)
        local_states_avg.append(local_state)
        local_accs_fedavg[c].append(test_model(local_model, client_loaders[c]) * 100)

    # --- FedAvgM ---
    local_states_mom = []
    for c in range(num_clients):
        local_model = deepcopy(global_fedavgM)
        local_state = train_local(local_model, client_loaders[c], local_epochs)
        local_states_mom.append(local_state)
        local_accs_fedavgM[c].append(test_model(local_model, client_loaders[c]) * 100)

    # --- FedAWA ---
    local_states_AWA = []
    for c in range(num_clients):
        local_model = deepcopy(global_fedAWA)
        local_state = train_local(local_model, client_loaders[c], local_epochs)
        local_states_AWA.append(local_state)
        local_accs_fedAWA[c].append(test_model(local_model, client_loaders[c]) * 100)

    # --- Moss ---
    local_states_moss = []
    for c in range(num_clients):
        local_model = deepcopy(global_moss)
        local_state = train_local(local_model, client_loaders[c], local_epochs)
        local_states_moss.append(local_state)
        local_accs_moss[c].append(test_model(local_model, client_loaders[c]) * 100)

    # --- FedADP ---
    local_states_ADP = []
    for c in range(num_clients):
        local_model = deepcopy(global_fedADP)
        local_state = train_local(local_model, client_loaders[c], local_epochs)
        local_states_ADP.append(local_state)
        local_accs_fedADP[c].append(test_model(local_model, client_loaders[c]) * 100)

    # === Aggregate ===
    global_fedavg.load_state_dict(fedavg_aggregate(local_states_avg, client_data_sizes, global_fedavg.state_dict()))
    global_fedavgM.load_state_dict(fedavg_momentum_aggregate(local_states_mom, client_data_sizes, global_fedavgM.state_dict(), v_prev, momentum)[0])
    v_prev = fedavg_momentum_aggregate(local_states_mom, client_data_sizes, global_fedavgM.state_dict(), v_prev, momentum)[1]
    global_fedAWA.load_state_dict(fedAWA_aggregate(local_states_AWA, global_fedAWA.state_dict()))
    global_moss.load_state_dict(moss_aggregate(local_states_moss))
    global_fedADP.load_state_dict(fedADP_aggregate(local_states_ADP, client_data_sizes))

    # === Test global models ===
    acc_fedavg = test_model(global_fedavg, test_loader) * 100
    acc_fedavgM = test_model(global_fedavgM, test_loader) * 100
    acc_fedAWA = test_model(global_fedAWA, test_loader) * 100
    acc_moss = test_model(global_moss, test_loader) * 100
    acc_fedADP = test_model(global_fedADP, test_loader) * 100
    print(f"FedAvg: {acc_fedavg:.2f}% | FedAvgM: {acc_fedavgM:.2f}% | FedAWA: {acc_fedAWA:.2f}% | Moss: {acc_moss:.2f}% | FedADP: {acc_fedADP:.2f}%")

    # === Save histories ===
    fedavg_accs.append(acc_fedavg)
    fedavgM_accs.append(acc_fedavgM)
    fedAWA_accs.append(acc_fedAWA)
    moss_accs.append(acc_moss)
    fedADP_accs.append(acc_fedADP)
    epochs_history.append((r+1) * local_epochs)





--- Round 1 ---


AcceleratorError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# === Plot all 5 aggregation methods ===
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(go.Scatter(x=epochs_history, y=fedavg_accs, mode='lines+markers', name='FedAvg', line=dict(width=3, color='black')))
fig.add_trace(go.Scatter(x=epochs_history, y=fedavgM_accs, mode='lines+markers', name='FedAvgM', line=dict(width=3, color='blue', dash='dot')))
fig.add_trace(go.Scatter(x=epochs_history, y=fedAWA_accs, mode='lines+markers', name='FedAWA', line=dict(width=3, color='red', dash='dash')))
fig.add_trace(go.Scatter(x=epochs_history, y=moss_accs, mode='lines+markers', name='Moss', line=dict(width=3, color='green', dash='dot')))
fig.add_trace(go.Scatter(x=epochs_history, y=fedADP_accs, mode='lines+markers', name='FedADP', line=dict(width=3, color='orange', dash='dash')))


fig.update_layout(title='Global Model Accuracies for Different Aggregation Functions',
                  xaxis_title='Cumulative Epochs',
                  yaxis_title='Accuracy (%)',
                  template='plotly_white')

fig.show()

In [9]:
# === Plotly Visualization ===
fig = go.Figure()

# Global Accuracies
fig.add_trace(go.Scatter(
    x=epochs_history, y=fedavg_accs,
    mode="lines+markers", name="FedAvg Global Acc", line=dict(color="black", width=3)
))
fig.add_trace(go.Scatter(
    x=epochs_history, y=fedavgM_accs,
    mode="lines+markers", name="FedAvgM Global Acc", line=dict(color="purple", width=3)
))

# Local Clients Accuracies
colors = [
    "#FF0000",  # Red
    "#0000FF",  # Blue
    "#00AA00",  # Green
    "#FFA500",  # Orange
    "#800080",  # Purple
    "#00CED1",  # Dark Turquoise
    "#FFD700",  # Gold
    "#A52A2A",  # Brown
    "#808080",  # Gray
    "#FF69B4",  # Hot Pink
    "#00FFFF",  # Cyan
    "#8B0000",  # Dark Red
    "#4B0082",  # Indigo
    "#7FFF00",  # Chartreuse
    "#FF1493",  # Deep Pink
    "#2E8B57",  # Sea Green
    "#D2691E",  # Chocolate
    "#1E90FF",  # Dodger Blue
    "#F0E68C",  # Khaki
    "#FF4500"   # Orange Red
]


for c in range(num_clients):
    fig.add_trace(go.Scatter(
        x=epochs_history, y=local_accs_fedavg[c],
        mode="lines+markers", name=f"FedAvg Client {c+1}", line=dict(color=colors[c], dash="dot")
    ))
    fig.add_trace(go.Scatter(
        x=epochs_history, y=local_accs_fedavgM[c],
        mode="lines+markers", name=f"FedAvgM Client {c+1}", line=dict(color=colors[c], dash="dash")
    ))



fig.update_layout(
    title="Federated Learning: FedAvg vs FedAvgM (Global + Local Accuracies)",
    xaxis_title="Cumulative Epochs",
    yaxis_title="Accuracy (%)",
    template="plotly_white"
)
fig.show()


In [10]:
import io
import os
import hashlib
import hmac
from copy import deepcopy

# === Simulated Blockchain utilities ===

class SimpleBlock:
    def __init__(self, prev_hash, round_idx, aggregation_type, txs, aggregate_hash=None):
        """
        txs: list of dicts { 'client_id': int, 'model_hash': str, 'signature': str, 'agg_type': str(optional) }
        aggregate_hash: hash of aggregated global model (if this block contains aggregated result)
        """
        self.prev_hash = prev_hash
        self.round = round_idx
        self.aggregation = aggregation_type
        self.txs = txs
        self.aggregate_hash = aggregate_hash
        self.block_hash = self.compute_block_hash()

    def compute_block_hash(self):
        m = hashlib.sha256()
        m.update((str(self.prev_hash) + str(self.round) + str(self.aggregation)).encode())
        for tx in self.txs:
            m.update((tx['model_hash'] + tx['signature'] + str(tx['client_id'])).encode())
        if self.aggregate_hash:
            m.update(self.aggregate_hash.encode())
        return m.hexdigest()

class SimpleBlockchain:
    def __init__(self):
        self.chain = []
        self.genesis()

    def genesis(self):
        genesis_block = SimpleBlock(prev_hash="0"*64, round_idx=0, aggregation_type="GENESIS", txs=[])
        self.chain.append(genesis_block)

    def add_block(self, block: SimpleBlock):
        # Basic validation: predecessor match
        if block.prev_hash != self.chain[-1].block_hash:
            raise ValueError("Invalid prev_hash for new block")
        self.chain.append(block)

    def last_hash(self):
        return self.chain[-1].block_hash

# === Helpers for serializing state_dict and signing ===

def state_dict_to_bytes(state_dict):
    """Serialize a state_dict to bytes using torch.save into a buffer."""
    buffer = io.BytesIO()
    torch.save(state_dict, buffer)
    buffer.seek(0)
    return buffer.read()

def compute_model_hash(bytes_blob):
    return hashlib.sha256(bytes_blob).hexdigest()

def sign_model_hash(model_hash: str, key: bytes):
    """Simulated signature using HMAC-SHA256 with a per-client secret key."""
    return hmac.new(key, model_hash.encode(), hashlib.sha256).hexdigest()

def verify_signature(model_hash: str, signature: str, key: bytes):
    expected = hmac.new(key, model_hash.encode(), hashlib.sha256).hexdigest()
    return hmac.compare_digest(expected, signature)

# === Create client secret keys (for demo) and server key ===
client_secret_keys = {c: hashlib.sha256(f"client_key_{c}".encode()).digest() for c in range(num_clients)}
server_secret_key = hashlib.sha256(b"server_key").digest()

# === Initialize Blockchain ===
blockchain = SimpleBlockchain()

# === Initialize Models (unchanged architectures) ===
global_fedavg = MLP().to(device)
global_fedavgM = deepcopy(global_fedavg)
v_prev = {k: torch.zeros_like(v) for k, v in global_fedavgM.state_dict().items()}
momentum = 0.9

# New models for additional aggregations
global_fedAWA = deepcopy(global_fedavg)
global_moss = deepcopy(global_fedavg)
global_fedADP = deepcopy(global_fedavg)

# === Trackers (renamed to include 'chain' / 'blockchain' term) ===
epochs_chain_history = []

# Global accuracies (renamed)
fedavg_chain_accs, fedavgM_chain_accs = [], []
fedAWA_chain_accs, moss_chain_accs, fedADP_chain_accs = [], [], []

# Local accuracies per client (renamed)
local_chain_accs_fedavg = [[] for _ in range(num_clients)]
local_chain_accs_fedavgM = [[] for _ in range(num_clients)]
local_chain_accs_fedAWA = [[] for _ in range(num_clients)]
local_chain_accs_moss = [[] for _ in range(num_clients)]
local_chain_accs_fedADP = [[] for _ in range(num_clients)]

# Also maintain a log of transactions submitted each round (for inspection)
round_tx_logs = []  # list of dicts per round

# === Helper aggregation functions (kept as before) ===
def fedAWA_aggregate(state_dicts, prev_global):
    """Adaptive weighting by cosine similarity to global model (simulated)."""
    new_state = deepcopy(prev_global)
    for key in new_state.keys():
        updates = torch.stack([sd[key] - prev_global[key] for sd in state_dicts], dim=0)
        # compute a simple alignment score per client: flatten and dot with sign of global update
        flat_updates = updates.view(updates.size(0), -1)
        # sum of absolute update magnitudes per client as a proxy for contribution
        scores = torch.norm(flat_updates, p=2, dim=1)
        weights = torch.softmax(scores, dim=0)
        weighted = torch.sum(weights.view(-1, *[1]*(updates.ndim-1)) * updates, dim=0)
        new_state[key] = prev_global[key] + weighted
    return new_state

def moss_aggregate(state_dicts):
    """Full-weight aggregation (simple mean simulated)."""
    avg_state = deepcopy(state_dicts[0])
    for key in avg_state.keys():
        for i in range(1, len(state_dicts)):
            avg_state[key] += state_dicts[i][key]
        avg_state[key] /= len(state_dicts)
    return avg_state

def fedADP_aggregate(state_dicts, data_sizes):
    """Simulated dynamic aggregation (weighted avg by data sizes)."""
    total_size = sum(data_sizes)
    avg_state = deepcopy(state_dicts[0])
    for key in avg_state.keys():
        avg_state[key] = avg_state[key] * (data_sizes[0]/total_size)
    for i in range(1, len(state_dicts)):
        for key in avg_state.keys():
            avg_state[key] += state_dicts[i][key] * (data_sizes[i]/total_size)
    return avg_state

# === Federated Training Loop with Blockchain-based submission of updates ===
for r in range(num_rounds):
    print(f"\n--- Round {r+1} ---")

    round_txs = []  # transactions for this round (for blockchain block)
    # For each aggregation type we will collect client-submitted transactions separately.
    # To avoid re-training multiple times per client, we'll reuse the local state per client per aggregation type.
    # (The user code already trains separate local models per aggregation; we preserve that to keep behaviors separate.)

    # --- FedAvg (clients train on current global_fedavg) ---
    local_states_avg = []
    for c in range(num_clients):
        local_model = deepcopy(global_fedavg)
        local_state = train_local(local_model, client_loaders[c], local_epochs)
        local_states_avg.append(local_state)
        local_chain_accs_fedavg[c].append(test_model(local_model, client_loaders[c]) * 100)

        # serialize, hash, sign, and create tx
        blob = state_dict_to_bytes(local_state)
        mhash = compute_model_hash(blob)
        sig = sign_model_hash(mhash, client_secret_keys[c])
        tx = {'client_id': c, 'model_hash': mhash, 'signature': sig, 'agg': 'FedAvg'}
        round_txs.append(tx)

    # --- FedAvgM (clients train on global_fedavgM) ---
    local_states_mom = []
    for c in range(num_clients):
        local_model = deepcopy(global_fedavgM)
        local_state = train_local(local_model, client_loaders[c], local_epochs)
        local_states_mom.append(local_state)
        local_chain_accs_fedavgM[c].append(test_model(local_model, client_loaders[c]) * 100)

        blob = state_dict_to_bytes(local_state)
        mhash = compute_model_hash(blob)
        sig = sign_model_hash(mhash, client_secret_keys[c])
        tx = {'client_id': c, 'model_hash': mhash, 'signature': sig, 'agg': 'FedAvgM'}
        round_txs.append(tx)

    # --- FedAWA ---
    local_states_AWA = []
    for c in range(num_clients):
        local_model = deepcopy(global_fedAWA)
        local_state = train_local(local_model, client_loaders[c], local_epochs)
        local_states_AWA.append(local_state)
        local_chain_accs_fedAWA[c].append(test_model(local_model, client_loaders[c]) * 100)

        blob = state_dict_to_bytes(local_state)
        mhash = compute_model_hash(blob)
        sig = sign_model_hash(mhash, client_secret_keys[c])
        tx = {'client_id': c, 'model_hash': mhash, 'signature': sig, 'agg': 'FedAWA'}
        round_txs.append(tx)

    # --- Moss ---
    local_states_moss = []
    for c in range(num_clients):
        local_model = deepcopy(global_moss)
        local_state = train_local(local_model, client_loaders[c], local_epochs)
        local_states_moss.append(local_state)
        local_chain_accs_moss[c].append(test_model(local_model, client_loaders[c]) * 100)

        blob = state_dict_to_bytes(local_state)
        mhash = compute_model_hash(blob)
        sig = sign_model_hash(mhash, client_secret_keys[c])
        tx = {'client_id': c, 'model_hash': mhash, 'signature': sig, 'agg': 'Moss'}
        round_txs.append(tx)

    # --- FedADP ---
    local_states_ADP = []
    for c in range(num_clients):
        local_model = deepcopy(global_fedADP)
        local_state = train_local(local_model, client_loaders[c], local_epochs)
        local_states_ADP.append(local_state)
        local_chain_accs_fedADP[c].append(test_model(local_model, client_loaders[c]) * 100)

        blob = state_dict_to_bytes(local_state)
        mhash = compute_model_hash(blob)
        sig = sign_model_hash(mhash, client_secret_keys[c])
        tx = {'client_id': c, 'model_hash': mhash, 'signature': sig, 'agg': 'FedADP'}
        round_txs.append(tx)

    # === Submit round transactions to blockchain (simulate validation) ===
    # Validate each tx signature before creating the block.
    valid_txs = []
    for tx in round_txs:
        cid = tx['client_id']
        if verify_signature(tx['model_hash'], tx['signature'], client_secret_keys[cid]):
            valid_txs.append(tx)
        else:
            print(f"Warning: invalid signature from client {cid} in round {r+1}, tx ignored.")

    # Create a block that records all valid transactions for this round (aggregations listed per tx)
    prev_hash = blockchain.last_hash()
    block_for_round = SimpleBlock(prev_hash=prev_hash, round_idx=r+1, aggregation_type="client_submissions", txs=valid_txs)
    blockchain.add_block(block_for_round)
    round_tx_logs.append({'round': r+1, 'tx_count': len(valid_txs), 'block_hash': block_for_round.block_hash})

    # === Aggregate (server pulls validated client states and performs aggregation per method) ===
    # Note: In a real system, the server would fetch the actual state blobs from distributed storage (e.g., IPFS)
    # using the model_hash. Here, we already have the local_states lists in memory, so we proceed.

    # FedAvg aggregation (weighted)
    fedavg_state = fedavg_aggregate(local_states_avg, client_data_sizes, global_fedavg.state_dict())
    global_fedavg.load_state_dict(fedavg_state)

    # FedAvgM aggregation (momentum) -- assume fedavg_momentum_aggregate returns (new_state, new_v_prev)
    fedavgM_state, v_prev = fedavg_momentum_aggregate(local_states_mom, client_data_sizes, global_fedavgM.state_dict(), v_prev, momentum)
    global_fedavgM.load_state_dict(fedavgM_state)

    # FedAWA aggregation
    fedAWA_state = fedAWA_aggregate(local_states_AWA, global_fedAWA.state_dict())
    global_fedAWA.load_state_dict(fedAWA_state)

    # Moss aggregation
    moss_state = moss_aggregate(local_states_moss)
    global_moss.load_state_dict(moss_state)

    # FedADP aggregation
    fedADP_state = fedADP_aggregate(local_states_ADP, client_data_sizes)
    global_fedADP.load_state_dict(fedADP_state)

    # === After aggregation: server computes global model hashes and records them on-chain ===
    # Serialize and hash each aggregated global model, sign with server key, and append as a new block.
    aggregates = []
    for agg_name, model in [
        ('FedAvg', global_fedavg),
        ('FedAvgM', global_fedavgM),
        ('FedAWA', global_fedAWA),
        ('Moss', global_moss),
        ('FedADP', global_fedADP)
    ]:
        state_bytes = state_dict_to_bytes(model.state_dict())
        agg_hash = compute_model_hash(state_bytes)
        sig = sign_model_hash(agg_hash, server_secret_key)
        aggregates.append({'aggregation': agg_name, 'aggregate_hash': agg_hash, 'signature': sig})

    # create a block summarizing the aggregated models for this round
    agg_txs = [{'client_id': 'server', 'model_hash': a['aggregate_hash'], 'signature': a['signature'], 'agg': a['aggregation']} for a in aggregates]
    prev_hash = blockchain.last_hash()
    agg_block = SimpleBlock(prev_hash=prev_hash, round_idx=r+1, aggregation_type="server_aggregates", txs=agg_txs)
    blockchain.add_block(agg_block)

    # === Test global models ===
    acc_fedavg = test_model(global_fedavg, test_loader) * 100
    acc_fedavgM = test_model(global_fedavgM, test_loader) * 100
    acc_fedAWA = test_model(global_fedAWA, test_loader) * 100
    acc_moss = test_model(global_moss, test_loader) * 100
    acc_fedADP = test_model(global_fedADP, test_loader) * 100
    print(f"FedAvg: {acc_fedavg:.2f}% | FedAvgM: {acc_fedavgM:.2f}% | FedAWA: {acc_fedAWA:.2f}% | Moss: {acc_moss:.2f}% | FedADP: {acc_fedADP:.2f}%")

    # === Save histories (renamed) ===
    fedavg_chain_accs.append(acc_fedavg)
    fedavgM_chain_accs.append(acc_fedavgM)
    fedAWA_chain_accs.append(acc_fedAWA)
    moss_chain_accs.append(acc_moss)
    fedADP_chain_accs.append(acc_fedADP)
    epochs_chain_history.append((r+1) * local_epochs)

# At the end you have:
# - blockchain.chain : list of SimpleBlock objects (genesis + per-round client_submissions + per-round server_aggregates)
# - round_tx_logs : summary info for each round
# - *_chain_accs and local_chain_accs_* : accuracy histories (renamed to include blockchain term)



--- Round 1 ---
FedAvg: 89.46% | FedAvgM: 89.03% | FedAWA: 89.04% | Moss: 88.63% | FedADP: 88.76%

--- Round 2 ---
FedAvg: 92.17% | FedAvgM: 91.49% | FedAWA: 92.43% | Moss: 92.17% | FedADP: 92.63%

--- Round 3 ---
FedAvg: 93.70% | FedAvgM: 93.10% | FedAWA: 93.83% | Moss: 93.80% | FedADP: 93.80%

--- Round 4 ---
FedAvg: 94.56% | FedAvgM: 93.30% | FedAWA: 94.59% | Moss: 94.39% | FedADP: 94.69%

--- Round 5 ---
FedAvg: 95.20% | FedAvgM: 92.43% | FedAWA: 95.04% | Moss: 94.63% | FedADP: 95.13%

--- Round 6 ---
FedAvg: 95.41% | FedAvgM: 93.50% | FedAWA: 95.26% | Moss: 95.04% | FedADP: 95.34%

--- Round 7 ---
FedAvg: 95.56% | FedAvgM: 94.51% | FedAWA: 95.69% | Moss: 95.57% | FedADP: 95.37%

--- Round 8 ---
FedAvg: 95.86% | FedAvgM: 95.20% | FedAWA: 95.87% | Moss: 96.03% | FedADP: 95.67%

--- Round 9 ---
FedAvg: 95.99% | FedAvgM: 95.64% | FedAWA: 95.90% | Moss: 96.04% | FedADP: 95.94%

--- Round 10 ---
FedAvg: 96.14% | FedAvgM: 95.49% | FedAWA: 96.06% | Moss: 96.06% | FedADP: 95.89%

--- Roun

In [1]:
from datasets import load_dataset
import pandas as pd

In [2]:
#dataset1

# Load the dataset from Hugging Face Hub
train_dataset = load_dataset("kmack/Phishing_urls", split="train")
test_dataset = load_dataset("kmack/Phishing_urls", split="test")
valid_dataset = load_dataset("kmack/Phishing_urls", split="valid")

# Convert to pandas DataFrame
train_df = train_dataset.to_pandas()
test_df = test_dataset.to_pandas()
valid_df = valid_dataset.to_pandas()


all_df = [train_df, test_df,valid_df]
for i, df in enumerate(all_df):
    df['url'] = df['text']
    df.drop('text',axis=1,inplace=True)
print(train_df.head())

   label                                                url
0      0             xenophongroup.com/montjoie/compgns.htm
1      1    www.azzali.eu/&usg=AOvVaw2phVSb_ENMrkATGNx5LQ0l
2      1                     guildmusic.edu.au/js/index.htm
3      1  memo.unexpectedrunner.com/ezxgytw4et\nholotili...
4      0  en.wikipedia.org/wiki/Category:American_televi...


In [3]:
# Special tokens
special_tokens = ['<PAD>', '<UNK>', '<START>', '<END>']

# ASCII printable characters
ascii_chars = [chr(i) for i in range(32, 127)]  # ' ' (space) to '~'

# Full vocabulary
vocab = special_tokens + ascii_chars

# Create mappings
char2idx = {ch: idx for idx, ch in enumerate(vocab)}
idx2char = {idx: ch for ch, idx in char2idx.items()}
vocab_size = len(vocab)
print("Vocabulary size:", vocab_size)
print("Sample:", vocab[:50])


Vocabulary size: 99
Sample: ['<PAD>', '<UNK>', '<START>', '<END>', ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M']


In [4]:
train_df = train_df.drop_duplicates(subset =['url'])

In [6]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset
tqdm.pandas()

max_url_length = 50


def encode_url(url):
    indices = torch.tensor([0]*max_url_length)
    for i,c in enumerate(url[:max_url_length]):
        indices[i] = char2idx.get(c, char2idx['<UNK>'])
    #print(indices)  


    return indices

# Apply encoding to your URL column
train_df['encode'] = train_df['url'].progress_apply(encode_url)
test_df['encode'] = test_df['url'].progress_apply(encode_url)
valid_df['encode'] = valid_df['url'].progress_apply(encode_url)



100%|██████████| 535838/535838 [01:11<00:00, 7448.38it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['encode'] = train_df['url'].progress_apply(encode_url)
100%|██████████| 70882/70882 [00:09<00:00, 7496.27it/s]
100%|██████████| 70882/70882 [00:09<00:00, 7503.44it/s]


In [14]:
# =================================
# 🧠 Custom Dataset Class
# =================================
class URLDataset(Dataset):
    def __init__(self, df):
        self.encodes = df['encode'].tolist()
        self.labels = df['label'].tolist()

    def __len__(self):
        return len(self.encodes)

    def __getitem__(self, idx):
        x = self.encodes[idx]
        y = torch.tensor(self.labels[idx], dtype=torch.long)
        return x, y


# =================================
# 🧩 Create PyTorch Datasets
# =================================
train_full = URLDataset(train_df)
test_full = URLDataset(test_df)
valid_full = URLDataset(valid_df)

# =================================
# 🪶 Use Fraction for Faster Training
# =================================
train_fraction = 0.7
test_fraction = 0.7

train_len = int(len(train_full) * train_fraction)
test_len = int(len(test_full) * test_fraction)

train_subset = Subset(train_full, range(train_len))
test_subset = Subset(test_full, range(test_len))

# =================================
# 🧍 Split Among Federated Clients
# =================================


client_data_sizes = ([len(train_subset)//num_clients] * (num_clients - 1)) + [
    len(train_subset) - (len(train_subset)//num_clients)*(num_clients - 1)
]

client_datasets = [
    Subset(train_subset, range(sum(client_data_sizes[:i]), sum(client_data_sizes[:i+1])))
    for i in range(num_clients)
]

client_loaders = [DataLoader(ds, batch_size=batch_size, shuffle=True)
                  for ds in client_datasets]

test_loader = DataLoader(test_subset, batch_size=batch_size, shuffle=False)

# =================================
# ✅ Verify Setup
# =================================
print(f"Total train samples: {len(train_subset)}")
for i, ds in enumerate(client_datasets):
    print(f"Client {i+1} samples: {len(ds)}")

print(f"Test samples: {len(test_subset)}")

# Example check
for data, label in client_loaders[0]:
    print("Sample batch shape:", data.shape)
    print("Labels:", label)
    break

Total train samples: 375086
Client 1 samples: 125028
Client 2 samples: 125028
Client 3 samples: 125030
Test samples: 49617
Sample batch shape: torch.Size([32, 50])
Labels: tensor([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1,
        0, 1, 0, 1, 1, 0, 0, 1])


In [26]:
# -------------------------
# === Helper Training / Eval ===
# -------------------------
def train_local(model, train_loader, epochs, lr=learning_rate):
    model.to(device)
    model.train()
    optimizer = optim.SGD(model.parameters(), lr=lr)
    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(epochs):
        for xb, yb in train_loader:
            xb = xb.to(device)
            yb = yb.to(device)

            # Ensure float shape (batch, 1)
            if yb.ndim == 1:
                yb = yb.unsqueeze(1)
            yb = yb.float()

            # Ensure inputs in embedding range
            xb = xb.clamp(0, model.embedding.num_embeddings - 1)

            optimizer.zero_grad()
            out = model(xb)
            loss = criterion(out, yb)
            loss.backward()
            optimizer.step()

    return {k: v.cpu() for k, v in model.state_dict().items()}


def test_model(model, loader):
    """
    Evaluate model on given loader. Return accuracy (0-1).
    """
    model.to(device)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(device)
            yb = yb.to(device)
            out = model(xb)
            preds = (torch.sigmoid(out).squeeze(-1) >= 0.5).long()
            correct += (preds == yb).sum().item()
            total += yb.size(0)
    return correct / max(total, 1)

# -------------------------
# === Aggregation helpers ===
# -------------------------
from copy import deepcopy

def fedavg_aggregate(state_dicts, data_sizes, prev_global):
    """Weighted average by data_sizes. state_dicts are CPU tensors."""
    total = sum(data_sizes)
    avg = deepcopy(prev_global)
    for key in avg.keys():
        avg[key] = torch.zeros_like(avg[key])
    for sd, size in zip(state_dicts, data_sizes):
        weight = size / total
        for key in avg.keys():
            avg[key] += sd[key].float() * weight
    return avg

def fedavg_momentum_aggregate(state_dicts, data_sizes, prev_global, v_prev, momentum=0.9):
    """
    FedAvg with server momentum (FedAvgM style).
    Returns (new_state_dict, new_v_prev)
    """
    avg = fedavg_aggregate(state_dicts, data_sizes, prev_global)
    # compute delta = avg - prev_global
    delta = {}
    for k in avg.keys():
        delta[k] = (avg[k].to(device) - prev_global[k].to(device)).clone()
    # update momentum buffer v = momentum * v_prev + delta
    new_v = {}
    new_state = {}
    for k in v_prev.keys():
        v = momentum * v_prev[k].to(device) + delta[k]
        new_v[k] = v.cpu()
        new_state[k] = (prev_global[k].to(device) + v).cpu()
    return new_state, new_v

# Keep previously defined fedAWA, moss, fedADP functions (use yours or the ones below)
def fedAWA_aggregate(state_dicts, prev_global):
    """Adaptive weighting by L2 norm of client updates (simple simulation)."""
    new_state = deepcopy(prev_global)
    # stack flattened update norms
    norms = []
    updates = []
    for sd in state_dicts:
        flat = torch.cat([ (sd[k] - prev_global[k]).view(-1).float() for k in sd.keys() ])
        updates.append(flat)
        norms.append(torch.norm(flat, p=2).item())
    norms = torch.tensor(norms, dtype=torch.float32)
    weights = torch.softmax(norms, dim=0)
    # combine per-parameter
    for key in new_state.keys():
        combined = sum(w * (sd[key] - prev_global[key]).float() for w, sd in zip(weights, state_dicts))
        new_state[key] = (prev_global[key].float() + combined).cpu()
    return new_state

def moss_aggregate(state_dicts):
    """Mean of weights (simple simulation of Moss)."""
    avg_state = deepcopy(state_dicts[0])
    for key in avg_state.keys():
        for i in range(1, len(state_dicts)):
            avg_state[key] = avg_state[key] + state_dicts[i][key]
        avg_state[key] = (avg_state[key] / len(state_dicts)).float()
    return avg_state

def fedADP_aggregate(state_dicts, data_sizes):
    """Weighted avg by data sizes (simulation of FedADP for same-arch clients)."""
    total = sum(data_sizes)
    avg = deepcopy(state_dicts[0])
    for key in avg.keys():
        avg[key] = avg[key].float() * (data_sizes[0] / total)
    for i in range(1, len(state_dicts)):
        for key in avg.keys():
            avg[key] += state_dicts[i][key].float() * (data_sizes[i] / total)
    return avg

In [None]:
# === Initialize Blockchain ===
blockchain = SimpleBlockchain()

# === Initialize Models (CNN-based) ===
global_fedavg = URLBinaryCNN(vocab_size).to(device)
global_fedavgM = deepcopy(global_fedavg)
v_prev = {k: torch.zeros_like(v) for k, v in global_fedavgM.state_dict().items()}
momentum = 0.9

global_fedAWA = deepcopy(global_fedavg)
global_moss = deepcopy(global_fedavg)
global_fedADP = deepcopy(global_fedavg)

# === Blockchain training trackers ===
epochs_chain_history = []
fedavg_chain_accs, fedavgM_chain_accs = [], []
fedAWA_chain_accs, moss_chain_accs, fedADP_chain_accs = [], [], []
local_chain_accs_fedavg = [[] for _ in range(num_clients)]
local_chain_accs_fedavgM = [[] for _ in range(num_clients)]
local_chain_accs_fedAWA = [[] for _ in range(num_clients)]
local_chain_accs_moss = [[] for _ in range(num_clients)]
local_chain_accs_fedADP = [[] for _ in range(num_clients)]
round_tx_logs = []


# === Federated Training Loop with Blockchain Integration ===
for r in range(num_rounds):
    print(f"\n--- Round {r+1} ---")
    round_txs = []

    # ========== Client Updates ==========
    for agg_type, global_model, local_acc_store, store_list in [
        ('FedAvg', global_fedavg, local_chain_accs_fedavg, fedavg_chain_accs),
        ('FedAvgM', global_fedavgM, local_chain_accs_fedavgM, fedavgM_chain_accs),
        ('FedAWA', global_fedAWA, local_chain_accs_fedAWA, fedAWA_chain_accs),
        ('Moss', global_moss, local_chain_accs_moss, moss_chain_accs),
        ('FedADP', global_fedADP, local_chain_accs_fedADP, fedADP_chain_accs)
    ]:
        local_states = []
        for c in range(num_clients):
            local_model = deepcopy(global_model)
            local_state = train_local(local_model, client_loaders[c], local_epochs)
            local_states.append(local_state)
            acc = test_model(local_model, client_loaders[c]) * 100
            local_acc_store[c].append(acc)

            blob = state_dict_to_bytes(local_state)
            mhash = compute_model_hash(blob)
            sig = sign_model_hash(mhash, client_secret_keys[c])
            tx = {'client_id': c, 'model_hash': mhash, 'signature': sig, 'agg': agg_type}
            round_txs.append(tx)

        # save local states for aggregation later
        if agg_type == 'FedAvg': local_states_avg = local_states
        elif agg_type == 'FedAvgM': local_states_mom = local_states
        elif agg_type == 'FedAWA': local_states_AWA = local_states
        elif agg_type == 'Moss': local_states_moss = local_states
        elif agg_type == 'FedADP': local_states_ADP = local_states

    # ========== Blockchain Validation ==========
    valid_txs = []
    for tx in round_txs:
        cid = tx['client_id']
        if verify_signature(tx['model_hash'], tx['signature'], client_secret_keys[cid]):
            valid_txs.append(tx)
        else:
            print(f"⚠️ Invalid signature from client {cid}, ignoring transaction.")

    prev_hash = blockchain.last_hash()
    block_clients = SimpleBlock(prev_hash, r+1, "client_submissions", valid_txs)
    blockchain.add_block(block_clients)
    round_tx_logs.append({'round': r+1, 'tx_count': len(valid_txs), 'block_hash': block_clients.block_hash})

    # ========== Server Aggregation ==========
    fedavg_state = fedavg_aggregate(local_states_avg, client_data_sizes, global_fedavg.state_dict())
    global_fedavg.load_state_dict(fedavg_state)

    fedavgM_state, v_prev = fedavg_momentum_aggregate(local_states_mom, client_data_sizes, global_fedavgM.state_dict(), v_prev, momentum)
    global_fedavgM.load_state_dict(fedavgM_state)

    global_fedAWA.load_state_dict(fedAWA_aggregate(local_states_AWA, global_fedAWA.state_dict()))
    global_moss.load_state_dict(moss_aggregate(local_states_moss))
    global_fedADP.load_state_dict(fedADP_aggregate(local_states_ADP, client_data_sizes))

    # ========== Record Aggregated Models on Chain ==========
    aggregates = []
    for agg_name, model in [
        ('FedAvg', global_fedavg),
        ('FedAvgM', global_fedavgM),
        ('FedAWA', global_fedAWA),
        ('Moss', global_moss),
        ('FedADP', global_fedADP)
    ]:
        state_bytes = state_dict_to_bytes(model.state_dict())
        agg_hash = compute_model_hash(state_bytes)
        sig = sign_model_hash(agg_hash, server_secret_key)
        aggregates.append({'client_id': 'server', 'model_hash': agg_hash, 'signature': sig, 'agg': agg_name})

    prev_hash = blockchain.last_hash()
    block_server = SimpleBlock(prev_hash, r+1, "server_aggregates", aggregates)
    blockchain.add_block(block_server)

    # ========== Evaluate Global Models ==========
    fedavg_chain_accs.append(test_model(global_fedavg, test_loader) * 100)
    fedavgM_chain_accs.append(test_model(global_fedavgM, test_loader) * 100)
    fedAWA_chain_accs.append(test_model(global_fedAWA, test_loader) * 100)
    moss_chain_accs.append(test_model(global_moss, test_loader) * 100)
    fedADP_chain_accs.append(test_model(global_fedADP, test_loader) * 100)

    print(f"✅ Round {r+1} Accuracies:")
    print(f"FedAvg: {fedavg_chain_accs[-1]:.2f}% | FedAvgM: {fedavgM_chain_accs[-1]:.2f}% | "
          f"FedAWA: {fedAWA_chain_accs[-1]:.2f}% | Moss: {moss_chain_accs[-1]:.2f}% | "
          f"FedADP: {fedADP_chain_accs[-1]:.2f}%")

    epochs_chain_history.append((r+1) * local_epochs)


AcceleratorError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [31]:
!set CUDA_LAUNCH_BLOCKING=1   # on Windows cmd
# or

