In [1]:
# ============================================================
# 0) Imports & basic config
# ============================================================
import os, json, csv, pickle, re
from typing import List
from collections import OrderedDict, Counter

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, Subset, DataLoader

import flwr as fl
from flwr.common import parameters_to_ndarrays, ndarrays_to_parameters

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

torch.backends.cudnn.benchmark = False
torch.backends.cudnn.enabled = True
torch.cuda.empty_cache()



In [2]:
# ---- CPU-only for training/eval; do NOT hide GPU globally ----
import torch

DEVICE_CLIENT = torch.device("cpu")   # single source of truth for ALL training/eval
DEVICE = DEVICE_CLIENT                # used by train/evaluate/Flower code
DEVICE_LLM = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ------- Paths & constants -------
PATH         = "llm-test-1"
os.makedirs(PATH, exist_ok=True)

ROUNDS       = 3
NUM_CLIENTS  = 10
BATCH_SIZE   = 64
BATCH_ROUND  = 16       # total batches per client segment (BATCH_ROUND * BATCH_SIZE = per-client slice length)
SIZE_ROUND   = int(BATCH_ROUND * BATCH_SIZE * NUM_CLIENTS)
DATA_GROUPS  = 8


In [3]:
# ============================================================
# 1) Load IoT dataset (your exact recipe)
# ============================================================
TrafficData = {'Dataset': {}}
sets_names = ['30','100','70','50','testing']

for DATA_NUM in sets_names:
    TrafficData['Dataset'][DATA_NUM] = pd.read_csv(
        f'data/2_Dataset_5_Attack_{DATA_NUM}_normal.csv',
        low_memory=False, quoting=csv.QUOTE_NONE, on_bad_lines='skip'
    )
    print(DATA_NUM, TrafficData['Dataset'][DATA_NUM].shape)

# shuffle for randomness
for DATA_NUM in TrafficData['Dataset']:
    TrafficData['Dataset'][DATA_NUM] = (
        TrafficData['Dataset'][DATA_NUM].sample(frac=1, random_state=42).reset_index(drop=True)
    )

# split each training set into DATA_GROUPS parts
TrafficData['Split'] = {}
sets_training = ['30','100','70','50']
for DATA_NUM in sets_training:
    TrafficData['Split'][DATA_NUM] = np.array_split(TrafficData['Dataset'][DATA_NUM], DATA_GROUPS)

# interleave groups vertically (faster: build list and concat once)
frames = []
for GROUP in range(DATA_GROUPS):
    frames.extend([
        TrafficData['Split']['30'][GROUP],
        TrafficData['Split']['100'][GROUP],
        TrafficData['Split']['70'][GROUP],
        TrafficData['Split']['50'][GROUP],
    ])
TrafficData['Combined'] = pd.concat(frames, ignore_index=True)
print("Combined shape:", TrafficData['Combined'].shape)

# split into Train/Test (features: all but last; label: last column)
TrafficData['Train'] = {
    'X': TrafficData['Combined'].iloc[:, :-1],
    'y': TrafficData['Combined'].iloc[:, -1]
}
TrafficData['Test'] = {
    'X': TrafficData['Dataset']['testing'].iloc[:, :-1],
    'y': TrafficData['Dataset']['testing'].iloc[:, -1]
}

# scale features
scaler = MinMaxScaler()
scaler.fit(TrafficData['Train']['X'])
TrafficData['Train']['X'] = scaler.transform(TrafficData['Train']['X'])
TrafficData['Test']['X']  = scaler.transform(TrafficData['Test']['X'])

# to numpy arrays
TrafficData['Train']['X'] = np.array(TrafficData['Train']['X'])
TrafficData['Train']['y'] = np.array(TrafficData['Train']['y'])
TrafficData['Test']['X']  = np.array(TrafficData['Test']['X'])
TrafficData['Test']['y']  = np.array(TrafficData['Test']['y'])

# round windows
TrafficData['ROUNDS'] = {}
SIZE_Demo = SIZE_ROUND
for ROUND in range(1, ROUNDS + 1):
    if ROUND == 1:
        TrafficData['ROUNDS'][ROUND] = {
            'X': TrafficData['Train']['X'][:SIZE_Demo],
            'y': TrafficData['Train']['y'][:SIZE_Demo],
        }
    else:
        TrafficData['ROUNDS'][ROUND] = {
            'X': TrafficData['Train']['X'][(SIZE_Demo - SIZE_ROUND):SIZE_Demo],
            'y': TrafficData['Train']['y'][(SIZE_Demo - SIZE_ROUND):SIZE_Demo],
        }
    SIZE_Demo += SIZE_ROUND

30 (184320, 99)
100 (184320, 99)
70 (184320, 99)
50 (184320, 99)
testing (120000, 99)


  return bound(*args, **kwds)


Combined shape: (737280, 99)


In [4]:
# ============================================================
# 2) Torch Datasets & Federated loaders
# ============================================================
class ClassifierDataset(Dataset):
    def __init__(self, X_data, y_data):
        self.X_data = torch.from_numpy(X_data).float()
        self.y_data = torch.from_numpy(y_data).long()
    def __getitem__(self, idx):
        return self.X_data[idx], self.y_data[idx]
    def __len__(self):
        return len(self.X_data)

TrafficData['trainsets'] = {
    r: ClassifierDataset(TrafficData['ROUNDS'][r]['X'], TrafficData['ROUNDS'][r]['y'])
    for r in range(1, ROUNDS + 1)
}
TrafficData['testset'] = ClassifierDataset(TrafficData['Test']['X'], TrafficData['Test']['y'])

def load_train(num_clients, ROUND):
    # Each client gets a contiguous slice of SIZE_ROUND
    portion_size = int(BATCH_ROUND * BATCH_SIZE)  # per-client samples
    portion_indices = [
        list(range(i * portion_size, min((i + 1) * portion_size, SIZE_ROUND)))
        for i in range(num_clients)
    ]
    portion_datasets = [Subset(TrafficData['trainsets'][ROUND], idx) for idx in portion_indices]
    return [DataLoader(ds, batch_size=BATCH_SIZE, shuffle=False) for ds in portion_datasets]

def load_test():
    return DataLoader(TrafficData['testset'], batch_size=BATCH_SIZE, shuffle=False)

Dataloaders = {r: load_train(NUM_CLIENTS, r) for r in range(1, ROUNDS + 1)}
Dataloaders['Test'] = load_test()

In [5]:
# ============================================================
# 3) Model, train/test
# ============================================================
class Net(nn.Module):
    def __init__(self, dropout=0.2):
        super().__init__()
        # 98 features, 15 classes (per your earlier Net)
        self.layer_1 = nn.Linear(98, 64)
        self.layer_2 = nn.Linear(64, 32)
        self.layer_out = nn.Linear(32, 15)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
    def forward(self, x):
        x = self.layer_1(x); x = self.relu(x); x = self.dropout(x)
        x = self.layer_2(x); x = self.relu(x); x = self.dropout(x)
        return self.layer_out(x)

def train_once(net, loader, epochs, lr, weight_decay=0.0):
    crit = nn.CrossEntropyLoss()
    opt  = optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)
    net.train()
    for ep in range(epochs):
        total, correct, loss_sum = 0, 0, 0.0
        for X, y in loader:
            X, y = X.to(DEVICE), y.to(DEVICE)
            opt.zero_grad()
            out = net(X)
            loss = crit(out, y)
            loss.backward()
            opt.step()
            loss_sum += loss.item()
            total += y.size(0)
            correct += (torch.max(out, 1)[1] == y).sum().item()
        print(f"  Epoch {ep+1}/{epochs} — Loss={loss_sum:.4f}, Acc={correct/total:.4f}")

def evaluate(net, loader):
    crit = nn.CrossEntropyLoss()
    net.eval()
    total, correct, loss_sum = 0, 0, 0.0
    with torch.no_grad():
        for X, y in loader:
            X, y = X.to(DEVICE), y.to(DEVICE)
            out = net(X)
            loss_sum += crit(out, y).item()
            pred = torch.max(out, 1)[1]
            correct += (pred == y).sum().item()
            total += y.size(0)
    acc  = correct / total
    loss = loss_sum / total
    print(f"Validation — Loss={loss:.4f}, Acc={acc:.4f}")
    return {"val_accuracy": float(acc), "val_loss": float(loss)}

def get_parameters(net) -> List[np.ndarray]:
    return [v.detach().cpu().numpy() for _, v in net.state_dict().items()]

def set_parameters(net, params: List[np.ndarray]):
    net.load_state_dict(OrderedDict({k: torch.tensor(v) for k, v in zip(net.state_dict().keys(), params)}))

In [9]:
# ============================================================
# 4) LLM controller (Phi-3-mini – GPU/CPU hybrid safe loader)
# ============================================================
import os, re, json, torch
from functools import lru_cache
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

USE_OFFLINE_LLM = True
if USE_OFFLINE_LLM:
    os.environ["TRANSFORMERS_OFFLINE"] = "1"
    os.environ["HF_HUB_OFFLINE"] = "1"
os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/hf_cache")

LLM_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

device_llm = "cuda" if torch.cuda.is_available() else "cpu"
dtype_llm  = torch.float16 if device_llm == "cuda" else torch.float32
print(f"Loading {LLM_MODEL_ID} with mixed device_map (GPU+CPU fallback)...")

@lru_cache(maxsize=1)
def _get_llm_and_tok():
    tok = AutoTokenizer.from_pretrained(LLM_MODEL_ID, use_fast=True)
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token

    # ✅ Safe load: use accelerate to automatically split model across GPU/CPU
    llm = AutoModelForCausalLM.from_pretrained(
        LLM_MODEL_ID,
        dtype=dtype_llm,
        device_map="auto",             # let Accelerate place layers automatically
        offload_folder="/tmp/phi3_offload",  # offload overflow to CPU RAM
        low_cpu_mem_usage=True,
    )
    return llm, tok

llm, tok = _get_llm_and_tok()
# ⚠️ Do NOT set device manually; accelerate already handles placement
llm_pipe = pipeline("text-generation", model=llm, tokenizer=tok, dtype=dtype_llm)

print("✅ Phi-3 text-generation pipeline ready (hybrid GPU/CPU).")

GEN_KW = dict(max_new_tokens=60, temperature=0.0, do_sample=False)

def _extract_json(text: str) -> dict:
    m = re.search(r"\{.*\}", text, flags=re.S)
    if not m:
        return {}
    try:
        return json.loads(m.group(0))
    except Exception:
        return {}

def llm_review_and_suggest(cfg: dict, metrics: dict, data_info: dict) -> dict:
    compact_metrics = {k: v for k, v in metrics.items() if isinstance(v, (int, float))}
    prompt = (
        "You are an ML optimization assistant for a federated learning system.\n"
        "Optimize hyperparameters to improve validation accuracy and stability.\n"
        "Avoid overfitting. Return JSON ONLY with: learning_rate, epochs, dropout, weight_decay.\n\n"
        f"CURRENT_CONFIG:\n{json.dumps(cfg)}\n\n"
        f"METRICS:\n{json.dumps(compact_metrics)}\n\n"
        f"DATA_INFO:\n{json.dumps(data_info)}\n"
    )
    out = llm_pipe(prompt, **GEN_KW)[0]["generated_text"]
    res = _extract_json(out)
    return res or {
        "learning_rate": cfg.get("learning_rate", 1e-3),
        "epochs": cfg.get("epochs", 1),
        "dropout": cfg.get("dropout", 0.2),
        "weight_decay": cfg.get("weight_decay", 0.0),
    }


Loading microsoft/Phi-3-mini-4k-instruct with mixed device_map (GPU+CPU fallback)...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu


✅ Phi-3 text-generation pipeline ready (hybrid GPU/CPU).


In [10]:
# ============================================================
# 5) Flower client & custom strategy that saves GlobalModel_{round}.pth
# ============================================================
import gc, ray

# ---- Do NOT hide GPU globally; keep training on CPU via DEVICE and client_resources ----
DEVICE = torch.device("cpu")
DEVICE_CLIENT = DEVICE

def clear_memory():
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
    ray.shutdown()



class FlowerClient(fl.client.NumPyClient):
    def __init__(self, cid, net, loader, round_id):
        self.cid = cid
        self.net = net
        self.loader = loader
        self.round_id = round_id

    def get_parameters(self, config):
        return get_parameters(self.net)

    def fit(self, parameters, config):
        set_parameters(self.net, parameters)
        train_once(
            self.net,
            self.loader,
            epochs=int(config["local_epochs"]),
            lr=float(config["learning_rate"]),
            weight_decay=float(TRAIN_CFG.get("weight_decay", 0.0)),
        )
        return get_parameters(self.net), len(self.loader.dataset), {}

    def evaluate(self, parameters, config):
        # not used for centralized eval in this setup
        return 0.0, len(self.loader.dataset), {}


Global_Models = {}

class SaveModelStrategy(fl.server.strategy.FedAvg):
    def __init__(self, round_id, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.round_id = round_id

    def aggregate_fit(self, rnd, results, failures):
        aggregated_parameters_tuple = super().aggregate_fit(rnd, results, failures)
        if aggregated_parameters_tuple is None:
            return None

        aggregated_parameters, _ = aggregated_parameters_tuple
        if aggregated_parameters is not None:
            weights = parameters_to_ndarrays(aggregated_parameters)
            model = Net(dropout=TRAIN_CFG["dropout"]).to(DEVICE)
            set_parameters(model, weights)
            os.makedirs(PATH, exist_ok=True)
            torch.save(model.state_dict(), f"{PATH}/GlobalModel_{self.round_id}.pth")
            Global_Models[self.round_id] = model
        return aggregated_parameters_tuple


def fit_config(server_round: int):
    return {
        "current_round": server_round,
        "local_epochs": TRAIN_CFG["epochs"],
        "learning_rate": TRAIN_CFG["learning_rate"],
        "batch_size": TRAIN_CFG["batch_size"],
        "dropout": TRAIN_CFG["dropout"],
        "weight_decay": TRAIN_CFG["weight_decay"],
    }

In [11]:
# ============================================================
# Training configuration (LLM will adjust this each round)
# ============================================================
TRAIN_CFG = {
    "learning_rate": 1e-3,
    "epochs": 1,
    "dropout": 0.2,
    "weight_decay": 0.0,
    "batch_size": BATCH_SIZE,
}

def _clip(v, lo, hi):
    try:
        return max(lo, min(hi, v))
    except:
        return v

def apply_llm_suggestions(sugg: dict, cfg: dict) -> dict:
    new = dict(cfg)
    if "learning_rate" in sugg:
        new["learning_rate"] = float(_clip(float(sugg["learning_rate"]), 1e-5, 5e-3))
    if "epochs" in sugg:
        new["epochs"] = int(_clip(int(sugg["epochs"]), 1, 5))
    if "dropout" in sugg:
        new["dropout"] = float(_clip(float(sugg["dropout"]), 0.0, 0.6))
    if "weight_decay" in sugg:
        new["weight_decay"] = float(_clip(float(sugg["weight_decay"]), 0.0, 0.1))
    return new

def on_round_end(round_idx: int, round_metrics: dict, data_info: dict):
    global TRAIN_CFG
    print(f"\n[LLM] Reviewing round {round_idx}…")
    sugg = llm_review_and_suggest(TRAIN_CFG, round_metrics, data_info)
    if not sugg:
        print("[LLM] No JSON suggestions; keeping config.")
        return TRAIN_CFG
    TRAIN_CFG = apply_llm_suggestions(sugg, TRAIN_CFG)
    print(f"[LLM] Updated TRAIN_CFG: {TRAIN_CFG}")
    return TRAIN_CFG


In [12]:
# ============================================================
# 6) Orchestration: run FL rounds, evaluate, LLM tune
# ============================================================
Global_Models[0] = Net(dropout=TRAIN_CFG["dropout"]).to(DEVICE)
init_path = f"{PATH}/0_Input_Random_model_Net.pth"
if not os.path.exists(init_path):
    torch.save(Global_Models[0].state_dict(), init_path)
else:
    Global_Models[0].load_state_dict(torch.load(init_path, map_location=DEVICE))
Global_Models[0].train()

print("\n===== Starting Federated Training =====")
for rnd in range(1, ROUNDS + 1):
    print(f"\n--- Round {rnd} ---")

    # custom strategy per round
    strategy = SaveModelStrategy(
        round_id=rnd,
        fraction_fit=1.0,
        min_fit_clients=NUM_CLIENTS,
        min_available_clients=NUM_CLIENTS,
        on_fit_config_fn=fit_config,
        initial_parameters=ndarrays_to_parameters(get_parameters(Global_Models[rnd - 1])),
    )

    def client_fn(cid: str, round_id=rnd):
        cid_int = int(cid)
        loader = Dataloaders[round_id][cid_int % NUM_CLIENTS]
        net = Net(dropout=TRAIN_CFG["dropout"]).to(DEVICE)
        return FlowerClient(cid, net, loader, round_id)

    # ---- safer simulation setup ----
    fl.simulation.start_simulation(
        client_fn=client_fn,
        num_clients=NUM_CLIENTS,
        config=fl.server.ServerConfig(num_rounds=1),
        strategy=strategy,
        client_resources={"num_cpus": 1, "num_gpus": 0},
        ray_init_args={"num_cpus": 1, "num_gpus": 0, "include_dashboard": False},
    )

    # ---- load aggregated global model ----
    model_path = f"{PATH}/GlobalModel_{rnd}.pth"
    if os.path.exists(model_path):
        Global_Models[rnd] = Net(dropout=TRAIN_CFG["dropout"]).to(DEVICE)
        Global_Models[rnd].load_state_dict(torch.load(model_path, map_location=DEVICE))
        Global_Models[rnd].eval()
    else:
        print(f"⚠️ WARNING: {model_path} not found, reusing previous model.")
        Global_Models[rnd] = Global_Models[rnd - 1]

    # ---- Evaluate safely ----
    metrics = evaluate(Global_Models[rnd], Dataloaders['Test'])

    # if zero division risk (no test samples), patch result
    if not metrics or "val_accuracy" not in metrics or np.isnan(metrics["val_accuracy"]):
        metrics = {"val_accuracy": 0.0, "val_loss": 1.0}

    # ---- LLM tuning step ----
    cfg_snapshot = dict(TRAIN_CFG)
    data_info = {
        "n_clients": NUM_CLIENTS,
        "round_size": SIZE_ROUND,
        "batch_size": TRAIN_CFG["batch_size"],
        "n_features": int(TrafficData['Test']['X'].shape[1]),
        "n_classes": 15,
    }

    TRAIN_CFG = on_round_end(rnd, metrics, data_info)
    clear_memory()

print("\n===== Training Complete =====")
print("Saved models under:", PATH)

INFO flwr 2025-11-05 02:57:22,032 | app.py:175 | Starting Flower simulation, config: ServerConfig(num_rounds=1, round_timeout=None)



===== Starting Federated Training =====

--- Round 1 ---


2025-11-05 02:57:28,874	INFO worker.py:2012 -- Started a local Ray instance.
INFO flwr 2025-11-05 02:57:31,207 | app.py:210 | Flower VCE: Ray initialized with resources: {'object_store_memory': 2324035584.0, 'memory': 5422749696.0, 'node:10.192.11.216': 1.0, 'CPU': 1.0, 'node:__internal_head__': 1.0}
INFO flwr 2025-11-05 02:57:31,207 | app.py:224 | Flower VCE: Resources for each Virtual Client: {'num_cpus': 1, 'num_gpus': 0}
INFO flwr 2025-11-05 02:57:31,376 | app.py:270 | Flower VCE: Creating VirtualClientEngineActorPool with 1 actors
INFO flwr 2025-11-05 02:57:31,379 | server.py:89 | Initializing global parameters
INFO flwr 2025-11-05 02:57:31,380 | server.py:272 | Using initial parameters provided by strategy
INFO flwr 2025-11-05 02:57:31,381 | server.py:91 | Evaluating initial parameters
INFO flwr 2025-11-05 02:57:31,381 | server.py:104 | FL starting
DEBUG flwr 2025-11-05 02:57:31,383 | server.py:222 | fit_round 1: strategy sampled 10 clients (out of 10)


[36m(DefaultActor pid=138066)[0m   Epoch 1/1 — Loss=41.3321, Acc=0.2568
[36m(DefaultActor pid=138066)[0m   Epoch 1/1 — Loss=41.3030, Acc=0.2578
[36m(DefaultActor pid=138066)[0m   Epoch 1/1 — Loss=41.2510, Acc=0.2686
[36m(DefaultActor pid=138066)[0m   Epoch 1/1 — Loss=41.2953, Acc=0.2607
[36m(DefaultActor pid=138066)[0m   Epoch 1/1 — Loss=41.3466, Acc=0.2666
[36m(DefaultActor pid=138066)[0m   Epoch 1/1 — Loss=41.3159, Acc=0.2637
[36m(DefaultActor pid=138066)[0m   Epoch 1/1 — Loss=41.2820, Acc=0.2510
[36m(DefaultActor pid=138066)[0m   Epoch 1/1 — Loss=41.3848, Acc=0.2666


DEBUG flwr 2025-11-05 02:57:38,330 | server.py:236 | fit_round 1 received 10 results and 0 failures
DEBUG flwr 2025-11-05 02:57:38,373 | server.py:173 | evaluate_round 1: strategy sampled 10 clients (out of 10)


[36m(DefaultActor pid=138066)[0m   Epoch 1/1 — Loss=41.3051, Acc=0.2568
[36m(DefaultActor pid=138066)[0m   Epoch 1/1 — Loss=41.3202, Acc=0.2656


DEBUG flwr 2025-11-05 02:57:40,915 | server.py:187 | evaluate_round 1 received 10 results and 0 failures
INFO flwr 2025-11-05 02:57:40,917 | server.py:153 | FL finished in 9.535465009999825
INFO flwr 2025-11-05 02:57:40,919 | app.py:225 | app_fit: losses_distributed [(1, 0.0)]
INFO flwr 2025-11-05 02:57:40,921 | app.py:226 | app_fit: metrics_distributed_fit {}
INFO flwr 2025-11-05 02:57:40,923 | app.py:227 | app_fit: metrics_distributed {}
INFO flwr 2025-11-05 02:57:40,923 | app.py:228 | app_fit: losses_centralized []
INFO flwr 2025-11-05 02:57:40,925 | app.py:229 | app_fit: metrics_centralized {}


Validation — Loss=0.0383, Acc=0.5000

[LLM] Reviewing round 1…


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
[36m(pid=gcs_server)[0m [2025-11-05 02:57:52,619 E 137443 137443] (gcs_server) gcs_server.cc:302: Failed to establish connection to the event+metrics exporter agent. Events and metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[33m(raylet)[0m [2025-11-05 02:57:57,890 E 137864 137864] (raylet) main.cc:975: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[2025-11-05 02:58:01,196 E 127744 138063] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[36m(DefaultAc

[LLM] Updated TRAIN_CFG: {'learning_rate': 0.001, 'epochs': 1, 'dropout': 0.2, 'weight_decay': 0.0, 'batch_size': 64}


INFO flwr 2025-11-05 03:20:03,649 | app.py:175 | Starting Flower simulation, config: ServerConfig(num_rounds=1, round_timeout=None)



--- Round 2 ---


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

[36m(DefaultActor pid=192184)[0m   Epoch 1/1 — Loss=37.9567, Acc=0.3828
[36m(DefaultActor pid=192184)[0m   Epoch 1/1 — Loss=38.1049, Acc=0.3486
[36m(DefaultActor pid=192184)[0m   Epoch 1/1 — Loss=38.0187, Acc=0.3701
[36m(DefaultActor pid=192184)[0m   Epoch 1/1 — Loss=37.8806, Acc=0.3750
[36m(DefaultActor pid=192184)[0m   Epoch 1/1 — Loss=37.9843, Acc=0.3789
[36m(DefaultActor pid=192184)[0m   Epoch 1/1 — Loss=38.1612, Acc=0.3652
[36m(DefaultActor pid=192184)[0m   Epoch 1/1 — Loss=38.0750, Acc=0.3721
[36m(DefaultActor pid=192184)[0m   Epoch 1/1 — Loss=37.9250, Acc=0.3838


DEBUG flwr 2025-11-05 03:20:16,421 | server.py:236 | fit_round 1 received 10 results and 0 failures
DEBUG flwr 2025-11-05 03:20:16,431 | server.py:173 | evaluate_round 1: strategy sampled 10 clients (out of 10)


[36m(DefaultActor pid=192184)[0m   Epoch 1/1 — Loss=37.8788, Acc=0.3867
[36m(DefaultActor pid=192184)[0m   Epoch 1/1 — Loss=38.1172, Acc=0.3564


DEBUG flwr 2025-11-05 03:20:18,701 | server.py:187 | evaluate_round 1 received 10 results and 0 failures
INFO flwr 2025-11-05 03:20:18,703 | server.py:153 | FL finished in 8.053580404000058
INFO flwr 2025-11-05 03:20:18,704 | app.py:225 | app_fit: losses_distributed [(1, 0.0)]
INFO flwr 2025-11-05 03:20:18,704 | app.py:226 | app_fit: metrics_distributed_fit {}
INFO flwr 2025-11-05 03:20:18,705 | app.py:227 | app_fit: metrics_distributed {}
INFO flwr 2025-11-05 03:20:18,705 | app.py:228 | app_fit: losses_centralized []
INFO flwr 2025-11-05 03:20:18,706 | app.py:229 | app_fit: metrics_centralized {}


Validation — Loss=0.0325, Acc=0.5860

[LLM] Reviewing round 2…


[36m(pid=gcs_server)[0m [2025-11-05 03:20:34,024 E 191722 191722] (gcs_server) gcs_server.cc:302: Failed to establish connection to the event+metrics exporter agent. Events and metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[33m(raylet)[0m [2025-11-05 03:20:37,541 E 192140 192140] (raylet) main.cc:975: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[2025-11-05 03:20:40,631 E 127744 192183] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[36m(DefaultActor pid=192184)[0m [2025-11-05 03:20:40,617 E 192184 192266] core_worker_process.cc:825: Failed to establish connection to the metric

[LLM] Updated TRAIN_CFG: {'learning_rate': 0.001, 'epochs': 1, 'dropout': 0.2, 'weight_decay': 0.0, 'batch_size': 64}


INFO flwr 2025-11-05 03:42:45,710 | app.py:175 | Starting Flower simulation, config: ServerConfig(num_rounds=1, round_timeout=None)



--- Round 3 ---


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

[36m(DefaultActor pid=251946)[0m   Epoch 1/1 — Loss=26.4977, Acc=0.6934
[36m(DefaultActor pid=251946)[0m   Epoch 1/1 — Loss=32.2567, Acc=0.3965
[36m(DefaultActor pid=251946)[0m   Epoch 1/1 — Loss=21.2901, Acc=0.9990


ERROR flwr 2025-11-05 03:42:58,541 | ray_client_proxy.py:147 | Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.12/site-packages/flwr/simulation/ray_transport/ray_client_proxy.py", line 140, in _submit_job
    res = self.actor_pool.get_client_result(self.cid, timeout)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.12/site-packages/flwr/simulation/ray_transport/ray_actor.py", line 402, in get_client_result
    return self._fetch_future_result(cid)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.12/site-packages/flwr/simulation/ray_transport/ray_actor.py", line 288, in _fetch_future_result
    res_cid, res = ray.get(future)  # type: (str, ClientRes)
                   ^^^^^^^^^^^^^^^
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.12/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
 

Validation — Loss=0.0246, Acc=0.5000

[LLM] Reviewing round 3…


[36m(pid=gcs_server)[0m [2025-11-05 03:43:16,199 E 251789 251789] (gcs_server) gcs_server.cc:302: Failed to establish connection to the event+metrics exporter agent. Events and metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[33m(raylet)[0m [2025-11-05 03:43:20,934 E 251902 251902] (raylet) main.cc:975: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[2025-11-05 03:43:24,102 E 127744 251945] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[33m(raylet)[0m [2025-11-05 03:43:50,938 E 251902 251902] (raylet) node_manager.cc:3252: 1 Workers (tasks / actors) killed due to memory pressure (

[LLM] Updated TRAIN_CFG: {'learning_rate': 0.001, 'epochs': 1, 'dropout': 0.2, 'weight_decay': 0.0, 'batch_size': 64}

===== Training Complete =====
Saved models under: llm-test-1
