# Dependências

In [22]:
# Mount the user's Google Drive under /content/drive/ so that files stored
# there can be read and written from this Colab runtime.
# force_remount=True asks Colab to mount again even if the drive is already mounted.
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)

Mounted at /content/drive/


In [23]:
import sys

# Folder on the mounted Drive that is added to the import path (the notebook
# later runs `from utils import ...`). Point this at your own copy of the folder.
course_dir = '/content/drive/MyDrive/Colab Notebooks/Minicurso SBRC'

# Only append once, so re-running this cell does not pile up duplicate entries.
if course_dir not in sys.path:
    sys.path.append(course_dir)

In [24]:
!pip install -q transformers sentencepiece evaluate "flwr[simulation]" trl bitsandbytes

In [25]:
# Turn off Weights & Biases reporting.
# `!export VAR=value` runs in a short-lived subshell, so the variable never
# reaches the kernel process; setting it through os.environ does.
import os

os.environ["WANDB_DISABLED"] = "true"

# Configuração

# Download e preparação do dataset

- Crie uma pasta `dataset` dentro de `MyDrive/Colab Notebooks/Minicurso SBRC` no seu Google Drive (é o caminho usado em `dataset_path`).
- Copie o conteúdo do diretório compartilhado para a pasta criada:
    - [Link para o drive](https://drive.google.com/drive/folders/1PMvFY9qhADKM5RnN2PdD8muQKeu3urSt?usp=sharing)

# Importações

In [26]:
from flwr.common import Context
from flwr.server import ServerConfig
import torch
import os
import json
from datasets import Dataset
from flwr.client import NumPyClient, ClientApp
from flwr.common import ndarrays_to_parameters
from flwr.server import ServerAppComponents, ServerApp
from flwr.server.strategy import FedAvg
from flwr.simulation import run_simulation

from utils import FitConfigFactory, get_evaluate_fn
from utils import set_parameters, cosine_learning_rate, TraningConfigBuilder, TrainerBuilder, get_parameters, \
    ModelBuilder, get_tokenizer

# Variáveis de configuração da simulação

In [27]:
# ---------------------------------------------------------------------------
# Filesystem locations (on the mounted Google Drive)
# ---------------------------------------------------------------------------
dataset_path = "/content/drive/MyDrive/Colab Notebooks/Minicurso SBRC/dataset"
results_path = "/content/drive/MyDrive/Colab Notebooks/Minicurso SBRC/results"
# The test split lives inside the dataset folder.
testset_path = f"{dataset_path}/test.csv"

# ---------------------------------------------------------------------------
# Model and learning-rate settings
# ---------------------------------------------------------------------------
model_name = "HuggingFaceTB/SmolLM-135M"
lora = True
lora_rank = 8
initial_lr, min_lr = 1e-3, 1e-5

# ---------------------------------------------------------------------------
# Federated training schedule
# ---------------------------------------------------------------------------
num_supernodes = 5
num_rounds = 2
fraction_fit = 0.4
fraction_eval = 0.0

# Values given to FitConfigFactory inside server_fn.
train_context_dict = {
    "num-rounds": num_rounds,
    "initial-lr": initial_lr, "min-lr": min_lr,
    "dataset-path": dataset_path, "results-path": results_path,
    "model-name": model_name, "lora": lora,
}

# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
experiment_name = "experimento_sbrc"

# Values given to get_evaluate_fn inside server_fn.
eval_context_dict = {
    "model-name": model_name,
    "lora-rank": lora_rank, "lora": lora,
    "testset-path": testset_path, "results-path": results_path,
    "nrows": None,
    "experiment-name": experiment_name,
}

In [28]:
# Pick the compute device once: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    # Expose only GPU 0 and release any cached CUDA memory.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    torch.cuda.empty_cache()

# Client

In [29]:
class LLMClient(NumPyClient):
    """Flower NumPyClient that trains a language model on one client partition.

    Attributes:
        cid: Client/partition id; selects the ``client_{cid}`` dataset folder
            and the per-client output folder.
        model: Model that receives the global parameters and is trained locally.
        tokenizer: Tokenizer handed to the trainer.
    """

    def __init__(self, cid, model, tokenizer) -> None:
        super().__init__()

        self.cid = cid
        self.model = model
        self.tokenizer = tokenizer

    def fit(self, parameters, config):
        """Run one round of local training and return the updated parameters.

        Args:
            parameters: Global parameters sent by the server for this round.
            config: Per-round settings. The keys read here are
                ``current_round``, ``num_rounds``, ``initial_lr``, ``min_lr``,
                ``dataset_path``, ``results_path``, ``model_name`` and ``lora``.

        Returns:
            A tuple ``(parameters, num_examples, metrics)`` where ``parameters``
            are the locally trained parameters, ``num_examples`` is the number
            of rows in the client dataset and ``metrics`` is an empty dict.
        """
        current_round = config["current_round"]
        total_rounds = config["num_rounds"]
        initial_lr = config["initial_lr"]
        min_lr = config["min_lr"]
        dataset_path = config["dataset_path"]
        results_path = config["results_path"]
        model_name = config["model_name"].lower()
        lora = config["lora"]
        # Reuse the notebook-level experiment name (also placed in
        # eval_context_dict) instead of repeating the literal, so a renamed
        # experiment does not leave the client output folder out of sync.
        sim_name = experiment_name

        # Load this client's partition from disk.
        client_dataset = Dataset.load_from_disk(f"{dataset_path}/client_{self.cid}")

        # Load the global parameters into the local model and move it to the
        # notebook-level device.
        set_parameters(self.model, parameters, lora)
        self.model.to(device)

        # Learning rate for this round, computed from the round index.
        new_lr = cosine_learning_rate(current_round=current_round,
                                      total_rounds=total_rounds,
                                      initial_lr=initial_lr,
                                      min_lr=min_lr)

        # Training configuration: outputs and logs both go to results_path.
        training_args = TraningConfigBuilder().with_output_dir(results_path).with_logging_dir(
            results_path).with_learning_rate(new_lr).build()

        # Object responsible for training the model.
        # NOTE(review): the evaluation dataset is the same partition used for
        # training, so the saved "validation" losses are measured on training
        # data -- confirm this is intended.
        trainer = TrainerBuilder().with_cid(self.cid).with_model(self.model).with_args(
            training_args).with_train_dataset(client_dataset).with_tokenizer(
            self.tokenizer).with_eval_dataset(client_dataset).with_model_name(model_name).build()

        print(f"Rodada {current_round}: Treinando Cliente {self.cid} com lr {new_lr}")

        trainer.train()

        parameters = get_parameters(self.model, lora)
        # Row count of the partition; avoids loading the whole 'labels' column
        # just to measure its length.
        dataset_size = len(client_dataset)

        print(f"Rodada {current_round}: Cliente {self.cid} treinou")

        # Persist the losses collected by the trainer, one folder per round and client.
        output_dir = f"{results_path}/fl-results/{sim_name}/round_{current_round}/client_{self.cid}"
        os.makedirs(output_dir, exist_ok=True)

        with open(f"{output_dir}/training_losses.json", "w") as f:
            json.dump(trainer.train_losses, f)

        with open(f"{output_dir}/validation_losses.json", "w") as f:
            json.dump(trainer.validation_losses, f)

        return parameters, dataset_size, {}


def client_fn(context: Context):
    """Create the Flower client for the partition named in the node config.

    A fresh model and tokenizer are built from the notebook-level settings
    (``model_name``, ``lora``, ``lora_rank``); the client id is read from
    ``context.node_config["partition-id"]``.
    """
    model_builder = ModelBuilder().with_model_name(model_name)
    model_builder = model_builder.enable_lora(lora).with_lora_rank(lora_rank)
    local_model = model_builder.build()

    local_tokenizer = get_tokenizer(model_name)

    partition_id = int(context.node_config["partition-id"])

    client = LLMClient(partition_id, local_model, local_tokenizer)
    return client.to_client()


# Servidor

In [30]:
def server_fn(context: Context):
    """Assemble the FedAvg strategy and server config for the simulation.

    Returns a ``ServerAppComponents`` holding a ``FedAvg`` strategy (seeded with
    the parameters of a freshly built global model) and a ``ServerConfig`` that
    runs ``num_rounds`` rounds.
    """
    # Callable that produces the per-round fit configuration sent to clients.
    fit_config_factory = FitConfigFactory(train_context_dict)

    # Global model; its parameters are the starting point of the federation.
    # TODO: 1 - understand the initialization done in Allan's notebook.
    # TODO: 2 - understand how that notebook manipulates the parameters.
    model_builder = ModelBuilder().with_model_name(model_name)
    model_builder = model_builder.enable_lora(lora).with_lora_rank(lora_rank)
    starting_parameters = ndarrays_to_parameters(
        get_parameters(model_builder.build(), lora)
    )

    # Evaluation function passed to the strategy as `evaluate_fn`.
    central_evaluate = get_evaluate_fn(eval_context_dict, device)

    # TODO: next step is to use dependency injection for the context inversion.
    fedavg = FedAvg(
        initial_parameters=starting_parameters,
        fraction_fit=fraction_fit,
        fraction_evaluate=fraction_eval,
        on_fit_config_fn=fit_config_factory,
        evaluate_fn=central_evaluate,
    )

    # Bundle the strategy and the round count for the ServerApp.
    return ServerAppComponents(
        strategy=fedavg,
        config=ServerConfig(num_rounds=num_rounds),
    )

# Simulação

In [31]:
# Wrap the two factory functions in the Flower app objects used by run_simulation.
client_app = ClientApp(client_fn=client_fn)
server_app = ServerApp(server_fn=server_fn)


In [32]:
# https://flower.ai/docs/framework/how-to-run-simulations.html
# Resources reserved for each simulated client. The GPU is requested only when
# CUDA is available: the notebook falls back to CPU for `device`, and a
# hard-coded "num_gpus": 1 could not be satisfied on a CPU-only runtime.
client_resources = {
    "num_cpus": 0.25,
    "num_gpus": 1 if torch.cuda.is_available() else 0,
}

run_simulation(
    server_app=server_app,
    client_app=client_app,
    num_supernodes=num_supernodes,
    backend_config={"client_resources": client_resources},
)

tokenizer_config.json:   0%|          | 0.00/3.69k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/801k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.10M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/831 [00:00<?, ?B/s]

[92mINFO [0m:      Starting Flower ServerApp, config: num_rounds=2, no round_timeout
[92mINFO [0m:      
[92mINFO [0m:      [INIT]
[92mINFO [0m:      Using initial global parameters provided by strategy
[92mINFO [0m:      Starting evaluation of initial global parameters
[36m(pid=9831)[0m 2025-05-22 23:00:38.383396: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(pid=9831)[0m E0000 00:00:1747954838.418268    9831 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(pid=9831)[0m E0000 00:00:1747954838.429251    9831 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Round: 0
K: 1
Threshold: 0.983983983983984
F1: 0.8741514360313316
Precision: 0.9147540983606557
Recall: 0.837
Round: 0
K: 3
Threshold: 0.993993993993994
F1: 0.8843020097772949
Precision: 0.9678953626634959
Recall: 0.814
Round: 0
K: 5
Threshold: 0.997997997997998
F1: 0.9356486210418795
Precision: 0.9561586638830898
Recall: 0.916


[92mINFO [0m:      Evaluation returned no results (`None`)
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 1]
[92mINFO [0m:      configure_fit: strategy sampled 2 clients (out of 5)


Round: 0
K: 10
Threshold: 1.0
F1: 0.8934306569343066
Precision: 0.8701421800947867
Recall: 0.918
[36m(ClientAppActor pid=9831)[0m Rodada 1: Treinando Cliente 0 com lr 0.000505


[36m(ClientAppActor pid=9831)[0m   super().__init__(**kwargs)
[36m(ClientAppActor pid=9831)[0m No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
  0%|          | 0/10 [00:00<?, ?it/s]
 10%|█         | 1/10 [00:01<00:17,  1.91s/it]
 20%|██        | 2/10 [00:02<00:11,  1.39s/it]
 30%|███       | 3/10 [00:03<00:08,  1.22s/it]
 40%|████      | 4/10 [00:04<00:06,  1.14s/it]
 50%|█████     | 5/10 [00:05<00:05,  1.09s/it]
 60%|██████    | 6/10 [00:06<00:04,  1.06s/it]
 70%|███████   | 7/10 [00:07<00:03,  1.05s/it]
 80%|████████  | 8/10 [00:09<00:02,  1.03s/it]
 90%|█████████ | 9/10 [00:10<00:01,  1.03s/it]
100%|██████████| 10/10 [00:11<00:00,  1.03s/it]


[36m(ClientAppActor pid=9831)[0m {'train_runtime': 11.5275, 'train_samples_per_second': 6.94, 'train_steps_per_second': 0.867, 'train_loss': 8.501805114746094, 'epoch': 0.43}
[36m(ClientAppActor pid=9831)[0m Rodada 1: Cliente 0 treinou


[36m(ClientAppActor pid=9831)[0m                                                100%|██████████| 10/10 [00:11<00:00,  1.03s/it]100%|██████████| 10/10 [00:11<00:00,  1.15s/it]
[36m(ClientAppActor pid=9831)[0m   super().__init__(**kwargs)
[36m(ClientAppActor pid=9831)[0m No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


[36m(ClientAppActor pid=9831)[0m Rodada 1: Treinando Cliente 3 com lr 0.000505


  0%|          | 0/10 [00:00<?, ?it/s]
 10%|█         | 1/10 [00:01<00:09,  1.07s/it]
 20%|██        | 2/10 [00:02<00:08,  1.05s/it]
 30%|███       | 3/10 [00:03<00:07,  1.05s/it]
 40%|████      | 4/10 [00:04<00:06,  1.04s/it]
 50%|█████     | 5/10 [00:05<00:05,  1.03s/it]
 60%|██████    | 6/10 [00:06<00:04,  1.03s/it]
 70%|███████   | 7/10 [00:07<00:03,  1.03s/it]
 80%|████████  | 8/10 [00:08<00:02,  1.03s/it]
 90%|█████████ | 9/10 [00:09<00:01,  1.02s/it]
100%|██████████| 10/10 [00:10<00:00,  1.02s/it]
100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
[92mINFO [0m:      aggregate_fit: received 2 results and 0 failures


[36m(ClientAppActor pid=9831)[0m {'train_runtime': 10.8573, 'train_samples_per_second': 7.368, 'train_steps_per_second': 0.921, 'train_loss': 8.438809204101563, 'epoch': 0.43}
[36m(ClientAppActor pid=9831)[0m Rodada 1: Cliente 3 treinou
Round: 1
K: 1
Threshold: 0.985985985985986
F1: 0.8986415882967607
Precision: 0.9409190371991247
Recall: 0.86
Round: 1
K: 3
Threshold: 0.993993993993994
F1: 0.8945102260495156
Precision: 0.9685314685314685
Recall: 0.831
Round: 1
K: 5
Threshold: 0.996996996996997
F1: 0.9230769230769231
Precision: 0.9473684210526315
Recall: 0.9


[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [ROUND 2]
[92mINFO [0m:      configure_fit: strategy sampled 2 clients (out of 5)


Round: 1
K: 10
Threshold: 1.0
F1: 0.8963503649635036
Precision: 0.8729857819905213
Recall: 0.921
[36m(ClientAppActor pid=9831)[0m Rodada 2: Treinando Cliente 0 com lr 1e-05


[36m(ClientAppActor pid=9831)[0m   super().__init__(**kwargs)
[36m(ClientAppActor pid=9831)[0m No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
  0%|          | 0/10 [00:00<?, ?it/s]
 10%|█         | 1/10 [00:01<00:09,  1.03s/it]
 20%|██        | 2/10 [00:02<00:08,  1.01s/it]
 30%|███       | 3/10 [00:03<00:07,  1.00s/it]
 40%|████      | 4/10 [00:04<00:06,  1.00s/it]
 50%|█████     | 5/10 [00:05<00:05,  1.00s/it]
 60%|██████    | 6/10 [00:06<00:04,  1.00s/it]
 70%|███████   | 7/10 [00:07<00:03,  1.01s/it]
 80%|████████  | 8/10 [00:08<00:02,  1.01s/it]
 90%|█████████ | 9/10 [00:09<00:01,  1.02s/it]
100%|██████████| 10/10 [00:10<00:00,  1.02s/it]


[36m(ClientAppActor pid=9831)[0m {'train_runtime': 10.6571, 'train_samples_per_second': 7.507, 'train_steps_per_second': 0.938, 'train_loss': 7.408606719970703, 'epoch': 0.43}
[36m(ClientAppActor pid=9831)[0m Rodada 2: Cliente 0 treinou


[36m(ClientAppActor pid=9831)[0m                                                100%|██████████| 10/10 [00:10<00:00,  1.02s/it]100%|██████████| 10/10 [00:10<00:00,  1.07s/it]


[36m(ClientAppActor pid=9831)[0m Rodada 2: Treinando Cliente 1 com lr 1e-05


[36m(ClientAppActor pid=9831)[0m   super().__init__(**kwargs)
[36m(ClientAppActor pid=9831)[0m No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
  0%|          | 0/10 [00:00<?, ?it/s]
 10%|█         | 1/10 [00:01<00:09,  1.05s/it]
 20%|██        | 2/10 [00:02<00:08,  1.03s/it]
 30%|███       | 3/10 [00:03<00:07,  1.03s/it]
 40%|████      | 4/10 [00:04<00:06,  1.03s/it]
 50%|█████     | 5/10 [00:05<00:05,  1.03s/it]
 60%|██████    | 6/10 [00:06<00:04,  1.03s/it]
 70%|███████   | 7/10 [00:07<00:03,  1.04s/it]
 80%|████████  | 8/10 [00:08<00:02,  1.04s/it]
 90%|█████████ | 9/10 [00:09<00:01,  1.04s/it]
100%|██████████| 10/10 [00:10<00:00,  1.05s/it]


[36m(ClientAppActor pid=9831)[0m {'train_runtime': 10.9496, 'train_samples_per_second': 7.306, 'train_steps_per_second': 0.913, 'train_loss': 7.346208190917968, 'epoch': 0.43}


[36m(ClientAppActor pid=9831)[0m                                                100%|██████████| 10/10 [00:10<00:00,  1.05s/it]100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
[92mINFO [0m:      aggregate_fit: received 2 results and 0 failures


[36m(ClientAppActor pid=9831)[0m Rodada 2: Cliente 1 treinou
Round: 2
K: 1
Threshold: 0.985985985985986
F1: 0.8958333333333334
Precision: 0.9347826086956522
Recall: 0.86
Round: 2
K: 3
Threshold: 0.993993993993994
F1: 0.8951048951048951
Precision: 0.9685681024447031
Recall: 0.832
Round: 2
K: 5
Threshold: 0.996996996996997
F1: 0.9230769230769231
Precision: 0.9473684210526315
Recall: 0.9


[92mINFO [0m:      configure_evaluate: no clients selected, skipping evaluation
[92mINFO [0m:      
[92mINFO [0m:      [SUMMARY]
[92mINFO [0m:      Run finished 2 round(s) in 441.69s
[92mINFO [0m:      


Round: 2
K: 10
Threshold: 1.0
F1: 0.8958130477117819
Precision: 0.872865275142315
Recall: 0.92
