# Orchestrator Test

Smoke test for `ModelsOrchestrator` — data loading, MLP training, LightGBM training, XGBoost training, and combined prediction.

In [34]:
import sys
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import f1_score, precision_score, recall_score

from pathlib import Path

# Make the parent of the working directory importable so the `models_classes`
# package used below resolves. Guarded so that re-running this cell does not
# keep appending duplicate entries to sys.path.
project_root = str(Path.cwd().parent)
if project_root not in sys.path:
    sys.path.append(project_root)

from models_classes.models_orchestrator import ModelsOrchestrator
from models_classes.mlp_disease_neural_net import device

# Report the installed PyTorch build and the `device` object exported by the
# MLP module, one per line.
print(
    f'PyTorch: {torch.__version__}',
    f'Using device: {device}',
    sep='\n',
)

PyTorch: 2.10.0+cu128
Using device: cuda


## 1. Load data

In [35]:
# Build the orchestrator for the dengue dataset and unpack the seven values
# returned by prepare_data(): categorical/numerical train and test inputs,
# train and test labels, and the embedding sizes passed to the MLP later on.
orchestrator = ModelsOrchestrator(type_disease='dengue')

(
    x_train_cat,
    x_test_cat,
    x_train_num,
    x_test_num,
    y_train,
    y_test,
    embedding_sizes,
) = orchestrator.prepare_data()

# Numerical columns are whatever remains in orchestrator.df once the
# categorical columns and the `final_classification` column are dropped.
numerical_columns = orchestrator.df.drop(
    columns=[*orchestrator.categorical_columns, 'final_classification']
).columns

# Quick summary of split sizes and feature counts.
print(
    f'Train: {len(y_train)} | Test: {len(y_test)}',
    f'Categorical features: {len(orchestrator.categorical_columns)}',
    f'Numerical features:   {len(numerical_columns)}',
    sep='\n',
)

Colunas removidas (>99% mesmo valor): 14
['blood_disorder', 'liver_disease', 'kidney_disease', 'peptic_ulcer', 'autoimmune_disease', 'nosebleed', 'gum_bleeding', 'metrorrhagia', 'petechiae_hemorrh', 'hematuria', 'other_bleeding', 'symptom_year_end', 'hemorrhagic_count', 'conjunctivitis_and_petechiae']
Train: 2586811 | Test: 287424
Categorical features: 9
Numerical features:   85


## 2. Train MLP

In [36]:
# Store the best MLP checkpoint under <project root>/models_saved rather than a
# user-specific absolute path, so the notebook runs on any machine. The project
# root is taken to be the parent of the working directory, the same location
# the import cell adds to sys.path.
mlp_checkpoint_path = Path.cwd().parent / 'models_saved' / 'best_orchestrator_mlp.pth'
# Create the target directory up front so saving the checkpoint cannot fail on
# a fresh checkout.
mlp_checkpoint_path.parent.mkdir(parents=True, exist_ok=True)

mlp_model = orchestrator.train_mlp(
    embedding_sizes=embedding_sizes,
    save_path=str(mlp_checkpoint_path),
)

Epoch   0 | Train: 0.5176 | Val: 0.4849 | LR: 0.000100
Epoch   1 | Train: 0.4932 | Val: 0.4750 | LR: 0.000100
Epoch   2 | Train: 0.4842 | Val: 0.4703 | LR: 0.000100
Epoch   3 | Train: 0.4788 | Val: 0.4669 | LR: 0.000100
Epoch   4 | Train: 0.4750 | Val: 0.4646 | LR: 0.000100
Epoch   5 | Train: 0.4719 | Val: 0.4625 | LR: 0.000100
Epoch   6 | Train: 0.4694 | Val: 0.4609 | LR: 0.000100
Epoch   7 | Train: 0.4675 | Val: 0.4591 | LR: 0.000100
Epoch   8 | Train: 0.4655 | Val: 0.4578 | LR: 0.000100
Epoch   9 | Train: 0.4640 | Val: 0.4566 | LR: 0.000100
Epoch  10 | Train: 0.4625 | Val: 0.4555 | LR: 0.000100
Epoch  11 | Train: 0.4611 | Val: 0.4544 | LR: 0.000100
Epoch  12 | Train: 0.4597 | Val: 0.4533 | LR: 0.000100
Epoch  13 | Train: 0.4584 | Val: 0.4526 | LR: 0.000100
Epoch  14 | Train: 0.4573 | Val: 0.4517 | LR: 0.000100
Epoch  15 | Train: 0.4562 | Val: 0.4507 | LR: 0.000100
Epoch  16 | Train: 0.4551 | Val: 0.4504 | LR: 0.000100
Epoch  17 | Train: 0.4543 | Val: 0.4496 | LR: 0.000100
Epoch  18 

### 2.1 MLP evaluation

In [37]:
# Evaluate the trained MLP on the orchestrator's test loader and labels, then
# render whatever evaluate() returns (judging by the saved output, a per-row
# table with Actual / Prob / Predicted / Correct columns).
display(
    mlp_model.evaluate(
        orchestrator.test_loader,
        orchestrator.y_test,
    )
)

 Threshold |   Accuracy |  Precision |     Recall |         F1
------------------------------------------------------------
      0.30 |     0.7340 |     0.7121 |     0.9139 |     0.8005
      0.35 |     0.7413 |     0.7293 |     0.8856 |     0.7999
      0.40 |     0.7442 |     0.7456 |     0.8530 |     0.7957
      0.45 |     0.7429 |     0.7617 |     0.8145 |     0.7872
      0.50 |     0.7372 |     0.7778 |     0.7698 |     0.7738
      0.55 |     0.7277 |     0.7951 |     0.7191 |     0.7552
      0.60 |     0.7120 |     0.8128 |     0.6586 |     0.7276


Unnamed: 0,Actual,Prob,Predicted,Correct
0,0,0.056840,0,True
1,1,0.614420,1,True
2,0,0.029409,0,True
3,1,0.480465,1,True
4,1,0.577030,1,True
...,...,...,...,...
287419,1,0.592096,1,True
287420,0,0.259399,0,True
287421,1,0.215355,0,False
287422,0,0.598580,1,False


## 3. Train LightGBM

In [38]:
# Fit LightGBM on the categorical + numerical training features.
lgbm_model = orchestrator.train_lgbm(
    x_train_cat,
    x_train_num,
    y_train,
)

# Score it on the test split; kept as the cell's last expression so the
# notebook still shows evaluate()'s return value.
lgbm_model.evaluate(
    x_test_cat,
    x_test_num,
    y_test,
    orchestrator.categorical_columns,
    numerical_columns,
)

[LightGBM] [Info] Number of positive: 1509918, number of negative: 1076893
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 1058
[LightGBM] [Info] Number of data points in the train set: 2586811, number of used features: 94
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 4060 Ti, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 15 dense feature groups (39.47 MB) transferred to GPU in 0.056829 secs. 1 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.583699 -> initscore=0.337975
[LightGBM] [Info] Start training from score 0.337975
 Threshold |   Accuracy |  Precision |     Recall |         F1
------------------------------------------------------------
      0.10 |     0.6476 |     0.6244 |     0.9952 |     0.7673
      0.30 |     0.7359 |     0.7002 |     0.9577 |     0.8090
      0.35

## 3b. Train XGBoost

In [39]:
# Fit XGBoost on the same training features used for LightGBM.
xgb_model = orchestrator.train_xgb(
    x_train_cat,
    x_train_num,
    y_train,
)

# Score it on the test split; kept as the cell's last expression so the
# notebook still shows evaluate()'s return value.
xgb_model.evaluate(
    x_test_cat,
    x_test_num,
    y_test,
    orchestrator.categorical_columns,
    numerical_columns,
)

 Threshold |   Accuracy |  Precision |     Recall |         F1
------------------------------------------------------------
      0.10 |     0.6532 |     0.6282 |     0.9947 |     0.7701
      0.30 |     0.7376 |     0.7023 |     0.9560 |     0.8097
      0.35 |     0.7519 |     0.7213 |     0.9372 |     0.8152
      0.40 |     0.7626 |     0.7403 |     0.9141 |     0.8181
      0.45 |     0.7704 |     0.7600 |     0.8868 |     0.8185
      0.50 |     0.7729 |     0.7786 |     0.8538 |     0.8145
      0.55 |     0.7707 |     0.7972 |     0.8146 |     0.8058
      0.60 |     0.7641 |     0.8157 |     0.7700 |     0.7922


## 4. Combined prediction (MLP + LightGBM + XGBoost average)

In [89]:
# Evaluate the three trained models together on the test features, using a
# 0.4 decision threshold.
confirmation_df = orchestrator.evaluate_combined(
    threshold=0.4,
    mlp_model=mlp_model,
    lgbm_model=lgbm_model,
    xgb_model=xgb_model,
    x_test_cat=x_test_cat,
    x_test_num=x_test_num,
)

# Tally the True/False values of the `unanimous` column (presumably "all
# models agree" — confirm against evaluate_combined).
display(confirmation_df['unanimous'].value_counts())

 Threshold |   Accuracy |  Precision |     Recall |         F1
------------------------------------------------------------
      0.30 |     0.7373 |     0.7026 |     0.9540 |     0.8092
      0.35 |     0.7527 |     0.7236 |     0.9328 |     0.8150
      0.40 |     0.7647 |     0.7453 |     0.9069 |     0.8182
      0.45 |     0.7712 |     0.7656 |     0.8767 |     0.8174
      0.50 |     0.7728 |     0.7855 |     0.8404 |     0.8120
      0.55 |     0.7689 |     0.8058 |     0.7960 |     0.8009
      0.60 |     0.7588 |     0.8257 |     0.7439 |     0.7827
MLP TP: 85.30%
LGBM TP: 91.62%
XGB TP: 91.41%


unanimous
True     178822
False    108602
Name: count, dtype: int64