# Orchestrator Test

Smoke test for `ModelsOrchestrator` — data loading, MLP training, LightGBM training, XGBoost training, and combined prediction.

In [2]:
# Print the path of the Python interpreter running this kernel, to confirm
# which environment the notebook is using.
import sys
print(sys.executable)

c:\Users\angej\Documents\2_Programação\health_index_project\.venv\Scripts\python.exe


In [1]:
# Notebook setup: import dependencies, make the project packages importable,
# and report the PyTorch version and compute device.
# NOTE(review): `nn`, `TensorDataset`, `DataLoader` and the sklearn metrics are
# not used by any cell visible in this notebook — confirm whether still needed.
# NOTE(review): the recorded output of this cell shows lines this code does not
# print and ends in a ModuleNotFoundError for 'optuna'; it looks stale — re-run.
import sys
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import f1_score, precision_score, recall_score
from pathlib import Path
# Add the parent of the current working directory to the import path so the
# project-local `models_classes` package imported below can be found.
# NOTE(review): presumably the notebook is launched from a direct subfolder of
# the project root — confirm. Re-running this cell appends the same entry again.
sys.path.append(str(Path.cwd().parent))

# Project-local imports; kept after the sys.path change above.
from models_classes.models_orchestrator import ModelsOrchestrator
from models_classes.mlp_disease_neural_net import device

# Report the installed PyTorch version and the `device` object exported by
# `mlp_disease_neural_net`.
print(f'PyTorch: {torch.__version__}')
print(f'Using device: {device}')

Python: c:\Users\angej\Documents\2_Programação\health_index_project\.venv\Scripts\python.exe
PyTorch: 2.10.0+cu128
CUDA built with: 12.8
CUDA available: True
Using device: cuda


ModuleNotFoundError: No module named 'optuna'

## 1. Load data

In [None]:
# Create the orchestrator configured with type_disease='chikungunya'.
orchestrator = ModelsOrchestrator(type_disease='chikungunya')

# prepare_data() returns seven values; unpack them in the order it yields them.
(
    x_train_cat,
    x_test_cat,
    x_train_num,
    x_test_num,
    y_train,
    y_test,
    embedding_sizes,
) = orchestrator.prepare_data()

# Numerical columns = every column of orchestrator.df that is neither listed in
# categorical_columns nor the 'final_classification' column
# (presumably the target label — confirm against the orchestrator).
non_numerical_columns = [*orchestrator.categorical_columns, 'final_classification']
numerical_columns = orchestrator.df.drop(columns=non_numerical_columns).columns

# Quick sanity summary of split sizes and feature counts.
print(f'Train: {len(y_train)} | Test: {len(y_test)}')
print(f'Categorical features: {len(orchestrator.categorical_columns)}')
print(f'Numerical features:   {len(numerical_columns)}')

## 2. Train MLP

In [None]:
# Train the MLP through the orchestrator, passing the embedding sizes returned
# by prepare_data() and the file path handed to train_mlp as `save_path`.
#
# The path is built from the project root (parent of the current working
# directory, the same convention the setup cell uses for sys.path) instead of
# a machine-specific absolute path, so the notebook runs on any checkout.
# It is passed as a plain string, as before.
# NOTE(review): assumes the `models_saved` directory already exists under the
# project root, as the previous hard-coded path did — confirm.
mlp_checkpoint_path = Path.cwd().parent / 'models_saved' / 'best_orchestrator_mlp.pth'
mlp_model = orchestrator.train_mlp(
    embedding_sizes=embedding_sizes,
    save_path=str(mlp_checkpoint_path),
)

### 2.1 MLP evaluation

In [None]:
# Evaluate the trained MLP with the orchestrator's test loader and test labels,
# then render whatever evaluate() returns in the notebook.
mlp_evaluation = mlp_model.evaluate(orchestrator.test_loader, orchestrator.y_test)
display(mlp_evaluation)

## 3. Train LightGBM

In [None]:
# Train LightGBM on the training split with fast_train disabled.
lgbm_model = orchestrator.train_lgbm(
    fast_train=False,
    x_train_cat=x_train_cat,
    x_train_num=x_train_num,
    y_train=y_train,
)

# Evaluate on the test split. Left as the final bare expression so the
# notebook displays the value returned by evaluate().
lgbm_model.evaluate(
    x_test_cat,
    x_test_num,
    y_test,
    orchestrator.categorical_columns,
    orchestrator.numerical_columns,
)

## 3b. Train XGBoost

In [None]:
# Train the XGB model on the training split with fast_train disabled.
xgb_model = orchestrator.train_xgb(
    fast_train=False,
    x_train_cat=x_train_cat,
    x_train_num=x_train_num,
    y_train=y_train,
)

# Evaluate on the test split. Left as the final bare expression so the
# notebook displays the value returned by evaluate().
xgb_model.evaluate(
    x_test_cat,
    x_test_num,
    y_test,
    orchestrator.categorical_columns,
    orchestrator.numerical_columns,
)

## 4. Combined prediction (MLP + LightGBM + XGBoost average)

In [None]:
# Run the combined evaluation over the three trained models on the test
# features, using a threshold of 0.4.
confirmation_df = orchestrator.evaluate_combined(
    threshold=0.4,
    mlp_model=mlp_model,
    lgbm_model=lgbm_model,
    xgb_model=xgb_model,
    x_test_cat=x_test_cat,
    x_test_num=x_test_num,
)

# Show how many rows fall under each value of the 'unanimous' column.
unanimous_counts = confirmation_df['unanimous'].value_counts()
display(unanimous_counts)