In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

data = pd.read_csv("treino_sinais_vitais_com_label.csv")

X = data[["si3", "si4", "si5"]].values
y = data["gi"].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.35, random_state=42
)

# Normalizar os dados
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

rf_regressor = RandomForestRegressor(
    n_estimators=1000,
    max_depth=15,  # Limitando a profundidade para evitar overfitting
    min_samples_leaf=3,  # Garantindo que cada folha tenha pelo menos 3 amostras
    criterion="absolute_error",  # Mantendo o critério como mse, você pode testar 'mae' também
    random_state=42,
    verbose=1,
)
rf_regressor.fit(X_train, y_train)

y_pred = rf_regressor.predict(X_test)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    1.8s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    4.1s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    7.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s


In [30]:
rmse = mean_absolute_error(y_test, y_pred)
print(f"RMSE (Random Forest Regressor): {rmse:.4f}")

results = pd.DataFrame({"Real": y_test, "Predito": y_pred})
results.head(30)

RMSE (Random Forest Regressor): 2.1430


Unnamed: 0,Real,Predito
0,14.294521,17.254731
1,23.728687,24.702518
2,37.589263,34.726894
3,37.477275,37.214825
4,49.535736,52.450427
5,45.855634,46.116639
6,60.911514,58.570582
7,72.971933,73.216383
8,34.652059,34.091672
9,40.0,38.144676
