In [1]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# 1. Load the dataset (parquet file, path relative to the notebook).
dataset_path = "../data/data_modelo_profesionales.parquet"
df = pd.read_parquet(dataset_path)

In [3]:
# 2. Encode the categorical columns and keep the fitted encoders.
# The integer codes overwrite the original columns in `df`. Fitting straight
# from those columns made the cell non-idempotent: a second run would fit the
# encoders on the already-encoded values (cast to str), replacing the encoder
# classes with stringified codes. The raw labels are therefore stashed once in
# `<column>_RAW` and the encoders are always fitted from that copy, so the
# first run behaves exactly as before and re-runs give the same result.
le_ipress = LabelEncoder()
le_especialidad = LabelEncoder()
for col, encoder in (("COD_IPRESS", le_ipress), ("ESPECIALIDAD_MEDICA", le_especialidad)):
    raw_col = f"{col}_RAW"
    if raw_col not in df.columns:
        # First execution after loading: keep the string form of the raw labels.
        df[raw_col] = df[col].astype(str)
    df[col] = encoder.fit_transform(df[raw_col])

In [5]:
# 3. Model 1: predict TOTAL_ATENCIONES.
# Features are the ANIO, MES, COD_IPRESS and ESPECIALIDAD_MEDICA columns.
att_feature_cols = ["ANIO", "MES", "COD_IPRESS", "ESPECIALIDAD_MEDICA"]
X_att = df.loc[:, att_feature_cols]
y_att = df.loc[:, "TOTAL_ATENCIONES"]

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
split_att = train_test_split(
    X_att,
    y_att,
    test_size=0.2,
    random_state=42,
)
X_train_att, X_test_att, y_train_att, y_test_att = split_att

In [7]:
# Random forest for the attendance target: 30 trees, depth capped at 8, fixed seed.
rf_att_params = dict(n_estimators=30, max_depth=8, random_state=42)
model_att = RandomForestRegressor(**rf_att_params)
model_att.fit(X_train_att, y_train_att)

In [8]:
# Value returned by the regressor's .score() on the held-out split.
score_att = model_att.score(X_test_att, y_test_att)
print("Score modelo atenciones:", score_att)

Score modelo atenciones: 0.39789751446935495


In [9]:
# Error metrics for the attendance model on the held-out split.
y_pred = model_att.predict(X_test_att)
mae = mean_absolute_error(y_test_att, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_att, y_pred))

# MAPE divides by the true value, so a row whose target is exactly 0 would
# contribute inf (or NaN) and make the average meaningless. Only rows with a
# non-zero target take part; when there are none to exclude the result is
# identical to the plain formula.
nonzero_att = (y_test_att != 0).to_numpy()
ape_att = np.abs((y_test_att[nonzero_att] - y_pred[nonzero_att]) / y_test_att[nonzero_att])
mape = np.mean(ape_att) * 100

print("MAE:", mae)
print("RMSE:", rmse)
print("MAPE:", mape, "%")

MAE: 62.97971513599394
RMSE: 247.57671616620812
MAPE: 540.8186120083594 %


In [10]:
# 4. Add the attendance prediction as a feature column.
# NOTE(review): model_att was fitted on X_train_att, which is a subset of
# X_att, so the values written for those rows are in-sample predictions.
# Confirm this is intended; out-of-fold predictions would avoid the mismatch
# between training rows and unseen rows.
pred_atenciones = model_att.predict(X_att)
df["PRED_ATENCIONES"] = pred_atenciones

In [12]:
# 5. Model 2: predict PROFESIONALES_REQUERIDOS, using the previous model's
# prediction (PRED_ATENCIONES) as an extra feature next to the original four.
prof_feature_cols = ["ANIO", "MES", "COD_IPRESS", "ESPECIALIDAD_MEDICA", "PRED_ATENCIONES"]
X_prof = df.loc[:, prof_feature_cols]
y_prof = df.loc[:, "PROFESIONALES_REQUERIDOS"]

In [13]:
# Same split settings as the attendance model: 20% held out, seed 42.
split_prof = train_test_split(
    X_prof,
    y_prof,
    test_size=0.2,
    random_state=42,
)
X_train_prof, X_test_prof, y_train_prof, y_test_prof = split_prof

In [14]:
# Random forest for the staffing target: 30 trees, depth capped at 8, fixed seed.
rf_prof_params = dict(n_estimators=30, max_depth=8, random_state=42)
model_prof = RandomForestRegressor(**rf_prof_params)
model_prof.fit(X_train_prof, y_train_prof)

In [15]:
# Value returned by the regressor's .score() on the held-out split.
score_prof = model_prof.score(X_test_prof, y_test_prof)
print("Score modelo profesionales:", score_prof)

Score modelo profesionales: 0.6569311587691841


In [16]:
# 6. Error metrics for the professionals model on the held-out split.
y_pred_prof = model_prof.predict(X_test_prof)
mae = mean_absolute_error(y_test_prof, y_pred_prof)
rmse = np.sqrt(mean_squared_error(y_test_prof, y_pred_prof))

# MAPE divides by the true value, so a row whose target is exactly 0 would
# contribute inf (or NaN) and make the average meaningless. Only rows with a
# non-zero target take part; when there are none to exclude the result is
# identical to the plain formula.
nonzero_prof = (y_test_prof != 0).to_numpy()
ape_prof = np.abs(
    (y_test_prof[nonzero_prof] - y_pred_prof[nonzero_prof]) / y_test_prof[nonzero_prof]
)
mape = np.mean(ape_prof) * 100

print("MAE:", mae)
print("RMSE:", rmse)
print("MAPE:", mape, "%")

MAE: 3.9249105423265496
RMSE: 12.449856320525349
MAPE: 127.4928706463515 %


In [17]:
# 7. Save the models and the encoders.
# joblib.dump writes to the given path but does not create missing parent
# folders, so on a fresh checkout the first dump would fail. Create the
# output directory up front; this is a no-op when it already exists.
Path("modeloRFv4").mkdir(parents=True, exist_ok=True)

joblib.dump(model_att, "modeloRFv4/modelo_rf_atenciones.joblib")
joblib.dump(model_prof, "modeloRFv4/modelo_rf_profesionales_cascada.joblib")
joblib.dump(le_ipress, "modeloRFv4/le_ipress_simple.joblib")
joblib.dump(le_especialidad, "modeloRFv4/le_especialidad_simple.joblib")

['modeloRFv4/le_especialidad_simple.joblib']