In [3]:
import psycopg2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.feature_selection import SelectKBest, f_classif
import os

In [4]:
import mlflow
import mlflow.sklearn

In [7]:
#mlflow.set_tracking_uri("http://host.docker.internal:5000")  # URL where MLflow is running

In [6]:
# Point the MLflow client at the tracking server.
# The MLFLOW_TRACKING_URI environment variable, when set, overrides the
# address so it can be changed without editing the notebook; otherwise the
# original server URL is used.
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", "http://mlflow:5000"))

In [8]:
# Activate the experiment that all runs started below are recorded under.
# mlflow.set_experiment creates the experiment when no experiment with this
# name exists yet, so a separate search/create step is unnecessary.
# The result is bound to a name so the cell does not echo the object.
experiment = mlflow.set_experiment("random_forest_model")

In [20]:
# conn = psycopg2.connect(
#     dbname='puj_mflow_models', user='postgres', password='postgres', host='db'
# )
# cursor = conn.cursor()

In [9]:
# Root directory (relative path) under which the data file is looked up.
_data_root = './data/'

In [10]:
# Path of the training CSV, built by joining the file name onto _data_root.
_data_filepath = os.path.join(_data_root, 'train_preprocessed.csv')

In [11]:
# Load the training data into a DataFrame.
# Reuse the path assembled in `_data_filepath` rather than repeating the
# literal; both resolve to "./data/train_preprocessed.csv".
file_path = _data_filepath
df = pd.read_csv(file_path)

In [12]:
# Split the frame into the target vector (y) and the feature matrix (X).
y = df["Cover_Type"]
X = df.drop(columns="Cover_Type")

In [13]:
# One-hot encode the features with pandas; drop_first=True omits the first
# level of every encoded column. The bare `X` renders the encoded frame.
X = pd.get_dummies(data=X, drop_first=True)
X

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,...,Soil_Type_C7755,Soil_Type_C7756,Soil_Type_C7757,Soil_Type_C7790,Soil_Type_C8703,Soil_Type_C8707,Soil_Type_C8708,Soil_Type_C8771,Soil_Type_C8772,Soil_Type_C8776
0,3305,35,13,134,25,5811,218,211,127,659,...,False,False,False,False,False,False,False,True,False,False
1,3295,332,10,934,160,924,196,227,170,2519,...,False,True,False,False,False,False,False,False,False,False
2,3320,339,12,711,106,979,195,223,167,2531,...,False,False,False,False,False,False,False,False,False,False
3,3321,16,14,785,193,3221,207,209,137,787,...,False,False,False,False,False,False,False,False,False,False
4,2959,233,17,190,85,1519,189,253,198,999,...,False,False,True,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92957,2324,26,33,124,67,1473,185,150,81,1034,...,False,False,False,False,False,False,False,False,False,False
92958,2384,144,6,95,10,604,230,239,143,934,...,False,False,False,False,False,False,False,False,False,False
92959,3287,96,6,67,4,4496,231,231,133,3684,...,False,False,False,False,False,False,False,False,False,False
92960,2960,344,8,30,3,3193,205,228,162,1176,...,False,False,False,False,False,False,False,False,False,False


In [14]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Hyperparameter values to try for the random forest.
n_estimators_list = [
    10,
    50,
    100,
    200,
]
max_depth_list = [
    None,  # no depth limit passed to the estimator
    5,
    10,
    20,
]

In [16]:
# Grid search: fit one random forest per (n_estimators, max_depth) pair and
# record every fit as its own MLflow run.
param_grid = [
    (n_trees, depth)
    for n_trees in n_estimators_list
    for depth in max_depth_list
]

for n_estimators, max_depth in param_grid:
    params = {"n_estimators": n_estimators, "max_depth": max_depth}

    with mlflow.start_run():  # one MLflow run per combination
        # Fit on the training split, then score on the held-out split.
        model = RandomForestClassifier(random_state=42, **params)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)

        # Record the hyperparameters, the accuracy and the fitted model.
        for param_name, param_value in params.items():
            mlflow.log_param(param_name, param_value)
        mlflow.log_metric("accuracy", acc)
        mlflow.sklearn.log_model(model, "random_forest_model")

        print(f'n_estimators={n_estimators}, max_depth={max_depth}, Accuracy={acc:.5f}')




n_estimators=10, max_depth=None, Accuracy=0.87796
🏃 View run orderly-mouse-524 at: http://mlflow:5000/#/experiments/1/runs/aaeb94dda9a44c0aa386ad12a6623441
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=10, max_depth=5, Accuracy=0.67230
🏃 View run melodic-bug-469 at: http://mlflow:5000/#/experiments/1/runs/6a656021c8524094ab5a290471daeda1
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=10, max_depth=10, Accuracy=0.75227
🏃 View run clean-midge-659 at: http://mlflow:5000/#/experiments/1/runs/8f96fdcd1cdb4418b177088228e23622
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=10, max_depth=20, Accuracy=0.85349
🏃 View run painted-deer-629 at: http://mlflow:5000/#/experiments/1/runs/058c49fdeb9644d78a4c42c76527d4fc
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=50, max_depth=None, Accuracy=0.89932
🏃 View run lyrical-shrimp-67 at: http://mlflow:5000/#/experiments/1/runs/5f1fc08940944174b4a4e0900d16f590
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=50, max_depth=5, Accuracy=0.67240
🏃 View run stylish-stag-233 at: http://mlflow:5000/#/experiments/1/runs/406edd2428d14b8cb57324feb92779a9
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=50, max_depth=10, Accuracy=0.74878
🏃 View run abrasive-rat-631 at: http://mlflow:5000/#/experiments/1/runs/8fc25d780318437a9e6a01e56479574c
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=50, max_depth=20, Accuracy=0.85758
🏃 View run tasteful-lark-386 at: http://mlflow:5000/#/experiments/1/runs/d32d0ccc45f84559bf74d8b8a6626013
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=100, max_depth=None, Accuracy=0.90238
🏃 View run handsome-rat-469 at: http://mlflow:5000/#/experiments/1/runs/0a95d5562255461291d25dc0a515e2bb
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=100, max_depth=5, Accuracy=0.66966
🏃 View run invincible-fawn-4 at: http://mlflow:5000/#/experiments/1/runs/72e1bd0f991c4dbc89efdbc76df9d8e6
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=100, max_depth=10, Accuracy=0.75260
🏃 View run bedecked-conch-889 at: http://mlflow:5000/#/experiments/1/runs/a30c60f23ee74f698c8534d270b4794e
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=100, max_depth=20, Accuracy=0.86086
🏃 View run receptive-horse-725 at: http://mlflow:5000/#/experiments/1/runs/c03ed5e794e14bc58044f874896274cf
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=200, max_depth=None, Accuracy=0.90131
🏃 View run victorious-perch-523 at: http://mlflow:5000/#/experiments/1/runs/696080d01e46419f8b15696a750d6ffb
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=200, max_depth=5, Accuracy=0.66955
🏃 View run wise-jay-111 at: http://mlflow:5000/#/experiments/1/runs/347c58f5003e46f58adc9e7d1d33fecd
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=200, max_depth=10, Accuracy=0.75464
🏃 View run learned-shoat-104 at: http://mlflow:5000/#/experiments/1/runs/3b7ce6ff5b0742bba7be10079201cb43
🧪 View experiment at: http://mlflow:5000/#/experiments/1




n_estimators=200, max_depth=20, Accuracy=0.85952
🏃 View run abrasive-bird-997 at: http://mlflow:5000/#/experiments/1/runs/d761c589d7ae49aa98cfcf89bac6491e
🧪 View experiment at: http://mlflow:5000/#/experiments/1


In [17]:
# Fit the final model: 100 trees, no max_depth argument, fixed seed.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
model.fit(X_train, y_train)

In [18]:
# Predict on the held-out test split and compute the evaluation metrics.
# Precision, recall and F1 use average='weighted' (per-class scores weighted
# by class support).
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Accuracy is labelled "Exactitud": the earlier label reused "Precisión",
# which made the accuracy line indistinguishable from the precision line.
print(f"Exactitud del modelo: {accuracy:.4f}")
print(f"Precisión: {precision:.4f}")
print(f"Sensibilidad (Recall): {recall:.4f}")
print(f"Puntaje F1: {f1:.4f}")

Precisión del modelo: 0.9024
Precisión: 0.9028
Sensibilidad (Recall): 0.9024
Puntaje F1: 0.9012
