## RandomForest

In [12]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
import sys
import os

### Data Reading & Splitting

In [13]:
# Locate the project root as the parent of the current working directory.
# NOTE(review): assumes the kernel's working directory is the notebook's own
# folder; if the kernel is started elsewhere the CSV path below will be wrong.
notebook_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, ".."))

# Put the project root on the import path. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry each time.
if project_root not in sys.path:
    sys.path.append(project_root)

# Load the preprocessed dataset from <project_root>/archive/preprocessed_files/.
csv_path = os.path.join(project_root, "archive", "preprocessed_files", "processed.csv")
df = pd.read_csv(csv_path)

In [14]:
# Target: the `log_price` column, reshaped into a column vector (n_samples, 1).
y = df["log_price"].values.reshape(-1, 1)

# Features: every other column, one-hot encoded (first level of each encoded
# column dropped) and then cast to float.
X = pd.get_dummies(df.drop(columns=["log_price"]), drop_first=True).astype(float)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

### Train & Test

In [35]:
# --- RANDOM FOREST ---
# Configure the regressor and train it in one expression (`fit` returns the
# fitted estimator). `y_train` is flattened to 1-D before fitting.
model = RandomForestRegressor(
    n_estimators=100,
    max_depth=40,
    min_samples_split=20,
    min_samples_leaf=1,
    random_state=42,  # fixed seed so repeated runs build the same forest
    n_jobs=-1,
).fit(X_train, y_train.ravel())

# Predictions on the held-out test split
y_pred = model.predict(X_test)

# Metrics: R² and mean squared error on the test split
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Report MSE, its square root (RMSE), and R²
print("MSE:", mse)
print("RMSE:", mse**0.5)
print("R²:", r2)

MSE: 0.07150068694321453
RMSE: 0.26739612365031495
R²: 0.6855788990723999
