# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV, learning_curve
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import numpy as np


# Load the dataset: every column except the last is used as a feature and the
# last column is used as the regression target.
data = pd.read_csv('dataset.csv')
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Hold out 20% of the rows for the final evaluation (seeded for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Candidate hyperparameter values the randomized search draws from.
param_distributions = dict(
    n_estimators=[10, 50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Base estimator; the seed keeps the forest construction repeatable.
model = RandomForestRegressor(random_state=42)

# Try 10 sampled parameter combinations, each scored by R^2 with 5-fold
# cross-validation on the training split only.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_distributions,
    n_iter=10,
    cv=5,
    random_state=42,
    scoring='r2',
)
random_search.fit(X_train, y_train)

# Estimator fitted with the best parameter combination found by the search.
best_model = random_search.best_estimator_


# Score the tuned model on the held-out test split.
y_pred = best_model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report the selected hyperparameters and both test metrics, one per line.
print(
    f"Mejor modelo: {random_search.best_params_}",
    f"Mean Squared Error: {mse}",
    f"R2 Score: {r2}",
    sep="\n",
)


# Learning curve for the tuned model. Only the training split is passed in so
# that the held-out test rows stay out of every model-diagnosis step, matching
# the data the hyperparameter search was run on. learning_curve performs its
# own 5-fold cross-validation on the data it is given.
train_sizes, train_scores, test_scores = learning_curve(
    best_model,
    X_train,
    y_train,
    train_sizes=np.linspace(0.1, 1.0, 5),  # 10% .. 100% of each CV training fold
    cv=5,
    scoring='r2',
)

# Average the per-fold scores (axis 1) for each training-set size.
train_scores_mean = np.mean(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)

# Plot mean training and cross-validation R^2 against the number of samples.
plt.figure()
plt.plot(train_sizes, train_scores_mean, label="Training score")
plt.plot(train_sizes, test_scores_mean, label="Validation score")
plt.title("Learning Curve (Random Forest)")
plt.xlabel("Training Size")
plt.ylabel("Score")
plt.legend()
plt.grid()
plt.show()
