In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler

# Create a dictionary with the data
data = {
    'sqft': [2104, 1600, 2400, 1416, 3000, 1985, 1534],
    'habitaciones': [3, 3, 3, 2, 4, 4, 3],
    'edad': [10, 5, 20, 15, 8, 12, 30],
    'precio': [399900, 329900, 369000, 232000, 539900, 299900, 314900]
}

# Build a DataFrame from the dictionary
df = pd.DataFrame(data)

# Select the features (X) and the target variable (y)
X = df[['sqft', 'habitaciones', 'edad']]
y = df['precio']

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the mean/std used to transform the training rows
# (data leakage). The row selection only depends on the number of samples and
# random_state, so the same rows end up in each split as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features using statistics learned from the training rows only,
# then apply that same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any later cell still referring to X_scaled keeps working:
# all rows, transformed with the train-only statistics.
X_scaled = scaler.transform(X)

# Fit a decision tree regressor with explicitly set hyperparameters,
# then score its predictions on the test split.
regressor = DecisionTreeRegressor(
    random_state=42,
    max_depth=3,
    min_samples_split=2,
).fit(X_train, y_train)
y_pred = regressor.predict(X_test)
r2 = r2_score(y_test, y_pred)

# Actual vs. predicted values, side by side
results = pd.DataFrame({'Real': y_test})
results['Predicción'] = y_pred

# Single print call; sep="\n" yields the same four-part report, one item per line
print(
    "The model performance for test set",
    "-------------------------------",
    f'R^2 score is {r2:.2f}',
    results,
    sep="\n",
)

# Fit a linear regression model for comparison and score it on the test split
linear_regressor = LinearRegression().fit(X_train, y_train)
linear_y_pred = linear_regressor.predict(X_test)
linear_r2 = r2_score(y_test, linear_y_pred)

# Actual vs. predicted values, side by side
linear_results = pd.DataFrame({'Real': y_test})
linear_results['Predicción'] = linear_y_pred

# Single print call; the leading "\n" in the title keeps the blank line before this report
print(
    "\nLinear Regression model performance for test set",
    "-----------------------------------------------",
    f'R^2 score is {linear_r2:.2f}',
    linear_results,
    sep="\n",
)

# Fit a random forest regressor for comparison and score it on the test split
forest_regressor = RandomForestRegressor(
    random_state=42,
    n_estimators=100,
).fit(X_train, y_train)
forest_y_pred = forest_regressor.predict(X_test)
forest_r2 = r2_score(y_test, forest_y_pred)

# Actual vs. predicted values, side by side
forest_results = pd.DataFrame({'Real': y_test})
forest_results['Predicción'] = forest_y_pred

# Single print call; the leading "\n" in the title keeps the blank line before this report
print(
    "\nRandom Forest Regressor model performance for test set",
    "-----------------------------------------------",
    f'R^2 score is {forest_r2:.2f}',
    forest_results,
    sep="\n",
)



The model performance for test set
-------------------------------
R^2 score is -25.00
     Real  Predicción
0  399900    539900.0
1  329900    539900.0

Linear Regression model performance for test set
-----------------------------------------------
R^2 score is -4.15
     Real     Predicción
0  399900  339128.661900
1  329900  235398.805971

Random Forest Regressor model performance for test set
-----------------------------------------------
R^2 score is 0.19
     Real  Predicción
0  399900    379952.0
1  329900    369872.0
