# In [None]:
# ================================================
# 1. Librerías
# ================================================
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import mean_squared_error, r2_score

# ================================================
# 2. Carga del dataset
# ================================================
# Load the California Housing data as a feature frame plus a named target series.
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name="MedianHouseValue")

# Combined frame holding the features together with the target column.
df = X.copy()
df["MedianHouseValue"] = y
# A bare `df.head()` only renders when it is the last expression of a notebook
# cell; display() shows the preview regardless of where the statement sits.
display(df.head())

# ================================================
# 3. Exploración
# ================================================
# Summary statistics. Wrapped in display() because a bare expression that is
# not the last statement of a cell/script produces no visible output.
display(df.describe())

# Heatmap of the pairwise correlations between every column of `df`.
plt.figure(figsize=(10, 6))
sns.heatmap(df.corr(), annot=True, cmap="coolwarm")
plt.title("Correlaciones del dataset California Housing")
plt.show()

# ================================================
# 4. Separación de datos
# ================================================
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardization: the scaler is fitted on the training split only and the
# same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ================================================
# 5. Entrenamiento de modelos
# ================================================
# Linear regression, KNN and SVR are fitted on the standardized features;
# the decision tree and the random forest are fitted on the raw features.
# Each estimator's fit() returns the estimator itself, so construction and
# training are chained.
model_lr = LinearRegression().fit(X_train_scaled, y_train)

model_tree = DecisionTreeRegressor(
    max_depth=6,
    random_state=42,
).fit(X_train, y_train)

model_knn = KNeighborsRegressor(n_neighbors=5).fit(X_train_scaled, y_train)

model_svr = SVR(kernel="rbf").fit(X_train_scaled, y_train)

model_rf = RandomForestRegressor(
    n_estimators=200,
    random_state=42,
).fit(X_train, y_train)

# ================================================
# 6. Evaluación de modelos
# ================================================
def evaluar(modelo, X, y):
    """Return ``(rmse, r2)`` for ``modelo`` evaluated on ``X`` against ``y``.

    Args:
        modelo: Fitted estimator exposing ``predict``.
        X: Features to predict on, in the same representation the model was
            fitted with.
        y: True target values aligned with the rows of ``X``.

    Returns:
        Tuple ``(rmse, r2)``: root mean squared error and R2 score of the
        predictions.
    """
    pred = modelo.predict(X)
    # The `squared=False` keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6, where it raises TypeError. Taking the square root of the
    # MSE works on every version and, for a single-target `y`, yields the
    # same value.
    rmse = mean_squared_error(y, pred) ** 0.5
    r2 = r2_score(y, pred)
    return rmse, r2

# Pair every fitted model with the test features in the representation it was
# trained on (standardized array vs. raw frame).
casos = [
    ("Linear Regression", model_lr, X_test_scaled),
    ("Decision Tree", model_tree, X_test),
    ("KNN", model_knn, X_test_scaled),
    ("SVR", model_svr, X_test_scaled),
    ("Random Forest", model_rf, X_test),
]

# Evaluate each model exactly once: evaluar() returns (rmse, r2) together, so
# calling it separately for each metric would repeat every prediction pass.
filas = [
    (nombre, *evaluar(modelo, X_eval, y_test))
    for nombre, modelo, X_eval in casos
]

# One row per model, with the same column names and order as before.
resultados = pd.DataFrame(filas, columns=["Modelo", "RMSE", "R2 Score"])

# Comparison chart: one bar per model showing its R2 Score, with the y axis
# fixed to the [0, 1] range and the model names tilted for legibility.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=resultados, x="Modelo", y="R2 Score", ax=ax)
ax.set_title("Comparación de modelos (R2 Score)")
ax.set_ylim(0, 1)
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

# ================================================
# 7. Widget interactivo de predicción
# ================================================
# One slider per feature column: it spans the observed min..max of that column
# and starts at the column mean.
sliders = {
    feature: widgets.FloatSlider(
        value=float(X[feature].mean()),
        min=float(X[feature].min()),
        max=float(X[feature].max()),
        step=0.1,
        description=feature,
    )
    for feature in X.columns
}

# Output area that the prediction text is printed into.
output = widgets.Output()

def actualizar(change=None):
    """Print the random forest prediction for the current slider values.

    Args:
        change: Change payload supplied by the widget ``observe`` callback.
            It is not read; the default lets the function be called directly.
    """
    with output:
        # wait=True postpones clearing until the new text is ready, which
        # avoids flicker while a slider is being dragged.
        output.clear_output(wait=True)
        # One-row frame carrying the training column names: the forest was
        # fitted on a DataFrame, so a bare list would make scikit-learn warn
        # about missing feature names on every update. The forest was trained
        # on unscaled features, so no scaling step is needed here.
        fila = pd.DataFrame(
            [[sliders[f].value for f in X.columns]],
            columns=X.columns,
        )
        pred = model_rf.predict(fila)[0]  # random forest
        print("Predicción estimada del precio medio:", round(pred, 3))

# Re-run the prediction whenever the value of any slider changes.
for slider in sliders.values():
    slider.observe(actualizar, names="value")

# Render all sliders followed by the output area, then print an initial
# prediction for the default slider positions.
display(*sliders.values(), output)
actualizar()
