# In [None]:
import numpy as np
import pandas as pd

# Load the dataset from disk and encode the two two-valued columns as 0/1.
# Values that are not keys of a mapping become NaN (pandas ``Series.map``
# behaviour with a dict).
df = pd.read_csv("../data/pisos.csv").assign(
    ascensor=lambda frame: frame["ascensor"].map({"S": 1, "N": 0}),
    localizacion=lambda frame: frame["localizacion"].map(
        {"EXTERIOR": 1, "INTERIOR": 0}
    ),
)

# Normalize the 'planta' (floor) column
def transformar_planta(valor):
    """Convert a raw floor label into a numeric floor value.

    Args:
        valor: Raw cell value from the ``planta`` column. Normally a string
            such as ``"3ª"``, ``"BAJO"`` or ``"ÁTICO"``; numbers and missing
            values (``NaN``/``None``) are also accepted.

    Returns:
        ``100`` when the label contains ``"ÁTICO"``, ``0`` when it contains
        ``"BAJO"``, the integer floor number otherwise, or ``np.nan`` when
        the value cannot be interpreted as a floor.
    """
    if not isinstance(valor, str):
        # Missing cells reach this function as NaN/None. A substring test on
        # a non-string would raise TypeError, so handle them up front.
        try:
            return int(valor)
        except (TypeError, ValueError, OverflowError):
            return np.nan

    if "ÁTICO" in valor:
        # Fixed value used for attics; presumably a "top floor" sentinel.
        return 100
    if "BAJO" in valor:
        return 0

    try:
        # Strip the feminine ordinal marker, e.g. "3ª" -> "3".
        return int(valor.replace("ª", ""))
    except ValueError:
        # Label is not a recognizable floor number.
        return np.nan

# Replace every raw 'planta' label with its numeric form.
df["planta"] = df["planta"].apply(transformar_planta)

# Discard, in place, every row that has at least one missing value.
df.dropna(axis="index", how="any", inplace=True)

# Split the frame into the target (y) and the predictors (X).
y = df["PrecioActual"]
X = df.drop("PrecioActual", axis="columns")

# One-hot encode 'zona'; the first category is dropped and acts as baseline.
X_encoded = pd.get_dummies(X, columns=["zona"], drop_first=True)

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility).
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training split (``fit`` returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Score the fitted model on the held-out split.
from sklearn.metrics import mean_absolute_error, r2_score

y_pred = model.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

# Persist the trained model to disk.
# `pickle` was used here without being imported anywhere in the visible file,
# so it is imported locally, matching the file's mid-script import style.
import pickle
from pathlib import Path

model_path = "../models/modelo_entrenado.pkl"

# Opening a file for writing fails if its directory is missing, so create it.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)

with open(model_path, "wb") as f:
    pickle.dump(model, f)

# Trailing expression: shown as the cell output when run in a notebook.
model_path, mae, r2, X_encoded.columns.tolist()
