In [None]:
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

### Regresión Lineal

In [9]:
# Read the listings dataset from the CSV file.
df = pd.read_csv("../data/pisos.csv")

# Turn the two two-valued text columns into 0/1 flags. Series.map leaves NaN
# wherever a cell holds a value that is not a key of the mapping.
df = df.assign(
    ascensor=df["ascensor"].map({"S": 1, "N": 0}),
    localizacion=df["localizacion"].map({"EXTERIOR": 1, "INTERIOR": 0}),
)

# Convert the textual 'planta' (floor) labels into numeric codes.
def transformar_planta(valor):
    """Map one raw floor label to a numeric floor code.

    Labels containing "ÁTICO" return 100, labels containing "BAJO" return 0,
    and any other string is parsed as an integer after removing the ordinal
    marker "ª".

    Args:
        valor: A single cell of the 'planta' column. Usually a string, but
            missing cells (None / float NaN) and already-numeric values are
            accepted as well.

    Returns:
        The integer floor code, or np.nan when the value is missing or
        cannot be interpreted as an integer.
    """
    if not isinstance(valor, str):
        # The substring checks below raise TypeError on non-strings, so
        # handle them here: numbers pass through, missing values become NaN.
        try:
            return int(valor)
        except (TypeError, ValueError, OverflowError):
            return np.nan

    if "ÁTICO" in valor:
        return 100
    if "BAJO" in valor:
        return 0

    try:
        return int(valor.replace("ª", ""))
    except ValueError:
        # Only a failed integer parse is expected here; anything else should
        # surface instead of being silently swallowed.
        return np.nan

# Replace the textual floor labels with their numeric codes.
df["planta"] = df["planta"].apply(transformar_planta)

# Keep only rows that have no missing value in any column. This also removes
# rows whose floor label could not be parsed, since transformar_planta
# returns NaN for those.
df = df.dropna()

# Target is the listing price; every remaining column is a predictor.
y = df["PrecioActual"]
X = df.drop(columns=["PrecioActual"])

# One-hot encode 'zona'; drop_first=True leaves out the first category.
X_encoded = pd.get_dummies(
    X,
    columns=["zona"],
    drop_first=True,
)

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training portion (fit returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f"MAE: {mae}, MSE: {mse}, R2: {r2}")

# Serialize the fitted model to disk with pickle.
model_path = "../models/modelo_entrenado.pkl"

# open(..., "wb") does not create missing parent folders and would raise
# FileNotFoundError, so make sure the target directory exists first.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, "wb") as f:
    pickle.dump(model, f)

# Cell output: artifact path, two of the metrics, and the list of feature
# columns the model was fitted on.
model_path, mae, r2, X_encoded.columns.tolist()


MAE: 253677.3570063976, MSE: 157216783944.83176, R2: 0.8128476574779491


('../models/modelo_entrenado.pkl',
 253677.3570063976,
 0.8128476574779491,
 ['metros',
  'habitaciones',
  'ascensor',
  'localizacion',
  'planta',
  'baños',
  'zona_barajas',
  'zona_barrio-de-salamanca',
  'zona_carabanchel',
  'zona_centro',
  'zona_chamartin',
  'zona_chamberi',
  'zona_ciudad-lineal',
  'zona_fuencarral',
  'zona_hortaleza',
  'zona_latina',
  'zona_moncloa',
  'zona_moratalaz',
  'zona_puente-de-vallecas',
  'zona_retiro',
  'zona_san-blas',
  'zona_tetuan',
  'zona_usera',
  'zona_vicalvaro',
  'zona_villa-de-vallecas',
  'zona_villaverde'])

### Regresión Lineal - variable objetivo con transformación logarítmica (log1p)

In [6]:
# Read the listings dataset from the CSV file again, so this cell starts from
# the raw data.
df = pd.read_csv("../data/pisos.csv")

# Turn the two two-valued text columns into 0/1 flags. Series.map leaves NaN
# wherever a cell holds a value that is not a key of the mapping.
df = df.assign(
    ascensor=df["ascensor"].map({"S": 1, "N": 0}),
    localizacion=df["localizacion"].map({"EXTERIOR": 1, "INTERIOR": 0}),
)

# Convert the textual 'planta' (floor) labels into numeric codes.
def transformar_planta(valor):
    """Map one raw floor label to a numeric floor code.

    Labels containing "ÁTICO" return 100, labels containing "BAJO" return 0,
    and any other string is parsed as an integer after removing the ordinal
    marker "ª".

    Args:
        valor: A single cell of the 'planta' column. Usually a string, but
            missing cells (None / float NaN) and already-numeric values are
            accepted as well.

    Returns:
        The integer floor code, or np.nan when the value is missing or
        cannot be interpreted as an integer.
    """
    if not isinstance(valor, str):
        # The substring checks below raise TypeError on non-strings, so
        # handle them here: numbers pass through, missing values become NaN.
        try:
            return int(valor)
        except (TypeError, ValueError, OverflowError):
            return np.nan

    if "ÁTICO" in valor:
        return 100
    if "BAJO" in valor:
        return 0

    try:
        return int(valor.replace("ª", ""))
    except ValueError:
        # Only a failed integer parse is expected here; anything else should
        # surface instead of being silently swallowed.
        return np.nan

# Replace the textual floor labels with their numeric codes.
df["planta"] = df["planta"].apply(transformar_planta)

# Keep only rows that have no missing value in any column. This also removes
# rows whose floor label could not be parsed, since transformar_planta
# returns NaN for those.
df = df.dropna()

# Target is log(1 + price); every column except the price is a predictor.
y = np.log1p(df["PrecioActual"])
X = df.drop(columns=["PrecioActual"])

# One-hot encode 'zona'; drop_first=True leaves out the first category.
X_encoded = pd.get_dummies(
    X,
    columns=["zona"],
    drop_first=True,
)

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training portion (fit returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Evaluation, reported on the original price scale.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# The target was transformed with log1p, so expm1 (its inverse) is applied to
# both the held-out targets and the predictions before scoring.
y_test_real = np.expm1(y_test)
y_pred_log = model.predict(X_test)
y_pred = np.expm1(y_pred_log)

r2 = r2_score(y_test_real, y_pred)
mse = mean_squared_error(y_test_real, y_pred)
mae = mean_absolute_error(y_test_real, y_pred)
print(f"MAE: {mae}, MSE: {mse}, R2: {r2}")

# Saving is disabled for this variant. NOTE(review): the path below is the
# same one the plain-target cell writes to, so enabling this would overwrite
# that file — presumably why it is commented out; confirm before re-enabling.
#import pickle
#model_path = "../models/modelo_entrenado.pkl"
#with open(model_path, "wb") as f:
#    pickle.dump(model, f)
#
#model_path, mae, mse, r2, X_encoded.columns.tolist()

MAE: 280178.61107104114, MSE: 417293787234.72784, R2: 0.5032495396402304
