In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pickle
import ipywidgets as widgets
from IPython.display import display



In [None]:
# Load the raw Bengaluru housing dataset from the project-relative CSV file.
df = pd.read_csv(filepath_or_buffer="house_data/Bengaluru_House_Data.csv")

In [None]:
# Basic preprocessing: keep only the modelling columns, discard rows with any
# missing value, parse the leading integer of `size` into `bhk`, and then
# remove the original `size` column.
df = (
    df[['location', 'size', 'total_sqft', 'bath', 'balcony', 'price']]
    .dropna()
    .assign(bhk=lambda frame: frame['size'].map(lambda text: int(text.split(' ')[0])))
    .drop(columns=['size'])
)

In [None]:
# Convert total_sqft to a number
def convert_sqft_to_num(x):
    """Convert a raw ``total_sqft`` entry to a float.

    Parameters
    ----------
    x : object
        Either something ``float()`` accepts directly (e.g. ``"1200"`` or
        ``1200``) or a string holding a two-part range such as
        ``"1000 - 1500"``.

    Returns
    -------
    float or None
        The numeric value; for a range, the midpoint of its two bounds.
        ``None`` when the entry cannot be interpreted (non-numeric text,
        a range with a non-numeric bound, more than two parts, or a
        non-string value that ``float()`` rejects).
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        # Not a plain number; try to read it as a "low - high" range below.
        pass

    if not isinstance(x, str):
        return None

    tokens = x.split('-')
    if len(tokens) != 2:
        return None

    try:
        low, high = (float(token) for token in tokens)
    except ValueError:
        # One of the bounds is not numeric (e.g. "abc - 1500").
        return None
    return (low + high) / 2

# Convert every total_sqft entry to a number, then drop rows that contain
# any missing value (including entries the converter could not interpret).
df = df.assign(total_sqft=df['total_sqft'].map(convert_sqft_to_num)).dropna()


In [None]:
# One-hot encode `location`. With drop_first=True the first category gets no
# indicator column; it is represented by all location_* columns being 0.
df = pd.get_dummies(data=df, columns=['location'], drop_first=True)

In [None]:
# Separate the target (`price`) from the predictors (every other column).
y = df['price']
X = df.drop(columns=['price'])

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate the model on the held-out test split
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

#print(f'MAE: {mae}')
#print(f'MSE: {mse}')
#print(f'RMSE: {rmse}')
#print(f'R2 Score: {r2}')

# Save the model
with open("house_price_model.pkl", "wb") as file:
    pickle.dump(model, file)

# Save the training column order as well. The prediction UI further down
# loads "columns.pkl" to build its input row and its location dropdown;
# without this file that load fails on a fresh run.
with open("columns.pkl", "wb") as file:
    pickle.dump(list(X_train.columns), file)


In [None]:
# Visualization (currently disabled): actual-vs-predicted scatter plot and residual histogram
# def plot_evaluation(y_test, y_pred):
#    plt.figure(figsize=(12,5))
#    plt.subplot(1,2,1)
#    sns.scatterplot(x=y_test, y=y_pred, alpha=0.5)
#    plt.xlabel("Actual Prices")
#    plt.ylabel("Predicted Prices")
#    plt.title("Actual vs Predicted Prices")

#    plt.subplot(1,2,2)
#    residuals = y_test - y_pred
#    sns.histplot(residuals, bins=50, kde=True)
#    plt.xlabel("Residuals")
#    plt.title("Residuals Distribution")
#    plt.show()

#plot_evaluation(y_test, y_pred)


In [None]:
from IPython.display import display, HTML

# Load the trained model and the feature-column order used at training time.
# NOTE: pickle.load can execute arbitrary code; only load files that were
# produced by this notebook or another trusted source.
try:
    with open("house_price_model.pkl", "rb") as file:
        model = pickle.load(file)
except Exception as e:
    print("Error al cargar el modelo:", e)
    model = None

# The columns are loaded independently so that a missing "columns.pkl" does
# not discard a model that loaded successfully.
try:
    with open("columns.pkl", "rb") as file:
        columns = pickle.load(file)
except Exception as e:
    # Fall back to the feature names scikit-learn records on an estimator
    # fitted with a DataFrame; if the model has none, `columns` stays empty.
    columns = list(getattr(model, "feature_names_in_", []))
    if not columns:
        print("Error al cargar las columnas:", e)

# Inject the CSS used by the UI below: `.title` styles the heading displayed
# right after this block and `.output` styles the prediction/error message.
# NOTE(review): no markup visible in this cell opens a `.container` element —
# confirm whether that rule is still needed.
style_html = """
    <style>
        body { font-family: Arial, sans-serif; }
        .container {
            width: 50%;
            margin: auto;
            text-align: center;
            padding: 20px;
            border-radius: 10px;
            box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.1);
        }
        .title {
            font-size: 28px;
            font-weight: bold;
            color: #2C3E50;
        }
        .widget-container {
            display: flex;
            flex-direction: column;
            align-items: center;
        }
        .widget-container > * {
            margin: 5px;
            width: 80%;
        }
        .output {
            font-size: 18px;
            margin-top: 15px;
            color: #1A5276;
            font-weight: bold;
        }
    </style>
"""
display(HTML(style_html))


# Title
display(HTML("<div class='title'>Estimador de Precios de Viviendas en Bangalore</div>"))

# Input widgets for the numeric features
sqft_input = widgets.IntText(value=1000, description="Total Sqft:")
bath_input = widgets.IntText(value=2, description="Baños:")
balcony_input = widgets.IntText(value=1, description="Balcones:")
bhk_input = widgets.IntText(value=2, description="BHK:")

# Location dropdown: list only the columns that START with the one-hot prefix
# and strip just that leading prefix, so a column that merely contains
# "location_" elsewhere in its name is neither picked up nor mangled.
location_input = widgets.Dropdown(
    options=[
        col[len("location_"):]
        for col in columns
        if col.startswith("location_")
    ],
    description="Ubicación:")

# Prediction button and the output area the click handler renders into
button = widgets.Button(description="Predecir Precio", button_style="primary")
output = widgets.Output()

# Click handler that runs the prediction
def predict_price(change):
    """Predict a price from the current widget values and render it in `output`.

    Builds a single-row frame with one entry per training column (all zeros
    except the numeric inputs and the selected location indicator), calls
    ``model.predict`` on it and displays the result, clamped at 0.

    Parameters
    ----------
    change : object
        Argument supplied by ``button.on_click``; not used.

    Any failure is shown inside `output` as a red message instead of being
    raised.
    """
    import html  # local import: only needed to escape error text for HTML

    with output:
        output.clear_output()

        # Without a loaded model or its column list there is nothing to
        # predict with; say so explicitly instead of a cryptic NoneType error.
        if model is None or len(columns) == 0:
            display(HTML("<div class='output' style='color: red;'>Error en la predicción: el modelo no está cargado</div>"))
            return

        try:
            # Start with every training column at 0, then fill in the inputs.
            data_dict = {col: 0 for col in columns}
            data_dict["total_sqft"] = sqft_input.value
            data_dict["bath"] = bath_input.value
            data_dict["balcony"] = balcony_input.value
            data_dict["bhk"] = bhk_input.value

            # Switch on the indicator column of the selected location, if any.
            location_col = f"location_{location_input.value}"
            if location_col in data_dict:
                data_dict[location_col] = 1

            # Pin the frame to the training columns, in the training order.
            input_data = pd.DataFrame([data_dict], columns=list(columns))

            # Run the prediction
            prediction = model.predict(input_data)[0]
            prediction = max(0, prediction)  # avoid negative prices

            display(HTML(f"<div class='output'>Precio estimado: {prediction:.2f} Lakhs</div>"))
        except Exception as e:
            # Escape the message so characters such as "<" cannot break the markup.
            display(HTML(f"<div class='output' style='color: red;'>Error en la predicción: {html.escape(str(e))}</div>"))

# Run predict_price whenever the button is clicked.
button.on_click(predict_price)

# Lay the controls out in one centred column using a real container widget.
# Separate display(HTML("<div ...>")) / display(HTML("</div>")) calls each
# produce their own output fragment, so they cannot wrap the widgets that
# are displayed between them.
widget_container = widgets.VBox(
    [sqft_input, bath_input, balcony_input, bhk_input, location_input, button, output],
    layout=widgets.Layout(align_items="center"),
)
# Reuse the `.widget-container` CSS rules defined in the style block above.
widget_container.add_class("widget-container")
display(widget_container)
