In [105]:
%%writefile .streamlit/config.toml
# Streamlit theme section; the %%writefile cell magic above saves it to .streamlit/config.toml
[theme]
primaryColor = "#5DAA54"           
backgroundColor = "#F5F5F5"         
secondaryBackgroundColor = "#B7CDE2" 
textColor = "#2C3E50"               
font = "sans serif"


Overwriting .streamlit/config.toml


In [106]:
%%writefile airbnb.py

# Importamos librerías necesarias
import streamlit as st
import streamlit.components.v1 as components
import plotly.express as px
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import base64
import pydeck as pdk

# Shared pastel palette used by the dashboard charts
color_victoria = ["#83C5BE", "#97B5D0", "#A4D4AE", "#B799D6"]


# Rotate through the palette by index
def color_indice(i):
    """Return two consecutive palette colours starting at index ``i``.

    Both positions wrap around the end of ``color_victoria``, so any integer
    index (including negative ones) is valid.
    """
    total = len(color_victoria)
    primero = color_victoria[i % total]
    segundo = color_victoria[(i + 1) % total]
    return [primero, segundo]
   


# Global CSS overrides: page width/padding plus sidebar colours.
# The two sidebar colours were invalid CSS hex values in the previous version
# ("#A9DGE5" contains the non-hex digit "G"; "#560B28D" has seven digits), so
# browsers discarded those declarations.
# NOTE(review): "#A9D6E5" and "#560BAD" are the closest valid colours — confirm
# them against the intended palette.
st.markdown("""
    <style>
    .block-container {
        max-width: 1200px;
        padding-left: 2rem;
        padding-right: 2rem;
        padding-top: 2rem;
    }

    /* Estilo para el sidebar */
    section[data-testid="stSidebar"] {
        background-color: #A9D6E5;
        color: #2C3E50;
        padding: 20px;
        border-right: 2px solid #560BAD;
    }

    /* Cambiar color de los títulos en sidebar */
    .sidebar .block-container h1, 
    .sidebar .block-container h2, 
    .sidebar .block-container h3 {
        color: #2C3E50;
    }

    /* Cambiar color de los labels de los widgets */
    .css-1r6slb0 p {
        color: #2C3E50;
        font-weight: bold;
    }
    </style>
""", unsafe_allow_html=True)



# Load a local image and expose it as base64 text
def get_img_as_base64(file_path):
    """Read ``file_path`` as bytes and return its base64 encoding as ``str``.

    Raises whatever ``open`` raises when the file cannot be read.
    """
    with open(file_path, mode="rb") as imagen:
        codificado = base64.b64encode(imagen.read())
    return str(codificado, "utf-8")

# Inline the logo as a data URI so the header HTML is self-contained
img_base64 = get_img_as_base64("Growthtrack.png")

# Header markup (logo + title) rendered through the components API.
# NOTE(review): components.html is documented to render inside an iframe, so
# the `position: fixed` and `.stApp` rules below presumably only apply within
# that frame and not to the surrounding app — confirm.
header_html = f"""
    <style>
    .custom-header {{
        padding: 15px 50px;
        position: fixed;
        top: 0;
        left: 0;
        height: 140px;
        width: 100vw;
        z-index: 9999;
        display: flex;
        align-items: center;
        box-shadow: 0 1px 5px rgba(0,0,0,0.1);
    }}

    .custom-header img {{
        height: 140px;
        margin-right: 20px;
    }}

    .custom-header h1 {{
        font-size: 80px;
        font-weight: 700;
        color: #2C3E50;
        margin: 0;
        font-family: fangsong;
    }}

    .stApp {{
        margin-top: 160px;
    }}
    </style>

    <div class="custom-header">
        <img src="data:image/png;base64,{img_base64}">
        <h1>GrowthTrack</h1>
    </div>
"""
components.html(header_html, height=140)


# Spacer so the page content does not overlap the header
st.markdown("<div style='padding-top: 50px'></div>", unsafe_allow_html=True)

# Convert categorical / text columns into numeric ones
def transformar_variables(df):
    """Encode the categorical listing columns of ``df`` as numbers, in place.

    Columns that are absent are skipped. The function is safe to run more than
    once on the same frame: already-encoded columns are left as they are
    instead of raising or being wiped to NaN.

    Args:
        df: DataFrame of listings; it is modified in place.

    Returns:
        The same ``df`` object, for call chaining.
    """
    # "t"/"f" flags -> 1/0. Booleans and 0/1 are accepted as well so that
    # re-running on encoded data keeps the values; anything else becomes NaN.
    a_binario = {"t": 1, "f": 0, True: 1, False: 0}
    binarios = ["host_is_superhost", "host_identity_verified", "instant_bookable", "has_availability"]
    for col in binarios:
        if col in df.columns:
            df[col] = df[col].map(lambda v: a_binario.get(v, float("nan")))

    # "95%" -> 95. Casting to str first keeps the `.str` accessor from raising
    # AttributeError when the column is already numeric.
    if "host_response_rate" in df.columns:
        tasa = df["host_response_rate"].astype(str).str.replace("%", "", regex=False)
        df["host_response_rate"] = pd.to_numeric(tasa, errors='coerce')

    # Replace each property type by its relative frequency. A numeric column
    # has already been encoded, so it is not re-encoded.
    if "property_type" in df.columns and not pd.api.types.is_numeric_dtype(df["property_type"]):
        frecuencia = df["property_type"].value_counts(normalize=True)
        df["property_type"] = df["property_type"].map(frecuencia)

    return df


# Load the cleaned dataset
# st.cache_data (rather than st.cache_resource) hands every rerun its own copy
# of the cached values, so in-place edits made by a page cannot leak into the
# cache and into other sessions.
@st.cache_data
def load_data():
    """Read ``Victoria_limpio.csv`` and derive the column groups used by the pages.

    Returns:
        A tuple ``(df, numeric_cols, text_cols, unique_categories_host, numeric_df)``:
        the full frame, the names of its float/int columns, the names of its
        object columns, the distinct ``property_type`` values (empty list when
        the column is absent) and the numeric-only sub-frame.
    """
    df = pd.read_csv("Victoria_limpio.csv")
    # The saved-index column is only present when the CSV was written with its
    # index; tolerate its absence instead of raising KeyError.
    df = df.drop(columns=['Unnamed: 0'], errors="ignore")

    numeric_df = df.select_dtypes(['float', 'int'])
    numeric_cols = numeric_df.columns

    text_df = df.select_dtypes(['object'])
    text_cols = text_df.columns

    if "property_type" in df.columns:
        unique_categories_host = df["property_type"].unique()
    else:
        unique_categories_host = []

    return df, numeric_cols, text_cols, unique_categories_host, numeric_df

# Load the data once per rerun (served from the cache after the first call)
df, numeric_cols, text_cols, unique_categories_host, numeric_df = load_data()

# Sidebar navigation: `pagina` holds the selected option and drives the page
# branches below.
from streamlit_option_menu import option_menu 

with st.sidebar: 
    pagina = option_menu(
        menu_title = None,
        options = ["Home Page", "DataBase", "Gráficos","Regresion", "Mapa"],
        # One icon name per option. The first entry used to be "house -fill"
        # (stray space), which is not a single icon name.
        icons = ["house-fill", "table", "bar-chart-line", "activity", "geo-alt"],
        default_index = 0, 
        orientation = "vertical",
        styles = {
            "container" : {"padding": "5px", "background-color": "#B7CDE2"},
            "icon" : {"color": "#2C3E50", "font-size" : "40px"},
            # Link text is sized to 0px while icons are 40px — presumably to
            # show an icon-only menu.
            "nav-link": {
                "font-size": "0px",
                "text-align": "center",
                "margin": "0px",
                "--hover-color": "#B39CD0", 
            },
            "nav-link-selected": {"background-color": "#B7CDE2"},
        }
    )

# Home page: intro text, headline metrics, overview charts and videos
if pagina == "Home Page":
    st.title("Victoria, Canada")

    # Intro copy on the left, city picture on the right
    col_texto, col_foto = st.columns(2)
    with col_texto:
        st.markdown("""
        Victoria, la capital de Columbia Británica, es conocida por su atmósfera costera, rica historia colonial, 
        arquitectura majestuosa y sus hermosos jardines. Su economía combina turismo, tecnología y servicios, 
        convirtiéndola en un destino ideal tanto para vivir como para visitar.
        """)
    
        st.markdown("## ¿Por qué elegir Victoria?")
        st.markdown("""
        - Naturaleza abundante y paisajes costeros únicos  
        - Arquitectura histórica y jardines espectaculares  
        - Cultura relajada, perfecta para escapar del estrés  
        - Estilo de vida sostenible y amigable
        """)
    with col_foto:
        st.image("victoria.png", use_container_width=True)

    # Headline metrics: (label, lazily computed display value), one per column
    st.markdown("## Métricas clave")
    metricas_home = [
        ("Hospedajes disponibles", lambda: f"{df.shape[0]}"),
        ("Precio promedio", lambda: f"${df['price'].mean():.2f}"),
        (
            "Anfitriones verificados",
            lambda: f"{(df['host_identity_verified'] == 't').mean() * 100:.1f}%",
        ),
    ]
    for celda_metrica, (rotulo, calcular) in zip(st.columns(3), metricas_home):
        with celda_metrica:
            st.metric(rotulo, calcular())

    # Overview charts: (heading, lazy figure builder), one per column
    graficos_home = [
        ("### Tipos de habitación", lambda: px.pie(df, names="room_type")),
        (
            "### Distribución por vecindario",
            lambda: px.bar(df, x="neighbourhood_cleansed", color="neighbourhood_cleansed"),
        ),
        (
            "### Relación precio-noches",
            lambda: px.scatter(df, x="price", y="minimum_nights", color="room_type"),
        ),
    ]
    for celda_grafico, (encabezado, construir) in zip(st.columns(3), graficos_home):
        with celda_grafico:
            st.markdown(encabezado)
            st.plotly_chart(construir(), use_container_width=True)

    # Videos, one per column
    st.markdown("## 🎥 ¡Conoce sobre Victoria!")
    videos_home = [
        "https://youtu.be/TetH6ye3Z2c?si=uQ5MQc6_FTzfRn_U",
        "https://youtu.be/iytB-8FlJ4I?si=X8ToVbUAQPI5tLx2",
        "https://youtu.be/2BJWZWQJ7s0?si=1FD1dD329I_Dc4zE",
        "https://youtu.be/cIFe4Uw-bS4?si=Dmpotdhk-iZ63Phi",
    ]
    for celda_video, enlace in zip(st.columns(4), videos_home):
        with celda_video:
            st.video(enlace)

    

# Mostrar database
elif pagina == "DataBase":
    st.title("Información relevante de hospedajes")

    # Valores por defecto
    precio_min = int(df["price"].min())
    precio_max = int(df["price"].max())
    room_types = df["room_type"].unique().tolist()
    default_columns = ["room_type", "price", "neighbourhood_cleansed"]

    # Estado inicial
    if "precio_filtro" not in st.session_state:
        st.session_state["precio_filtro"] = (precio_min, precio_max)
    if "room_filtro" not in st.session_state:
        st.session_state["room_filtro"] = room_types
    if "cols_filtro" not in st.session_state:
        st.session_state["cols_filtro"] = default_columns

    # Botón para resetear filtros
    if st.sidebar.button("Restablecer filtros"):
        st.session_state["precio_filtro"] = (precio_min, precio_max)
        st.session_state["room_filtro"] = room_types
        st.session_state["cols_filtro"] = default_columns

    # Cargamos valores desde el estado
    precio_val = st.session_state["precio_filtro"]
    room_val = st.session_state["room_filtro"]
    cols_val = st.session_state["cols_filtro"]

    # Filtros interactivos (sin key)
    with st.sidebar:
        st.markdown("### Filtros de búsqueda")
        precio_val = st.slider("Filtrar por rango de precios", precio_min, precio_max, precio_val)
        room_val = st.multiselect("Filtrar por tipo de cuarto", options=room_types, default=room_val)
        cols_val = st.multiselect("Selecciona columnas a visualizar", options=df.columns.tolist(), default=cols_val)

    # Guardamos valores nuevamente al estado (para siguiente renderizado)
    st.session_state["precio_filtro"] = precio_val
    st.session_state["room_filtro"] = room_val
    st.session_state["cols_filtro"] = cols_val

    # Aplicar filtros al DataFrame
    df_filtrado = df[
        (df["price"] >= precio_val[0]) &
        (df["price"] <= precio_val[1]) &
        (df["room_type"].isin(room_val))
    ]

    # Mostrar DataFrame filtrado y resumen
    st.markdown(f"### Datos filtrados ({df_filtrado.shape[0]} filas)")
    st.dataframe(df_filtrado[cols_val], use_container_width=True)

    st.markdown("### Resumen estadístico")
    st.dataframe(df_filtrado.describe(), use_container_width=True)

    # Visualizaciones
    colh1, colh2 = st.columns(2)
    with colh1:
        st.markdown("#### Histograma de Precios")
        fig_price = px.histogram(df_filtrado, x="price", nbins=30, title="Distribución de precios", color_discrete_sequence=color_victoria)
        st.plotly_chart(fig_price, use_container_width=True)
    with colh2:
        st.markdown("#### Tipos de habitación")
        fig_room = px.histogram(df_filtrado, x="room_type", title="Distribución por tipo de habitación", color="room_type", color_discrete_sequence=px.colors.qualitative.Pastel)
        st.plotly_chart(fig_room, use_container_width=True)


# Charts page: a title block followed by four tabs of plots
elif pagina == "Gráficos":
    st.title("Extracción de características")
    st.subheader("Visualización de múltiples gráficos independientes")
    st.markdown("---")

    # One tab per chart family; each tab is filled by its own `with tabs[n]:` block
    tabs = st.tabs(["Pieplots", "Barplots", "Lineplots", "Scatterplots"])

    # ======== Pie charts ========
    with tabs[0]:
        st.markdown("## Características dicotómicas")
        # 2x2 grid: both rows of columns are created first, then filled in order
        celdas_pie = [*st.columns(2), *st.columns(2)]
        columnas_pie = [
            "host_identity_verified",
            "host_is_superhost",
            "has_availability",
            "instant_bookable",
        ]

        # The grid position doubles as the palette index of each chart
        for pos_pie, (celda_pie, columna_pie) in enumerate(zip(celdas_pie, columnas_pie)):
            with celda_pie:
                st.markdown(f"### {columna_pie}")
                fig_pie = px.pie(df, names=columna_pie, color_discrete_sequence=color_indice(pos_pie))
                st.plotly_chart(fig_pie, use_container_width=True)

    # ======== Bar charts ========
    with tabs[1]:
        st.markdown("## Top 4 categorías más frecuentes")

        # Derive the host's sign-up year on a copy. `df` is the frame returned
        # by the cached loader and is shared by every page, so adding or
        # converting columns on it here would change what other pages see.
        df_barras = df.assign(
            host_year=pd.to_datetime(df["host_since"], errors='coerce').dt.year
        )

        def top4_barplot(df, categoria, y_col="price", i=0):
            """Bar chart of the mean ``y_col`` for the 4 most frequent ``categoria`` values.

            The top 4 are chosen by row count; the bar heights are averages.
            ``i`` selects the colour pair via ``color_indice``.
            """
            top4 = df[categoria].value_counts().nlargest(4).index
            df_top = df[df[categoria].isin(top4)]
            agrupado = df_top.groupby(categoria)[y_col].mean().reset_index()
            fig = px.bar(agrupado, x=categoria, y=y_col, title=f"Top 4 {categoria} vs Promedio {y_col}",  color_discrete_sequence=color_indice(i))
            return fig

        row1_bar1, row1_bar2 = st.columns(2)
        row2_bar1, row2_bar2 = st.columns(2)

        with row1_bar1:
            # Every room type is shown here (no top-4 cut)
            st.markdown("### room_type vs price")
            fig_bar1 = px.bar(df.groupby("room_type")["price"].mean().reset_index(), 
                            x="room_type", y="price", 
                            title="room_type vs Promedio price",color_discrete_sequence=color_victoria)
            st.plotly_chart(fig_bar1, use_container_width=True)

        with row1_bar2:
            st.markdown("### property_type")
            fig_bar2 = top4_barplot(df_barras, "property_type", i=1)
            st.plotly_chart(fig_bar2, use_container_width=True)

        with row2_bar1:
            st.markdown("### host_year")
            fig_bar3 = top4_barplot(df_barras, "host_year", i=2)
            st.plotly_chart(fig_bar3, use_container_width=True)

        with row2_bar2:
            st.markdown("### neighbourhood_cleansed")
            fig_bar4 = top4_barplot(df_barras, "neighbourhood_cleansed", i=3)
            st.plotly_chart(fig_bar4, use_container_width=True)

    # ======== Line charts ========
    with tabs[2]:
        st.markdown("## Agrupaciones realizadas por promedio")

        def agrupar_y_graficar_linea(df, x_col, y_col="price", i=0):
            """Line chart of the mean ``y_col`` for each distinct ``x_col`` value.

            ``i`` selects the colour pair via ``color_indice``.
            """
            promedios = df.groupby(x_col)[y_col].mean().reset_index()
            return px.line(
                promedios,
                x=x_col,
                y=y_col,
                title=f"Promedio de {y_col} por {x_col}",
                color_discrete_sequence=color_indice(i),
            )

        # 2x2 grid: both rows of columns are created first, then filled in order
        celdas_linea = [*st.columns(2), *st.columns(2)]
        ejes_linea = ["availability_365", "minimum_nights", "accommodates", "number_of_reviews"]

        for pos_linea, (celda_linea, eje_linea) in enumerate(zip(celdas_linea, ejes_linea)):
            with celda_linea:
                st.markdown(f"### {eje_linea} vs price")
                fig_line = agrupar_y_graficar_linea(df, eje_linea, i=pos_linea)
                st.plotly_chart(fig_line, use_container_width=True)

    # ======== Scatter plots ========
    with tabs[3]:

        st.markdown("## Gráficos de disperciones")
        # 2x2 grid: both rows of columns are created first, then filled in order
        celdas_scatter = [*st.columns(2), *st.columns(2)]
        ejes_scatter = ["number_of_reviews", "minimum_nights", "availability_365", "accommodates"]

        # Price is always on the y axis; the grid position picks the colour pair
        for pos_scatter, (celda_scatter, eje_scatter) in enumerate(zip(celdas_scatter, ejes_scatter)):
            with celda_scatter:
                st.markdown(f"### price vs {eje_scatter}")
                fig_scatter = px.scatter(
                    df,
                    x=eje_scatter,
                    y="price",
                    color_discrete_sequence=color_indice(pos_scatter),
                )
                st.plotly_chart(fig_scatter, use_container_width=True)

#REGRESIONES 
elif pagina == "Regresion": 
    from sklearn.linear_model import LinearRegression, LogisticRegression  
    import seaborn as sns
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
    from sklearn.preprocessing import StandardScaler
    import plotly.graph_objects as go

    def crear_donut_metrica(valor, etiqueta, color="#83C5BE"):
        """Build a 220x220 donut figure showing ``valor`` with ``etiqueta`` in its centre.

        The ring is split into ``valor`` and ``1 - valor``; the centre text shows
        the label and the value with two decimals.
        NOTE(review): the split assumes ``valor`` lies in [0, 1]; callers also
        pass coefficients and intercepts, which can fall outside that range —
        confirm how the ring should look for those.
        """
        anillo = go.Pie(
            values=[valor, 1 - valor],
            labels=["", ""],
            hole=0.7,
            marker_colors=[color, "#F0F0F0"],
            textinfo='none',
        )
        texto_central = dict(
            text=f"<b>{etiqueta}</b><br><span style='font-size:24px'>{valor:.2f}</span>",
            x=0.5,
            y=0.5,
            font_size=20,
            showarrow=False,
            align='center',
        )
        fig = go.Figure(anillo)
        fig.update_layout(
            showlegend=False,
            margin=dict(t=0, b=0, l=0, r=0),
            annotations=[texto_central],
            width=220,
            height=220,
        )
        return fig

    def estilizar_tabla(df):
        return df.style.set_properties(**{
            'background-color': '#F5F5F5',
            'color': '#2C3E50',
            'border-color': '#DDD',
            'text-align': 'center'
        }).set_table_styles([
            {'selector': 'th', 'props': [('background-color', '#B39CD0'), ('color', 'white'), ('font-weight', 'bold')]}
        ])

    st.title("Modelado Predictivo")

    # Columns removed from the modelling candidates up front
    columnas_a_excluir = [
        "id", "latitude", "longitude",
        "calculated_host_listings_count",
        "calculated_host_listings_count_entire_homes",
        "calculated_host_listings_count_private_rooms",
        "calculated_host_listings_count_shared_rooms"
    ]

    # Also remove numeric columns whose |correlation| with the binary
    # "is an entire home/apt" indicator is below 0.05.
    df_temp = df.copy()
    df_temp["target"] = df_temp["room_type"].apply(lambda x: 1 if x == "Entire home/apt" else 0)
    corr_target = df_temp[list(numeric_cols) + ["target"]].corr()["target"].abs()
    columnas_baja_corr = corr_target[corr_target < 0.05].index.tolist()
    # "price" is the response variable of the simple regressions on this page,
    # so it must survive the filter even when it barely correlates with the
    # room-type indicator.
    columnas_a_eliminar = list(set(columnas_a_excluir + columnas_baja_corr) - {"price"})
    numeric_cols_limpias = [col for col in numeric_cols if col not in columnas_a_eliminar]

    with st.sidebar:
        mostrar_heatmap = st.checkbox("Mostrar Heatmap de Correlación", value=True)
        st.markdown("### Selección de regresión")
        regresion = st.selectbox(label="Tipo de regresión", options=["Simple", "Multiple", "Logistica"])

    if mostrar_heatmap:
        st.subheader("Heatmap de correlación de variables numéricas")
        corr_abs = df[numeric_cols_limpias].corr().abs()
        # Keep variables that reach |r| >= 0.3 with at least one OTHER variable.
        # The diagonal is masked out: every variable correlates 1.0 with itself,
        # which would make the filter keep everything.
        fuera_diagonal = ~np.eye(len(corr_abs), dtype=bool)
        mask_corr = (corr_abs >= 0.3) & fuera_diagonal
        vars_filtradas = corr_abs.columns[mask_corr.any()].tolist()

        if vars_filtradas:
            corr_filtrada = corr_abs.loc[vars_filtradas, vars_filtradas].round(2)

            fig = px.imshow(
                corr_filtrada,
                text_auto=True,
                color_continuous_scale=["#F5F5F5", "#B39CD0", "#6A3FA0"]
            )
            fig.update_layout(width=1500, height=800)
            st.plotly_chart(fig, use_container_width=True)
        else:
            # Nothing to draw: no pair of variables reaches the threshold
            st.info("No hay pares de variables con correlación mayor o igual a 0.3.")

    if regresion == "Simple":
        st.subheader("Regresión Lineal Simple")
        # One tab per room type; the tab label is also the `room_type` value to filter on
        nombres_tabs = ["Entire home/apt", "Private room", "Shared room", "Hotel room"]
        tabs = st.tabs(nombres_tabs)
        # Make sure "price" is part of the correlation matrix even if the
        # low-correlation filter removed it from `numeric_cols_limpias`;
        # otherwise `corr()["price"]` raises KeyError.
        columnas_corr = list(dict.fromkeys([*numeric_cols_limpias, "price"]))
        for nombre_tab, tab in zip(nombres_tabs, tabs):
            with tab:
                df_tipo = df[df["room_type"] == nombre_tab].dropna(subset=["price"])
                if df_tipo.shape[0] < 2:
                    st.warning("No hay suficientes datos para este tipo de cuarto.")
                    continue
                # Up to 5 predictors with the strongest |correlation| to price (at least 0.1)
                corr_subset = df_tipo[columnas_corr].corr()["price"].abs()
                top_vars = corr_subset.drop("price", errors="ignore").sort_values(ascending=False).head(5)
                top_vars = top_vars[top_vars >= 0.1]
                if top_vars.empty:
                    st.warning("No hay variables suficientemente correlacionadas con 'price'.")
                    continue
                with st.sidebar:
                    # The key is built from the tab name. It used to interpolate
                    # the tab container object, whose text form is not a
                    # dependable widget identifier.
                    seleccion = st.selectbox("Variable independiente:", options=top_vars.index.tolist(), key=f"{nombre_tab}_selector")
                df_valid = df_tipo.dropna(subset=[seleccion])
                X = df_valid[[seleccion]]
                y = df_valid["price"]
                # Fit and predict on the same rows (no train/test split here)
                model = LinearRegression()
                model.fit(X, y)
                y_pred = model.predict(X)

                # Chart 1: scatter of the raw data
                st.markdown("### Dispersión de datos reales")

                df_plot = df_valid.copy()
                df_plot["room_type_numeric"] = df_plot["room_type"].astype("category").cat.codes

                fig_disp = px.scatter()
                fig_disp.add_scatter(x=df_plot["room_type_numeric"], y=df_plot["price"], mode="markers",
                                    name="room_type vs price", marker=dict(color="#78B6C3"))
                fig_disp.add_scatter(x=df_plot[seleccion], y=df_plot["price"], mode="markers",
                                    name=f"{seleccion} vs price", marker=dict(color="#3E7D95"))
                fig_disp.update_layout(
                    title=f"{nombre_tab}: Dispersión de room_type y {seleccion} vs price",
                    xaxis_title="room_type (codificada)",
                    yaxis_title="price"
                )
                st.plotly_chart(fig_disp, use_container_width=True)

                # Chart 2: observed points plus the fitted line
                st.markdown("### Gráfico de Regresión")
                fig = px.scatter(x=df_valid[seleccion], y=y, labels={"x": seleccion, "y": "price"},
                                 title=f"{nombre_tab}: Real vs Predicho")
                fig.add_scatter(x=df_valid[seleccion], y=y_pred, mode="lines", name="Predicción", line=dict(color="#3E7D95"))
                st.plotly_chart(fig, use_container_width=True)

                # Slope, intercept and in-sample R² as donut gauges
                st.markdown("### Métricas del modelo")
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.plotly_chart(crear_donut_metrica(model.coef_[0], "Coeficiente"))
                with col2:
                    st.plotly_chart(crear_donut_metrica(model.intercept_, "Intercepto"))
                with col3:
                    st.plotly_chart(crear_donut_metrica(model.score(X, y), "R²"))
                tabla_corr = top_vars.reset_index()
                tabla_corr.columns = ["Variable", "|Correlación con price|"]
                st.dataframe(estilizar_tabla(tabla_corr), use_container_width=True)

    if regresion == "Multiple":
        st.subheader("Regresión Lineal Múltiple")
        # (dependent, independent) pairs shown as simple models; the first
        # element of each pair is also offered as target of the multiple model
        mejores_modelos = [
            ("availability_60", "availability_90"),
            ("availability_30", "availability_60"),
            ("bedrooms", "accommodates"),
            ("availability_90", "availability_30")
        ]
        mostrar_modelos = st.checkbox("Mostrar modelos simples", value=True)
        if mostrar_modelos:
            st.markdown("#### Modelos de regresión lineal simple")
            # Two charts per row
            for i in range(0, len(mejores_modelos), 2):
                cols = st.columns(2)
                for j, (var_dep, var_indep) in enumerate(mejores_modelos[i:i+2]):
                    with cols[j]:
                        df_simple = df.dropna(subset=[var_dep, var_indep])
                        X = df_simple[[var_indep]]
                        y = df_simple[var_dep]
                        modelo = LinearRegression().fit(X, y)
                        y_pred = modelo.predict(X)
                        fig = px.scatter(x=X[var_indep], y=y, labels={"x": var_indep, "y": var_dep},
                                         title=f"{var_dep} vs {var_indep} (R² = {modelo.score(X, y):.2f})")
                        fig.add_scatter(x=X[var_indep], y=y_pred, mode="lines", name="Predicción", line=dict(color="#3E7D95"))
                        st.plotly_chart(fig, use_container_width=True)

        st.markdown("### Entrenamiento del modelo múltiple")
        posibles_targets = [x[0] for x in mejores_modelos]
        with st.sidebar:
            target = st.selectbox("Variable dependiente:", posibles_targets)
            disponibles = [col for col in numeric_cols_limpias if col != target]
            independientes = st.multiselect("Variables independientes:", options=disponibles)
        if independientes:
            # Fit and score on the complete rows only (no train/test split here)
            df_model = df.dropna(subset=independientes + [target])
            X = df_model[independientes]
            y = df_model[target]
            modelo = LinearRegression()
            modelo.fit(X, y)
            y_pred = modelo.predict(X)
            r2 = modelo.score(X, y)
            st.markdown("### Gráfico Real vs Predicción")
            fig = px.scatter(x=y, y=y_pred, labels={"x": "Real", "y": "Predicción"}, title="Real vs Predicción")
            # Dashed identity line: points on it are predicted exactly
            fig.add_shape(type="line", x0=y.min(), x1=y.max(), y0=y.min(), y1=y.max(), line=dict(color="#3E7D95", dash="dash"))
            st.plotly_chart(fig, use_container_width=True)
            # The middle column is left empty
            col1, col2, col3 = st.columns(3)
            with col1:
                st.plotly_chart(crear_donut_metrica(r2, "R²"))
            
            with col3:
                st.plotly_chart(crear_donut_metrica(modelo.intercept_, "Intercepto"))
            
            comparacion = pd.DataFrame({
                "Variable": independientes,
                "Correlación individual": [df_model[[target, var]].corr().iloc[0, 1] for var in independientes],
                "R² del modelo": r2
            })
            # Compare like with like: a single predictor explains r² of the
            # variance, so the model's R² is checked against the squared
            # correlation. Comparing against |r| (always >= r²) made a
            # one-variable model never count as an improvement.
            comparacion["¿Mejora el modelo?"] = comparacion["R² del modelo"] > comparacion["Correlación individual"] ** 2
            st.markdown("### Comparación de correlación vs modelo")
            st.dataframe(estilizar_tabla(comparacion), use_container_width=True)

    if regresion == "Logistica":
        st.subheader("Regresión Logística")

        # ---- Feature preparation on a private copy of the data ----
        df_log = df.copy()
        df_log["target"] = df_log["room_type"].apply(lambda x: "Entire Place" if x == "Entire home/apt" else "No Entire")
        for col in ["host_is_superhost", "host_identity_verified", "instant_bookable", "has_availability"]:
            if col in df_log.columns:
                df_log[col] = df_log[col].map({"t": 1, "f": 0})
        if "property_type" in df_log.columns:
            df_log["property_type"] = df_log["property_type"].apply(lambda x: "Entire Place" if "Entire" in str(x) else "No Entire")
        if "host_response_rate" in df_log.columns:
            # Cast to str first so `.str` also works when the column is already numeric
            tasa = df_log["host_response_rate"].astype(str).str.replace("%", "", regex=False)
            tasa = pd.to_numeric(tasa, errors='coerce')
            # Missing rates compare False against 90 and end up as "Respuesta baja"
            df_log["host_response_rate"] = tasa.apply(lambda x: "Respuesta alta" if x >= 90 else "Respuesta baja")
        df_log["target_binaria"] = df_log["target"].map({"No Entire": 0, "Entire Place": 1})
        df_log = df_log.drop(columns=["room_type", "target"])
        df_log = pd.get_dummies(df_log, drop_first=True)
        y = df_log["target_binaria"]
        X = df_log.drop(columns=["target_binaria"])

        with st.sidebar:
            st.markdown("### Selección para Regresión Logística")
            seleccionadas = st.multiselect("Variables independientes", options=X.columns.tolist())

        if not seleccionadas:
            st.info("Selecciona al menos una variable para entrenar el modelo.")
        else:
            # The estimator rejects NaN, so train only on rows that are
            # complete for the selected predictors.
            datos = pd.concat([X[seleccionadas], y], axis=1).dropna()
            X = datos[seleccionadas]
            y = datos["target_binaria"]

            entrenable = len(datos) >= 2 and y.nunique() == 2
            if entrenable:
                # Fixed seed: without it every rerun (any widget interaction)
                # reshuffles the split and the metrics jump around.
                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
                entrenable = y_train.nunique() == 2

            if not entrenable:
                st.warning("No hay datos suficientes de ambas clases para entrenar el modelo con las variables seleccionadas.")
            else:
                escalar = StandardScaler()
                X_train = escalar.fit_transform(X_train)
                X_test = escalar.transform(X_test)
                modelo = LogisticRegression(max_iter=1000)
                modelo.fit(X_train, y_train)
                y_pred = modelo.predict(X_test)

                # labels=[0, 1] keeps the matrix 2x2 even when the test split
                # happens to contain a single class.
                cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
                tn, fp, fn, tp = cm.ravel()
                # Positive class first: rows are the true class, columns the prediction
                cm_custom = np.array([[tp, fn], [fp, tn]])
                fig, ax = plt.subplots()
                disp = ConfusionMatrixDisplay(confusion_matrix=cm_custom, display_labels=["Entire Place", "No Entire"])
                disp.plot(cmap="Greens", ax=ax, colorbar=True)
                # Cell captions follow the layout of cm_custom: the top-right
                # cell holds the false negatives and the bottom-left cell the
                # false positives (the two captions used to be swapped).
                ax.text(-0.35, -0.35, "VP\nVerdaderos Positivos", fontsize=8, color="white", weight="bold")
                ax.text(0.65, -0.35, "FN\nFalsos Negativos", fontsize=8, color="black", weight="bold")
                ax.text(-0.35, 0.65, "FP\nFalsos Positivos", fontsize=8, color="black", weight="bold")
                ax.text(0.65, 0.65, "VN\nVerdaderos Negativos", fontsize=8, color="green", weight="bold")
                st.pyplot(fig)
                # Release the figure; pyplot otherwise keeps one per rerun alive
                plt.close(fig)

                precision = tp / (tp + fp) if (tp + fp) > 0 else 0
                recall = tp / (tp + fn) if (tp + fn) > 0 else 0
                specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
                accuracy = (tp + tn) / (tp + tn + fp + fn)
                col1, col2 = st.columns(2)
                with col1:
                    st.plotly_chart(crear_donut_metrica(precision, "Precisión"))
                    st.plotly_chart(crear_donut_metrica(accuracy, "Exactitud"))
                with col2:
                    st.plotly_chart(crear_donut_metrica(recall, "Sensibilidad"))
                    st.plotly_chart(crear_donut_metrica(specificity, "Especificidad"))



#Mapa 
elif pagina == "Mapa": 

    st.title ("Mapa de ubicaciones")
    st.write("Ubicación general de pais")
    # Crear el dataframe con las ubicaciones
    ubicaciones = pd.DataFrame({
        'Ciudad': ['Victoria'],
        'lat': [48.4284],
        'lon': [-123.3656]
    })

    # Mostrar el mapa
    st.map(ubicaciones[['lat', 'lon']])

    st.sidebar.title("Mapeo")
    st.sidebar.header("Elección de lugar")
    if st.sidebar.button("Ver mapa de ubicaciones Victoria"):
        lugar = "Victoria"
        st.title("Ubicaciones de hospedajes")
        st.subheader("VICTORIA BC CANADA")

        #Carga de dataframe original
        df_original = pd.read_csv("Victoria.csv")
        df_original = df_original[
            (df_original["latitude"].between(48.3, 48.6)) &
            (df_original["longitude"].between(-123.6, -123.2))
        ]

        df_original = df_original.dropna(subset=["latitude", "longitude"])

        #merge para recuperar coordenadas
        df_mapa = pd.merge(df[["id", "price", "accommodates"]], df_original[["id", "latitude", "longitude"]], on="id", how="inner")


        df_mapa = df_mapa.rename(columns={"latitude": "lat", "longitude": "lon"})

        # Filtramos ubicaciones válidas
        df_mapa = df_mapa.dropna(subset=["lat", "lon"])
        df_mapa = df_mapa[(df_mapa["lat"].between(47, 49)) & (df_mapa["lon"].between(-125, -122))]

        # Creación del mapa
        layer = pdk.Layer(
            "ScatterplotLayer",
            data = df_mapa, 
            get_position = '[lon, lat]',
            get_radius=100,
            pickable = True,
            get_fill_color="""
                [ 
                    price < 100 ? 0 : price < 200 ? 255 : 200,
                    price < 100 ? 200 : price < 200 ? 140 : 0,
                    150,
                    160
                ]
            """
        )

        #Configuramos la vista del mapa
        view_state = pdk.ViewState(
            latitude = df_mapa["lat"].mean(),
            longitude = df_mapa["lon"].mean(),
            zoom=10,
            pitch=0,
        )

        #creamos tooltip
        tooltip = {
            "html": "<b>Precio:</b> ${price} <br/><b>Acomoda:</b> {accommodates} personas",
            "style": {"backgroundColor": "white", "color": "black"}
        }

        # Mostramos el mapa
        st.pydeck_chart(pdk.Deck(layers=[layer], initial_view_state=view_state, tooltip=tooltip, map_style="mapbox://styles/mapbox/light-v9"))
    

Overwriting airbnb.py
