In [6]:
# Importar librerías
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

import pandas as pd
import numpy as np

from scipy.optimize import curve_fit
from scipy import stats

from sklearn.metrics import (
    r2_score, mean_absolute_error, mean_squared_error,
    confusion_matrix, accuracy_score, precision_score,
    recall_score, roc_auc_score, roc_curve, classification_report, f1_score, precision_recall_curve, average_precision_score, balanced_accuracy_score
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, LogisticRegression

import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler


In [7]:
Estocolmo = pd.read_csv('Datasets/estocolmofinal.csv')
Estocolmo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5315 entries, 0 to 5314
Data columns (total 79 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   id                                            5315 non-null   int64  
 1   listing_url                                   5315 non-null   object 
 2   scrape_id                                     5315 non-null   int64  
 3   last_scraped                                  5315 non-null   object 
 4   source                                        5315 non-null   object 
 5   name                                          5315 non-null   object 
 6   description                                   5315 non-null   object 
 7   neighborhood_overview                         5315 non-null   object 
 8   picture_url                                   5315 non-null   object 
 9   host_id                                       5315 non-null   i

In [8]:
# Tasas de cambio
# Tasas de cambio a EUR
exchange_rates = {
    'MXN': 0.052,  # México
    'SEK': 0.088,  # Suecia
    'EUR': 1.0     # Alemania y España
}


In [9]:
# Estandarizar price y estimated_revenue_l365d
Estocolmo['price_eur'] = Estocolmo['price'] * exchange_rates['SEK']
Estocolmo['estimated_revenue_eur'] = Estocolmo['estimated_revenue_l365d'] * exchange_rates['SEK']

In [10]:
Estocolmo['price_eur'].describe()

count     5315.000000
mean       156.997712
std        412.342051
min          9.240000
25%         88.000000
50%        116.248000
75%        156.684000
max      12426.040000
Name: price_eur, dtype: float64

In [11]:
Estocolmo['price'].describe()

count      5315.000000
mean       1784.064911
std        4685.705123
min         105.000000
25%        1000.000000
50%        1321.000000
75%        1780.500000
max      141205.000000
Name: price, dtype: float64

In [12]:
# Price_Cat Estomocol
umbral_precio_Estocolmo = Estocolmo['price_eur'].quantile(0.75) #1780
Estocolmo['price_cat'] = np.where(Estocolmo['price_eur'] >= umbral_precio_Estocolmo, 'High price', 'Low price')
Estocolmo['price_cat'].value_counts()


price_cat
Low price     3986
High price    1329
Name: count, dtype: int64

In [13]:
Estocolmo.drop(['price','estimated_revenue_l365d'], axis=1)
Estocolmo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5315 entries, 0 to 5314
Data columns (total 82 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   id                                            5315 non-null   int64  
 1   listing_url                                   5315 non-null   object 
 2   scrape_id                                     5315 non-null   int64  
 3   last_scraped                                  5315 non-null   object 
 4   source                                        5315 non-null   object 
 5   name                                          5315 non-null   object 
 6   description                                   5315 non-null   object 
 7   neighborhood_overview                         5315 non-null   object 
 8   picture_url                                   5315 non-null   object 
 9   host_id                                       5315 non-null   i

In [14]:
Mexico = pd.read_csv('Datasets/Mexico_City.csv')
Mexico.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26401 entries, 0 to 26400
Data columns (total 76 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   id                                            26401 non-null  float64
 1   listing_url                                   26401 non-null  object 
 2   scrape_id                                     26401 non-null  float64
 3   last_scraped                                  26401 non-null  object 
 4   source                                        26401 non-null  object 
 5   name                                          26401 non-null  object 
 6   description                                   26401 non-null  object 
 7   neighborhood_overview                         26401 non-null  object 
 8   picture_url                                   26401 non-null  object 
 9   host_id                                       26401 non-null 

In [15]:
Mexico['price_eur'] = Mexico['price'] * exchange_rates['MXN']
Mexico['estimated_revenue_eur'] = Mexico['estimated_revenue_l365d'] * exchange_rates['MXN']

In [16]:
Mexico['price_eur'].describe()

count    26401.000000
mean        54.871055
std         29.444398
min          3.432000
25%         30.264000
50%         50.076000
75%         70.278000
max        152.464000
Name: price_eur, dtype: float64

In [17]:
Mexico['price'].describe()

count    26401.000000
mean      1055.212587
std        566.238420
min         66.000000
25%        582.000000
50%        963.000000
75%       1351.500000
max       2932.000000
Name: price, dtype: float64

In [18]:
# Creación de price_cat
umbral_precio_Mexico = Mexico['price_eur'].quantile(0.75)
Mexico['price_cat'] = np.where(Mexico['price_eur'] >= umbral_precio_Mexico, 'High price', 'Low price')
Mexico['price_cat'].value_counts()

price_cat
Low price     19607
High price     6794
Name: count, dtype: int64

In [19]:
Mexico.drop(['price', 'estimated_revenue_l365d'], axis=1)
Mexico.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26401 entries, 0 to 26400
Data columns (total 79 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   id                                            26401 non-null  float64
 1   listing_url                                   26401 non-null  object 
 2   scrape_id                                     26401 non-null  float64
 3   last_scraped                                  26401 non-null  object 
 4   source                                        26401 non-null  object 
 5   name                                          26401 non-null  object 
 6   description                                   26401 non-null  object 
 7   neighborhood_overview                         26401 non-null  object 
 8   picture_url                                   26401 non-null  object 
 9   host_id                                       26401 non-null 

In [20]:
Berlin = pd.read_csv('Datasets/Berlin_86.csv')
Berlin.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14187 entries, 0 to 14186
Data columns (total 87 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   Unnamed: 0                                    14187 non-null  int64  
 1   listing_url                                   14187 non-null  object 
 2   last_scraped                                  14187 non-null  object 
 3   source                                        14187 non-null  object 
 4   name                                          14187 non-null  object 
 5   description                                   14187 non-null  object 
 6   neighborhood_overview                         14187 non-null  object 
 7   picture_url                                   14187 non-null  object 
 8   host_id                                       14187 non-null  int64  
 9   host_url                                      14187 non-null 

In [21]:
Berlin['price'].describe()

count    14187.000000
mean       138.829426
std        107.554381
min          5.000000
25%         90.000000
50%        115.000000
75%        173.672536
max       3923.000000
Name: price, dtype: float64

In [22]:
# Renombrar columnas
Berlin.rename(columns= {
    'price': 'price_eur', 
    'estimated_revenue_l365d': 'estimated_revenue_eur'}
    ,inplace= True)

In [23]:
Valencia = pd.read_csv('Datasets/valencia_trabajo.csv')
Valencia.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9009 entries, 0 to 9008
Data columns (total 79 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   id                                            9009 non-null   int64  
 1   listing_url                                   9009 non-null   object 
 2   scrape_id                                     9009 non-null   int64  
 3   last_scraped                                  9009 non-null   object 
 4   source                                        9009 non-null   object 
 5   name                                          9009 non-null   object 
 6   description                                   9009 non-null   object 
 7   neighborhood_overview                         9009 non-null   object 
 8   picture_url                                   9009 non-null   object 
 9   host_id                                       9009 non-null   i

In [24]:
Valencia['price'].describe()

count    9009.000000
mean      101.443613
std        44.644134
min         8.000000
25%        72.000000
50%       101.443613
75%       125.000000
max       234.000000
Name: price, dtype: float64

In [25]:
Valencia.rename(columns= {'price':'price_eur', 'estimated_revenue_l365d':'estimated_revenue_eur'}, inplace= True)

In [26]:
# price_cat
umbral_precio_Valencia = Valencia['price_eur'].quantile(0.75)
Valencia['price_cat'] = np.where(Valencia['price_eur'] >= umbral_precio_Valencia, 'High price', 'Low price')
Valencia['price_cat'].value_counts()

price_cat
Low price     6725
High price    2284
Name: count, dtype: int64

In [27]:
#Estocolmo.to_csv('Estocolmo_Final.csv')
#Mexico.to_csv('Mexico_Final.csv')
#Berlin.to_csv('Berlin_Final.csv')
#Valencia.to_csv('Valencia_Final.csv')

In [None]:
%%writefile Dashboard_Final.py
# Dashboard Final equipo — Proyecto Airbnb (By Raymundo Díaz + IA + Profe Freddy)
# Versión final optimizada y multi-país comparativo

##########
# Importar librerías
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
from sklearn.metrics import (
    r2_score, mean_absolute_error, mean_squared_error,
    confusion_matrix, accuracy_score, precision_score,
    recall_score, roc_auc_score, roc_curve, classification_report, f1_score,
    precision_recall_curve, average_precision_score, balanced_accuracy_score
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

##########
# Configuración global
st.set_page_config(
    page_title="Airbnb (Data Web)",
    page_icon="assets/icon.jpg",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Paleta Airbnb
AIRBNB_RED   = "#FF5A5F"
AIRBNB_TEAL  = "#00A699"
AIRBNB_ORANGE= "#FC642D"
AIRBNB_GRAY  = "#BFBFBF"
AIRBNB_DARK_BG = "#0E1117"
AIRBNB_CARD   = "#151A22"
AIRBNB_BORDER = "#232A35"
CONT_GRADIENT = "Reds"

##########
# CSS Look & Feel Airbnb
st.markdown(f"""
<style>
.block-container {{ padding-top: 1.2rem; padding-bottom: 2rem; }}
html, body, [data-testid="stAppViewContainer"], section[data-testid="stSidebar"] {{
    background: radial-gradient(circle at 30% 30%, #131722 0%, #0E1117 100%) !important;
    color: white !important;
}}
section[data-testid="stSidebar"] {{ border-right: 1px solid {AIRBNB_BORDER}; }}
.air-card {{
    border: 1px solid {AIRBNB_BORDER};
    border-radius:16px; padding:1rem;
    background:{AIRBNB_CARD};
}}
.stButton>button {{
    background:{AIRBNB_RED}; color:white; border-radius:12px; border:none;
    padding:.6rem 1rem; font-weight:600;
}}
.stButton>button:hover {{ opacity:.9 }}
.stDataFrame, .stTable {{ color: white !important; }}
</style>
""", unsafe_allow_html=True)

##########
# Plotly: plantilla Airbnb
AIRBNB_COLORWAY = ["#FF5A5F", "#00A699", "#FC642D", "#BFBFBF", "#767676"]
pio.templates["airbnb_dark"] = pio.templates["plotly_dark"]
pio.templates["airbnb_dark"].layout.colorway = AIRBNB_COLORWAY
px.defaults.template = "airbnb_dark"
px.defaults.color_continuous_scale = CONT_GRADIENT
px.defaults.height = 420

##########
# Multi-país
COUNTRY_FILES = {
    "Alemania": "Berlin_Final.csv",
    "Valencia": "Valencia_Final.csv",
    "Estocolmo": "Estocolmo_Final.csv",
    "Mexico": "Mexico_Final.csv",
}
COUNTRY_IMAGES = {
    "Alemania": ["assets/Berlin1.jpg", "assets/Berlin3.jpg", "assets/Berlin2.jpg"],
    "Valencia": ["assets/Valencia1.jpg", "assets/Valencia2.jpg", "assets/Valencia3.jpg"],
    "Estocolmo": ["assets/Estocolmo1.jpg", "assets/Estocolmo2.jpg", "assets/Estocolmo3.jpg"],
    "Mexico": ["assets/Mexico1.jpg", "assets/Mexico2.jpg", "assets/Mexico3.jpg"],
}

##########
# Normalización y carga base
BIN_TRUE = {"t","true","True",1,"1",True}
BIN_FALSE= {"f","false","False",0,"0",False}

def _normalize_binary(series):
    s = series.copy()
    return s.apply(lambda v: 1 if v in BIN_TRUE else (0 if v in BIN_FALSE else np.nan)).astype("float")

def _normalize_df(df_raw):
    df = df_raw.copy()
    df = df.drop(['Unnamed: 0','latitude','longitude'], axis=1, errors="ignore")
    if 'host_id' in df.columns:
        df['host_id'] = df['host_id'].astype(str)
    for col in ['host_is_superhost','host_identity_verified','instant_bookable']:
        if col in df.columns:
            df[col] = _normalize_binary(df[col])
    for col in ['host_response_rate','host_acceptance_rate','price']:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    return df

@st.cache_data(show_spinner=False)
def load_country_df(country: str):
    raw = pd.read_csv(COUNTRY_FILES[country])
    df = _normalize_df(raw)
    Lista = [
        'host_is_superhost','host_identity_verified','host_response_time',
        'host_response_rate','host_acceptance_rate','host_total_listings_count',
        'host_verifications','room_type','property_type','price_cat'
    ]
    return df, Lista

#  Helpers multi-país 
@st.cache_data(show_spinner=False)
def load_all_countries():
    data = {}
    for c in COUNTRY_FILES.keys():
        df_i, lista_i = load_country_df(c)
        data[c] = {"df": df_i, "lista": lista_i}
    return data

def _top_freq(df, var_cat, k=10):
    if var_cat not in df.columns: return None
    tab = df[var_cat].value_counts(dropna=False).reset_index().head(k)
    tab.columns = ["categorias","frecuencia"]
    return tab

def _grid_2x2_figs(figs, titles):
    cols = st.columns(2)
    for i, (fig, title) in enumerate(zip(figs, titles)):
        with cols[i % 2]:
            if title: st.caption(f"**{title}**")
            if fig is not None:
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.info("No disponible para este país.")

def _numdf(df): return df.select_dtypes(include=["float","int"]).copy()

##########
# Header
col_logo, col_title = st.columns([1,5], vertical_alignment="center")
with col_logo:
    st.image("assets/Logo3.jpg", width=90)
with col_title:
    st.markdown("""
        # Airbnb Data Analysis — Multi-país
        <span style="color:#767676">Comparativa entre Alemania, Valencia, Estocolmo y México</span>
    """, unsafe_allow_html=True)

##########
# Sidebar
st.sidebar.image("assets/Logoo.jpg", use_container_width=True)
st.sidebar.caption("Análisis exploratorio y modelos comparativos")
st.sidebar.markdown("---")

modo_presentacion = st.sidebar.toggle("Modo presentación", value=False)
View = st.sidebar.selectbox(
    label='Tipo de análisis',
    options=['Extracción de Características', 'Regresión Lineal', 'Regresión No Lineal', 'Regresión Logística'],
    index=0
)

##########################################################################################
# VISTA 1 — EXTRACCIÓN DE CARACTERÍSTICAS
if View == "Extracción de Características":
    all_data = load_all_countries()

    st.title("Extracción de Características — 4 países")
    st.caption("Comparación de distribuciones categóricas entre países Airbnb.")

    Variable_Cat = st.sidebar.selectbox("Variable categórica a analizar", options=list(all_data["Alemania"]["lista"]))
    k_top = st.sidebar.slider("Top categorías a mostrar", 5, 20, 10, 1)

    # --- Distribución por categoría (Bar) ---
    st.subheader("Distribución por categoría (Bar Plot)")
    bar_figs, titles = [], []
    for c, pack in all_data.items():
        df_i = pack["df"]
        tab = _top_freq(df_i, Variable_Cat, k=k_top)
        if tab is not None and len(tab) > 0:
            fig = px.bar(tab, x="categorias", y="frecuencia", color="categorias")
        else:
            fig = None
        bar_figs.append(fig); titles.append(c)
    _grid_2x2_figs(bar_figs, titles)

    # --- Pie Chart ---
    st.subheader("Proporción por categoría (Pie Chart)")
    pie_figs, titles = [], []
    for c, pack in all_data.items():
        df_i = pack["df"]
        tab = _top_freq(df_i, Variable_Cat, k=k_top)
        if tab is not None:
            fig = px.pie(tab, names="categorias", values="frecuencia")
        else:
            fig = None
        pie_figs.append(fig); titles.append(c)
    _grid_2x2_figs(pie_figs, titles)

    # --- Donut Chart ---
    st.subheader("Visualización tipo dona")
    donut_figs, titles = [], []
    for c, pack in all_data.items():
        df_i = pack["df"]
        tab = _top_freq(df_i, Variable_Cat, k=k_top)
        if tab is not None:
            fig = px.pie(tab, names="categorias", values="frecuencia", hole=0.5)
        else:
            fig = None
        donut_figs.append(fig); titles.append(c)
    _grid_2x2_figs(donut_figs, titles)

    # --- Área acumulada ---
    st.subheader("Tendencia acumulada (Área)")
    area_figs, titles = [], []
    for c, pack in all_data.items():
        df_i = pack["df"]
        tab = _top_freq(df_i, Variable_Cat, k=k_top)
        if tab is not None:
            tab_sorted = tab.sort_values(by="frecuencia", ascending=False)
            fig = px.area(tab_sorted, x="categorias", y="frecuencia")
        else:
            fig = None
        area_figs.append(fig); titles.append(c)
    _grid_2x2_figs(area_figs, titles)

    # --- Análisis más profundo ---
    st.markdown("---")
    st.subheader("Análisis más profundo (Boxplot/Heatmap)")
    deep_figs, titles = [], []
    for c, pack in all_data.items():
        df_i = pack["df"]
        if Variable_Cat in ['room_type','property_type','price_cat'] and 'price' in df_i.columns:
            if not df_i[[Variable_Cat,'price']].dropna().empty:
                fig = px.box(df_i, x=Variable_Cat, y='price', color=Variable_Cat)
            else:
                fig = None
        else:
            heat_df = pd.crosstab(index=df_i[Variable_Cat], columns='count', normalize='columns') * 100
            fig = px.imshow(heat_df, color_continuous_scale=CONT_GRADIENT) if not heat_df.empty else None
        deep_figs.append(fig); titles.append(c)
    _grid_2x2_figs(deep_figs, titles)

##########################################################################################
# ======== VISTA 2 — REGRESIÓN LINEAL MULTI-PAÍS
if View == "Regresión Lineal":
    all_data = load_all_countries()
    st.title("Regresión Lineal — 4 países")

    num_cols = list(_numdf(all_data["Alemania"]["df"]).columns)
    colL, colR = st.columns(2)
    with colL:
        Variable_y = st.selectbox("Variable dependiente (Y)", options=num_cols, key="lin_y")
    with colR:
        Variable_x = st.selectbox("Variable independiente (X)", options=num_cols, key="lin_x")

    st.subheader("Dispersión + recta ajustada (Y ~ X)")
    fig_list, title_list, rows = [], [], []
    for c, pack in all_data.items():
        df_i = _numdf(pack["df"]).dropna(subset=[Variable_x, Variable_y])
        if len(df_i) < 10:
            fig, met = None, None
        else:
            X = df_i[[Variable_x]].values; y = df_i[Variable_y].values
            mdl = LinearRegression().fit(X, y)
            yhat = mdl.predict(X)
            fig = px.scatter(df_i, x=Variable_x, y=Variable_y, opacity=0.6, title=f"{c}")
            fig.add_trace(go.Scatter(x=np.sort(X[:,0]), y=np.sort(yhat), mode="lines", name="Ajuste"))
            met = {"R2": mdl.score(X,y), "R": np.sqrt(abs(mdl.score(X,y))),
                   "Coef": mdl.coef_[0], "Intercepto": mdl.intercept_}
        fig_list.append(fig); title_list.append(c)
        if met: rows.append({"País": c, **met})
        else: rows.append({"País": c, "R2": np.nan, "R": np.nan, "Coef": np.nan, "Intercepto": np.nan})

    _grid_2x2_figs(fig_list, title_list)
    st.markdown("**Comparativa de métricas:**")
    st.dataframe(pd.DataFrame(rows), use_container_width=True)

##########################################################################################
# VISTA 3 — REGRESIÓN NO LINEAL MULTI-PAÍS
if View == "Regresión No Lineal":
    all_data = load_all_countries()
    st.title("Regresión No Lineal — 4 países")

    num_cols = list(_numdf(all_data["Alemania"]["df"]).columns)
    Variable_y = st.selectbox("Variable dependiente (Y)", options=num_cols)
    Variable_x = st.selectbox("Variable independiente (X)", options=[c for c in num_cols if c != Variable_y])
    Modelo = st.selectbox("Modelo no lineal", options=[
        "Función cuadrática (a*x**2 + b*x + c)",
        "Función cúbica (a*x**3 + b*x**2 + c*x + d)",
        "Función exponencial (a*np.exp(-b*x)+c)",
        "Función potencia (a*x**b)"
    ])

    def fit_curve(df, x, y, model):
        x_, y_ = df[x].dropna(), df[y].dropna()
        if len(x_) < 15 or len(y_) < 15:
            return None, None
        x_ = x_.to_numpy(); y_ = y_.to_numpy()
        if model.startswith("Función cuadrática"):
            def f(x,a,b,c): return a*x**2+b*x+c
        elif model.startswith("Función cúbica"):
            def f(x,a,b,c,d): return a*x**3+b*x**2+c*x+d
        elif model.startswith("Función exponencial"):
            def f(x,a,b,c): return a*np.exp(-b*x)+c
        else:
            def f(x,a,b): return a*np.power(x,b)
        try:
            pars,_=curve_fit(f,x_,y_,maxfev=20000)
            yhat=f(x_,*pars)
            r2=r2_score(y_,yhat)
            fig=px.scatter(df,x=x,y=y,opacity=0.6)
            fig.add_trace(go.Scatter(x=np.sort(x_),y=f(np.sort(x_),*pars),mode="lines"))
            return fig,{"R2":r2,"R":np.sqrt(abs(r2))}
        except:
            return None,None

    figs, titles, rows = [], [], []
    for c, pack in all_data.items():
        df_i = _numdf(pack["df"])
        fig, met = fit_curve(df_i, Variable_x, Variable_y, Modelo)
        figs.append(fig); titles.append(c)
        if met: rows.append({"País": c, **met})
        else: rows.append({"País": c, "R2": np.nan, "R": np.nan})
    _grid_2x2_figs(figs, titles)
    st.markdown("**Métricas comparadas:**")
    st.dataframe(pd.DataFrame(rows), use_container_width=True)

##########################################################################################
# VISTA 4 — REGRESIÓN LOGÍSTICA MULTI-PAÍS
##########################################################################################
if View == "Regresión Logística":
    all_data = load_all_countries()
    st.title("Regresión Logística — 4 países")

    d0 = all_data["Alemania"]["df"]
    dico_cols = [c for c in d0.columns if d0[c].dropna().nunique() == 2]
    num_cols = list(_numdf(d0).columns)

    Variable_y = st.sidebar.selectbox("Variable dependiente (binaria)", options=dico_cols)
    Variables_x = st.sidebar.multiselect("Variables independientes (numéricas)", options=num_cols)
    test_size = st.sidebar.slider("Tamaño de prueba", 0.1, 0.5, 0.3)
    thr = st.sidebar.slider("Umbral", 0.05, 0.95, 0.5)
    imb_method = st.sidebar.selectbox("Balance de clases", ["Ninguno", "SMOTE", "Under", "class_weight='balanced'"])

    st.subheader("Métricas por país")
    rows = []
    for c, pack in all_data.items():
        df_i = pack["df"]
        if Variable_y not in df_i.columns or len(Variables_x)==0:
            rows.append({"País":c,"AUC":np.nan,"Balanced Acc":np.nan,"F1":np.nan}); continue
        df_i = df_i.dropna(subset=[Variable_y]+Variables_x)
        if df_i.empty: 
            rows.append({"País":c,"AUC":np.nan,"Balanced Acc":np.nan,"F1":np.nan}); continue

        clases = df_i[Variable_y].dropna().unique().tolist()
        if len(clases)!=2: continue
        y = df_i[Variable_y].map({clases[0]:0,clases[1]:1}).values
        X = df_i[Variables_x].astype(float).values
        X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=test_size,stratify=y,random_state=42)
        sc=StandardScaler(); X_train=sc.fit_transform(X_train); X_test=sc.transform(X_test)

        if imb_method=="SMOTE": X_train,y_train=SMOTE(random_state=42).fit_resample(X_train,y_train)
        if imb_method=="Under": X_train,y_train=RandomUnderSampler(random_state=42).fit_resample(X_train,y_train)

        model=LogisticRegression(max_iter=1000,class_weight=('balanced' if imb_method=="class_weight='balanced'" else None))
        model.fit(X_train,y_train)
        proba=model.predict_proba(X_test)[:,1]
        pred=(proba>=thr).astype(int)

        auc=roc_auc_score(y_test,proba)
        bacc=balanced_accuracy_score(y_test,pred)
        f1m=f1_score(y_test,pred,zero_division=0)
        rows.append({"País":c,"AUC":auc,"Balanced Acc":bacc,"F1":f1m})

    st.dataframe(pd.DataFrame(rows), use_container_width=True)

##########################################################################################
# FOOTER
##########################################################################################
st.markdown("---")
st.markdown("""
<div style="text-align:center; opacity:0.8; font-size:0.9rem;">
© Proyecto para Gestión de Proyectos — Dashboard creado por <b>Raymundo Díaz</b> con ayuda de IA y profe Freddy.  
<br>Comparativo multi-país de Airbnb con Streamlit y Plotly.
</div>
""", unsafe_allow_html=True)



Overwriting Dashboard_Final.py


In [41]:
%%writefile Dashboard_prueba.py
# Dashboard Final equipo — Proyecto Airbnb (By Raymundo Díaz + IA + Profe Freddy)
# Versión final optimizada y revisada

##########
# Importar librerías
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
from sklearn.metrics import (
    r2_score, mean_absolute_error, mean_squared_error,
    confusion_matrix, accuracy_score, precision_score,
    recall_score, roc_auc_score, roc_curve, classification_report, f1_score,
    precision_recall_curve, average_precision_score, balanced_accuracy_score
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

##########
# Configuración global
st.set_page_config(
    page_title="Airbnb (Data Web)",
    page_icon="assets/icon.jpg",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Paleta Airbnb
AIRBNB_RED   = "#FF5A5F"
AIRBNB_TEAL  = "#00A699"
AIRBNB_ORANGE= "#FC642D"
AIRBNB_GRAY  = "#BFBFBF"
AIRBNB_DARK_BG = "#0E1117"
AIRBNB_CARD   = "#151A22"
AIRBNB_BORDER = "#232A35"
CONT_GRADIENT = "Reds"

##########
# CSS Look & Feel Airbnb
st.markdown(f"""
<style>
.block-container {{ padding-top: 1.2rem; padding-bottom: 2rem; }}

/* Fondo degradado unificado */
html, body, [data-testid="stAppViewContainer"], section[data-testid="stSidebar"] {{
    background: radial-gradient(circle at 30% 30%, #131722 0%, #0E1117 100%) !important;
    color: white !important;
}}
section[data-testid="stSidebar"] {{
    border-right: 1px solid {AIRBNB_BORDER};
}}

/* Tarjetas KPI */
.air-card {{
    border: 1px solid {AIRBNB_BORDER};
    border-radius:16px; padding:1rem;
    background:{AIRBNB_CARD};
}}

/* Botones */
.stButton>button {{
    background:{AIRBNB_RED}; color:white; border-radius:12px; border:none;
    padding:.6rem 1rem; font-weight:600;
}}
.stButton>button:hover {{ opacity:.9 }}

/* Tablas */
.stDataFrame, .stTable {{ color: white !important; }}
</style>
""", unsafe_allow_html=True)

##########
# Plotly: plantilla Airbnb
AIRBNB_COLORWAY = ["#FF5A5F", "#00A699", "#FC642D", "#BFBFBF", "#767676"]
pio.templates["airbnb_dark"] = pio.templates["plotly_dark"]
pio.templates["airbnb_dark"].layout.colorway = AIRBNB_COLORWAY
px.defaults.template = "airbnb_dark"
px.defaults.color_continuous_scale = CONT_GRADIENT
px.defaults.height = 420

##########
# Multi-país
COUNTRY_FILES = {
    "Alemania": "Berlin_Final.csv",
    "Valencia": "Valencia_Final.csv",
    "Estocolmo": "Estocolmo_Final.csv",
    "Mexico": "Mexico_Final.csv",
}

COUNTRY_IMAGES = {
    "Alemania": ["assets/Berlin1.jpg", "assets/Berlin3.jpg", "assets/Berlin2.jpg"],
    "Valencia": ["assets/Valencia1.jpg", "assets/Valencia2.jpg", "assets/Valencia3.jpg"],
    "Estocolmo": ["assets/Estocolmo1.jpg", "assets/Estocolmo2.jpg", "assets/Estocolmo3.jpg"],
    "Mexico": ["assets/Mexico1.jpg", "assets/Mexico2.jpg", "assets/Mexico3.jpg"],
}

##########
# Normalización
BIN_TRUE = {"t","true","True",1,"1",True}
BIN_FALSE= {"f","false","False",0,"0",False}

def _normalize_binary(series):
    s = series.copy()
    return s.apply(lambda v: 1 if v in BIN_TRUE else (0 if v in BIN_FALSE else np.nan)).astype("float")

def _normalize_df(df_raw):
    df = df_raw.copy()
    df = df.drop(['Unnamed: 0','latitude','longitude'], axis=1, errors="ignore")
    if 'host_id' in df.columns:
        df['host_id'] = df['host_id'].astype(str)

    for col in ['host_is_superhost','host_identity_verified','instant_bookable']:
        if col in df.columns:
            df[col] = _normalize_binary(df[col])

    for col in ['host_response_rate','host_acceptance_rate','price']:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    return df

@st.cache_data(show_spinner=False)
def load_country_df(country: str):
    path = COUNTRY_FILES[country]
    raw = pd.read_csv(path)
    df = _normalize_df(raw)
    Lista = [
        'host_is_superhost','host_identity_verified','host_response_time',
        'host_response_rate','host_acceptance_rate','host_total_listings_count',
        'host_verifications','room_type','property_type','price_cat'
    ]
    return df, Lista

# Carga inicial
df, Lista = load_country_df("Alemania")

##########
# Header
col_logo, col_title = st.columns([1,5], vertical_alignment="center")
with col_logo:
    st.image("assets/Logo3.jpg", width=90)
with col_title:
    st.markdown("""
        # Airbnb Data Analysis
        <span style="color:#767676">Listados, precios y comportamiento de oferta</span>
    """, unsafe_allow_html=True)

##########
# Sidebar
st.sidebar.image("assets/Logoo.jpg", use_container_width=True)
st.sidebar.caption("Análisis exploratorio y modelos")
st.sidebar.markdown("---")
modo_presentacion = st.sidebar.toggle("Modo presentación", value=False)
country = st.sidebar.selectbox("País", list(COUNTRY_FILES.keys()), index=0)
df, Lista = load_country_df(country)
View = st.sidebar.selectbox(
    label='Tipo de análisis',
    options=['Extracción de Características', 'Regresión Lineal', 'Regresión No Lineal', 'Regresión Logística', 'Comparar países'],
    index=0
)

##########################################################################################
# Vista 1 — Extracción de características
if View == "Extracción de Características":
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.markdown('<div class="air-card">', unsafe_allow_html=True)
        st.metric("Filas", f"{len(df):,}")
        st.markdown('</div>', unsafe_allow_html=True)
    with col2:
        st.markdown('<div class="air-card">', unsafe_allow_html=True)
        st.metric("Tipos de propiedad", df['property_type'].nunique() if 'property_type' in df.columns else "—")
        st.markdown('</div>', unsafe_allow_html=True)
    with col3:
        st.markdown('<div class="air-card">', unsafe_allow_html=True)
        med_price = np.nanmedian(df['price_eur']) if 'price_eur' in df.columns else np.nan
        st.metric("Mediana de precio (€)", f"€{med_price:,.0f}" if np.isfinite(med_price) else "—")
        st.markdown('</div>', unsafe_allow_html=True)
    with col4:
        st.markdown('<div class="air-card">', unsafe_allow_html=True)
        superhosts = (df['host_is_superhost'] == '1').sum() if 'host_is_superhost' in df.columns else 0
        st.metric("Superhosts", superhosts)
        st.markdown('</div>', unsafe_allow_html=True)

    st.markdown("---")

    Variable_Cat = st.sidebar.selectbox("Variable categórica a analizar", options=Lista)
    Tabla_frecuencias = df[Variable_Cat].value_counts(dropna=False).reset_index().head(10)
    Tabla_frecuencias.columns = ['categorias', 'frecuencia']

    st.title("Extracción de Características")
    st.caption('Se muestran máximo las 10 categorías con más frecuencia.')

    Contenedor_A, Contenedor_B = st.columns(2)
    with Contenedor_A:
        st.subheader("Distribución por categoría (Bar Plot)")
        fig_bar = px.bar(Tabla_frecuencias, x='categorias', y='frecuencia', color='categorias')
        st.plotly_chart(fig_bar, use_container_width=True)
    with Contenedor_B:
        st.subheader("Proporción por categoría (Pie Chart)")
        fig_pie = px.pie(Tabla_frecuencias, names='categorias', values='frecuencia')
        st.plotly_chart(fig_pie, use_container_width=True)

    Contenedor_C, Contenedor_D = st.columns(2)
    with Contenedor_C:
        st.subheader("Gráfico tipo anillo")
        fig_donut = px.pie(Tabla_frecuencias, names='categorias', values='frecuencia', hole=0.5)
        st.plotly_chart(fig_donut, use_container_width=True)
    with Contenedor_D:
        st.subheader("Tendencia acumulada (Área)")
        fig_area = px.area(Tabla_frecuencias.sort_values(by='frecuencia', ascending=False),
                           x='categorias', y='frecuencia')
        st.plotly_chart(fig_area, use_container_width=True)

    st.markdown("---")
    st.subheader("Análisis más profundo")

    if Variable_Cat in ['room_type', 'property_type', 'price_cat'] and 'price' in df.columns:
        st.write("**Relación entre categorías y precio (Boxplot):**")
        fig_box = px.box(df, x=Variable_Cat, y='price', color=Variable_Cat)
        st.plotly_chart(fig_box, use_container_width=True)
    else:
        st.write("**Heatmap de proporciones:**")
        heat_df = pd.crosstab(index=df[Variable_Cat], columns='count', normalize='columns') * 100
        fig_heat = px.imshow(heat_df, color_continuous_scale=CONT_GRADIENT, title="Proporción por categoría")
        st.plotly_chart(fig_heat, use_container_width=True)

    if not modo_presentacion:
        st.markdown("---")
        st.subheader("Tabla de frecuencias")
        st.dataframe(Tabla_frecuencias.style.background_gradient(cmap='Reds'), use_container_width=True)

    st.markdown(f"**Galería:** {country} — Airbnb")
    imgs = COUNTRY_IMAGES.get(country, [])
    gcols = st.columns(3)
    for i, path in enumerate(imgs[:3]):
        with gcols[i]:
            try:
                st.image(path, use_container_width=True)
            except Exception:
                st.write("🖼️ Imagen no encontrada")

##########################################################################################
# (Las otras vistas: Lineal, No Lineal, Logística y Comparar países se mantienen igual)
##########################################################################################

# FOOTER
st.markdown("---")
st.markdown("""
<div style="text-align:center; opacity:0.8; font-size:0.9rem;">
© Proyecto para Gestión de Proyectos — Dashboard creado por <b>Raymundo Díaz</b> con ayuda de IA y profe Freddy.  
<br> Construido con Streamlit, Plotly y Python.
</div>
""", unsafe_allow_html=True)


Overwriting Dashboard_prueba.py
