# Dash Final Versión 2 — Mejorada y Profesional

**Dashboard 360° para Segmentación de Clientes**

- **Totalmente funcional fuera de Jupyter** (Dash standalone, abre automáticamente en tu navegador).
- Slider de clusters (K) que recalcula y actualiza el análisis y visualizaciones.
- Gráficos de codo y silhouette para K óptimo.
- Destacado visual y filtro de clusters (mapa, barras, narrativa, tabla dinámica y exportable).
- Panel de KPIs, recomendaciones por cluster y top productos.
- Diseño profesional (Dash Bootstrap), feedback visual, auto-scroll al cargar, y comentarios para fácil extensión.

## Requisitos
- `pip install pandas numpy scikit-learn plotly dash dash-bootstrap-components scipy openpyxl`
- Archivo `Data_Set_Global.xlsx` con hoja `Pedidos` en la misma carpeta.

## EJECUCIÓN STANDALONE
```bash
jupyter nbconvert --to script Dash_Final_Version2_Mejorado.ipynb
python Dash_Final_Version2_Mejorado.py
```
o, si usas JupyterLab, descomenta la línea de JupyterDash.

Abre tu navegador en http://127.0.0.1:8052/ (lo hace automáticamente).

In [1]:
import os
import webbrowser
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA, NMF
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output, State, dash_table, no_update
import dash_bootstrap_components as dbc
from scipy.stats import entropy
import datetime as dt
import warnings
warnings.filterwarnings('ignore')
# Para Jupyter, puedes usar:
# from jupyter_dash import JupyterDash

## Carga y preprocesamiento de datos

In [None]:
# Workbook expected next to this script; orders are read from its "Pedidos" sheet.
file_path = "Data_Set_Global.xlsx"
# Explicit check instead of `assert`: assertions are stripped when Python runs
# with -O, which would defer the failure to a less clear error in read_excel.
if not os.path.exists(file_path):
    raise FileNotFoundError(f"No se encuentra el archivo {file_path}")
pedidos = pd.read_excel(file_path, sheet_name="Pedidos")
# Line total used later for the monetary feature and the revenue KPI.
pedidos["total_price"] = pedidos["quantity"] * pedidos["unit_price"]
pedidos["order_date"] = pd.to_datetime(pedidos["order_date"])
# Delivery timestamps are optional columns; convert only those that exist.
for col in ["promised_delivery_time","actual_delivery_time"]:
    if col in pedidos.columns:
        pedidos[col] = pd.to_datetime(pedidos[col])
# The delay (in minutes, actual minus promised) is only derived when both
# timestamps exist, so downstream code must not assume this column is present.
if "promised_delivery_time" in pedidos.columns and "actual_delivery_time" in pedidos.columns:
    pedidos["delivery_delay_min"] = (
        (pedidos["actual_delivery_time"] - pedidos["promised_delivery_time"]).dt.total_seconds() / 60
    )
# Rows without the identifying keys cannot be attributed to a customer/order.
pedidos = pedidos.dropna(subset=["order_id", "customer_id", "order_date"]).copy()
pedidos.reset_index(drop=True, inplace=True)

## Funciones de clustering y visualización
Incluye:
- RFM, entropía de métodos de pago, tópicos NMF productos
- Clustering dinámico por K y PCA
- Narrativa y nombre amigable por cluster
- Gráficos codo/silhouette

In [None]:
def preparar_segmentacion(pedidos_filtrado, K=3, nmf_topics=10):
    """Build per-customer features, cluster them with KMeans and project to 2-D.

    The feature set is: standardized RFM (recency, frequency, monetary), the
    base-2 entropy of each customer's payment-method mix, and NMF topic
    weights computed on the customer x product purchase-count matrix. All
    features are standardized again before clustering and PCA.

    Args:
        pedidos_filtrado: Orders DataFrame. The code reads ``customer_id``,
            ``order_id``, ``order_date``, ``total_price`` and ``product_id``;
            ``payment_method`` is optional (entropy is set to 0 without it).
        K: Number of KMeans clusters.
        nmf_topics: Upper bound on the number of NMF components; it is capped
            at the number of distinct products.

    Returns:
        Tuple ``(full_df, cluster_profile, kmeans, scaler_rfm,
        customer_topics, df_plot, X_scaled)`` where ``full_df`` holds the
        features plus a ``cluster`` column indexed by ``customer_id``,
        ``cluster_profile`` the per-cluster feature means rounded to 2
        decimals, and ``df_plot`` is ``full_df`` with ``PCA1``/``PCA2`` added.
    """
    # Recency is counted from the day after the latest order in the data.
    fecha_corte = pedidos_filtrado["order_date"].max() + dt.timedelta(days=1)

    def _dias_sin_comprar(fechas):
        return (fecha_corte - fechas.max()).days

    rfm = pedidos_filtrado.groupby("customer_id").agg(
        recency=("order_date", _dias_sin_comprar),
        frequency=("order_id", "nunique"),
        monetary=("total_price", "sum"),
    )
    scaler_rfm = StandardScaler()
    columnas_z = [f"{nombre}_z" for nombre in rfm.columns]
    rfm_z = pd.DataFrame(scaler_rfm.fit_transform(rfm), index=rfm.index, columns=columnas_z)

    # Payment-method entropy (bits) per customer; 0 when the column is missing.
    if "payment_method" not in pedidos_filtrado.columns:
        customer_payment_diversity = pd.DataFrame({"customer_id": rfm.index, "payment_entropy": 0})
    else:
        conteos = pedidos_filtrado.groupby(["customer_id", "payment_method"]).size().unstack(fill_value=0)
        proporciones = conteos.div(conteos.sum(axis=1), axis=0)
        entropias = proporciones.apply(lambda fila: entropy(fila, base=2), axis=1)
        customer_payment_diversity = entropias.rename("payment_entropy").reset_index()

    # NMF topics over the customer x product count matrix.
    cust_prod = pedidos_filtrado.groupby(["customer_id", "product_id"]).size().unstack(fill_value=0)
    n_temas = min(nmf_topics, len(cust_prod.columns))
    nmf = NMF(n_components=n_temas, init="random", random_state=42)
    W = nmf.fit_transform(cust_prod)
    nombres_temas = [f"topic_{j}" for j in range(1, W.shape[1] + 1)]
    customer_topics = pd.DataFrame(W, index=cust_prod.index, columns=nombres_temas)

    # Merge every feature family on customer_id; missing values become 0.
    pago_por_cliente = customer_payment_diversity.set_index("customer_id")
    full_df = rfm_z.join(pago_por_cliente, how="left").join(customer_topics, how="left").fillna(0)

    matriz_numerica = full_df.select_dtypes(include=np.number)
    X_scaled = StandardScaler().fit_transform(matriz_numerica)
    kmeans = KMeans(n_clusters=K, random_state=42, n_init=10)
    full_df["cluster"] = kmeans.fit_predict(X_scaled)
    full_df.index.name = "customer_id"

    # Per-cluster mean of each feature, used for the narrative summaries.
    prefijos = ('recency', 'frequency', 'monetary', 'payment_entropy', 'topic_')
    profile_cols = [col for col in full_df.columns if col.startswith(prefijos)]
    cluster_profile = full_df.groupby("cluster")[profile_cols].mean().round(2)

    # 2-D projection for the scatter "map" of clusters.
    coords = PCA(n_components=2).fit_transform(X_scaled)
    df_plot = full_df.reset_index().assign(PCA1=coords[:, 0], PCA2=coords[:, 1])
    return full_df, cluster_profile, kmeans, scaler_rfm, customer_topics, df_plot, X_scaled

def cluster_summary_narrative(cluster_id, cluster_profile, cluster_names):
    """Return a short multi-line text summary for one cluster.

    Args:
        cluster_id: Label used to look up the row in ``cluster_profile``.
        cluster_profile: Table indexed by cluster label with the columns
            ``recency_z``, ``frequency_z``, ``monetary_z`` and
            ``payment_entropy``.
        cluster_names: Mapping from cluster label to a display name; labels
            missing from it fall back to ``"Cluster <id>"``.

    Returns:
        The summary string (heading, four metrics with two decimals, and a
        generic recommendation line).
    """
    perfil = cluster_profile.loc[cluster_id]
    titulo = cluster_names.get(cluster_id, f"Cluster {cluster_id}")
    lineas = [
        f"### {titulo}",
        f"• Recency (recencia): {perfil['recency_z']:.2f}",
        f"• Frequency (frecuencia): {perfil['frequency_z']:.2f}",
        f"• Monetary (monto): {perfil['monetary_z']:.2f}",
        f"• Diversidad de pago: {perfil['payment_entropy']:.2f}",
        "",
        "[Recomendación automática: adapta tu propuesta comercial a este perfil.]",
    ]
    return "\n".join(lineas)

def get_cluster_names(K):
    """Map cluster labels ``0..K-1`` to display names.

    The first five labels get fixed friendly names; any further label falls
    back to ``"Cluster <i>"``.
    """
    etiquetas = (
        "Champions 🏆",
        "Dormant 💤",
        "Ocasionales 🎯",
        "Leales 💳",
        "Prometedores 💡",
    )
    resultado = {}
    for idx in range(K):
        resultado[idx] = etiquetas[idx] if idx < len(etiquetas) else f"Cluster {idx}"
    return resultado

def elbow_silhouette(X, k_max=10):
    """Build the elbow (inertia) and mean-silhouette figures over a range of K.

    Args:
        X: Feature matrix (one row per sample) to cluster.
        k_max: Largest K to evaluate. The effective upper bound is also
            limited to ``len(X) - 1`` because the silhouette coefficient is
            only defined for ``2 <= n_clusters <= n_samples - 1``.

    Returns:
        Tuple ``(fig1, fig2)``: inertia-vs-K and silhouette-vs-K line charts.
        When ``X`` has fewer than 3 samples no K can be evaluated and both
        figures are returned without data points.
    """
    # Cap K by the sample count so a narrow date filter (few customers) does
    # not make KMeans / silhouette_score raise.
    upper = min(k_max, len(X) - 1)
    Ks = list(range(2, upper + 1))
    inertias, silhouettes = [], []
    for k in Ks:
        km = KMeans(n_clusters=k, random_state=42, n_init=10).fit(X)
        inertias.append(km.inertia_)
        silhouettes.append(silhouette_score(X, km.labels_))
    fig1 = go.Figure([go.Scatter(x=Ks, y=inertias, mode='lines+markers')])
    fig1.update_layout(title="Método del codo", xaxis_title="K", yaxis_title="Inercia")
    fig2 = go.Figure([go.Scatter(x=Ks, y=silhouettes, mode='lines+markers')])
    fig2.update_layout(title="Silhouette promedio", xaxis_title="K", yaxis_title="Coef. silhouette")
    return fig1, fig2

## Layout Dash
- Filtros, slider, KPIs, visualizaciones, tabla, exportación y branding profesional

In [None]:
# For JupyterDash use:
# app = JupyterDash(__name__, external_stylesheets=[dbc.themes.MINTY])
app = Dash(__name__, external_stylesheets=[dbc.themes.MINTY])

# The date picker is bounded by the first and last order dates in the data.
min_date = pedidos['order_date'].min().date()
max_date = pedidos['order_date'].max().date()


def _spinner_graph(graph_id):
    """Return a dcc.Graph with the given id wrapped in a dcc.Loading spinner."""
    return dcc.Loading(dcc.Graph(id=graph_id))


def _graph_pair_row(left_id, right_id):
    """Return a row holding two half-width graphs, each with a spinner."""
    return dbc.Row([
        dbc.Col([_spinner_graph(left_id)], width=6),
        dbc.Col([_spinner_graph(right_id)], width=6),
    ])


# Global filters: date range (left) and number of clusters (right).
_date_filter = dbc.Col([
    html.Label('Rango de fechas (todas las visualizaciones):'),
    dcc.DatePickerRange(
        id='date-range',
        min_date_allowed=min_date,
        max_date_allowed=max_date,
        start_date=min_date,
        end_date=max_date,
    ),
], width=6)

_k_selector = dbc.Col([
    html.Label('Número de clusters (K):'),
    dcc.Slider(
        id='slider-k', min=2, max=5, step=1, value=3,
        marks={k: str(k) for k in range(2, 6)},
        tooltip={"placement": "bottom"},
    ),
], width=6)

# Customer table; its columns and rows are filled in by the main callback.
_clients_table = dash_table.DataTable(
    id='clientes-table',
    page_size=10,
    filter_action="native",
    sort_action="native",
    style_table={'overflowX': 'auto'},
    style_cell={'textAlign': 'center'},
    style_header={'backgroundColor': '#f8f9fa', 'fontWeight': 'bold'},
    style_data_conditional=[
        {'if': {'row_index': 'odd'}, 'backgroundColor': '#f3f8fb'},
    ],
)

_footer = html.Footer(
    [
        html.Span("© 2025 | Adaptado y mejorado por AlejandroRS1 con Copilot AI | "),
        html.A("GitHub", href="https://github.com/AlejandroRS1/Miner-a-de-datos", target="_blank"),
    ],
    style={"textAlign": "center", "color": "#888"},
)

app.layout = dbc.Container([
    html.H2("🧑‍💻 Dash Final Versión 2 Mejorado: Segmentación de Clientes"),
    dbc.Row([_date_filter, _k_selector]),
    html.Br(),
    dcc.Loading(html.Div(id='kpi-panel')),
    html.Br(),
    _graph_pair_row('bar-segment', 'corr-matrix'),
    _graph_pair_row('delay-hist', 'conversion-hist'),
    html.Div(id='top5-prod'),
    html.Hr(),
    html.H3("Gráficos de codo y silhouette para elegir K"),
    _graph_pair_row('fig-codo', 'fig-silhouette'),
    html.H3("Clusters de clientes (mapa visual y barras)"),
    _spinner_graph('fig-pca'),
    _spinner_graph('fig-bar-cluster'),
    html.Hr(),
    html.H4("KPIs y narrativas por cluster"),
    dcc.Dropdown(id='dropdown-cluster', multi=True, value=[], clearable=True, placeholder='Selecciona cluster para detalle'),
    html.Div(id='cluster-summary'),
    html.Br(),
    _clients_table,
    html.Button("Exportar a CSV", id="export-btn", n_clicks=0, style={"marginLeft":10}),
    dcc.Download(id="download-clients"),
    html.Br(), html.Br(),
    _footer,
], fluid=True)

## Callback principal: clustering dinámico y visualización

In [None]:
@app.callback(
    [Output('kpi-panel','children'),
     Output('bar-segment','figure'),
     Output('corr-matrix','figure'),
     Output('delay-hist','figure'),
     Output('conversion-hist','figure'),
     Output('top5-prod','children'),
     Output('fig-codo','figure'),
     Output('fig-silhouette','figure'),
     Output('fig-pca','figure'),
     Output('fig-bar-cluster','figure'),
     Output('dropdown-cluster','options'),
     Output('dropdown-cluster','value'),
     Output('clientes-table','columns'),
     Output('clientes-table','data'),
     Output('cluster-summary','children')],
    [Input('date-range','start_date'),
     Input('date-range','end_date'),
     Input('slider-k','value'),
     Input('dropdown-cluster','value')]
)
def update_dashboard(start_date, end_date, K, clusters_selected):
    """Recompute every panel of the dashboard for the current filters.

    Args:
        start_date: Lower bound of the date picker (inclusive); ``None`` while
            the user has cleared it.
        end_date: Upper bound of the date picker; orders placed at any time
            on that calendar day are included.
        K: Number of clusters chosen on the slider.
        clusters_selected: Cluster labels picked in the dropdown (may be
            empty or contain labels that no longer exist after K changed).

    Returns:
        A 15-tuple matching the declared outputs, in order. When the filters
        cannot produce a valid clustering (a missing date, or fewer customers
        in range than K) every output is ``no_update`` so the previous view
        is kept instead of raising inside the callback.
    """
    n_outputs = 15
    if start_date is None or end_date is None:
        return (no_update,) * n_outputs

    start_ts = pd.to_datetime(start_date)
    # Half-open upper bound: comparing with `<= end_date` would stop at
    # midnight and drop orders placed later on the last selected day.
    end_ts = pd.to_datetime(end_date).normalize() + pd.Timedelta(days=1)
    in_range = (pedidos['order_date'] >= start_ts) & (pedidos['order_date'] < end_ts)
    pedidos_filt = pedidos[in_range].copy()

    # KMeans needs at least K samples (this also covers an empty selection).
    if pedidos_filt["customer_id"].nunique() < K:
        return (no_update,) * n_outputs

    # KPIs
    kpi_dict = {
        "Total de Clientes Analizados": pedidos_filt["customer_id"].nunique(),
        "Órdenes Totales": pedidos_filt["order_id"].nunique(),
        "Productos Únicos": pedidos_filt["product_id"].nunique(),
        "Facturación Total ($)": round(pedidos_filt["total_price"].sum(),2),
        "Periodo Analizado": f"{str(start_date)} a {str(end_date)}"
    }
    kpi_panel = dbc.Row([
        dbc.Col(
            dbc.Card([
                dbc.CardHeader(k),
                dbc.CardBody(html.H5(f"{v:,}" if isinstance(v, (int, float)) else str(v)))
            ]), width=3
        ) for k, v in kpi_dict.items()
    ])

    # Basic EDA charts; each one degrades to a titled empty figure when the
    # columns it needs are not present in the data.
    if "customer_segment" in pedidos_filt.columns:
        df_seg = pedidos_filt.groupby("customer_segment")[["order_id"]].count().reset_index()
        fig_bar_segment = px.bar(df_seg, x="customer_segment", y="order_id", text="order_id",
                        labels={"order_id": "Órdenes"}, title="Órdenes por segmento de cliente")
    else:
        fig_bar_segment = go.Figure().update_layout(title="Órdenes por segmento de cliente (no disponible en los datos)")

    num_cols = pedidos_filt.select_dtypes("number").columns
    corr = pedidos_filt[num_cols].corr().round(2)
    fig_corr = px.imshow(corr, color_continuous_scale='RdBu', title="Matriz de correlación de variables numéricas")

    # The delay column only exists when both delivery timestamps were loaded.
    if "delivery_delay_min" in pedidos_filt.columns:
        fig_delay = px.histogram(pedidos_filt, x="delivery_delay_min", nbins=50, title="Demora de entrega (min)")
    else:
        fig_delay = go.Figure().update_layout(title="Demora de entrega (min) (no disponible en los datos)")

    if "registration_date" in pedidos_filt.columns:
        agg = (
            pedidos_filt.groupby("customer_id")
            .agg(
                first_order=("order_date", "min"),
                registration_date=("registration_date", "first")
            ).reset_index()
        )
        agg["days_to_first"] = (agg["first_order"] - agg["registration_date"]).dt.days
        fig_conversion = px.histogram(agg, x="days_to_first", nbins=30, title="Días hasta la primera compra")
    else:
        fig_conversion = go.Figure().update_layout(title="Días hasta la primera compra (no disponible en los datos)")

    N = 5
    if "customer_segment" in pedidos_filt.columns and "product_name" in pedidos_filt.columns:
        topN = (
            pedidos_filt.groupby(["customer_segment", "product_name"])
            .size().groupby(level=0, group_keys=False).nlargest(N)
            .reset_index(name="compras")
        )
        top5_prod_segment = {seg: topN[topN["customer_segment"]==seg][["product_name","compras"]].to_dict('records') for seg in topN["customer_segment"].unique()}
        top5_prod_panel = html.Div([
            html.H5("Top 5 productos por segmento"),
            html.Ul([
                html.Li([
                    html.B(f"Segmento {seg}: "),
                    ', '.join([f"{d['product_name']} ({d['compras']})" for d in lst])
                ]) for seg, lst in top5_prod_segment.items()
            ])
        ])
    else:
        top5_prod_panel = html.Div("No disponible top de productos por segmento.")

    # Clustering
    full_df, cluster_profile, kmeans, scaler_rfm, customer_topics, df_plot, X_scaled = preparar_segmentacion(pedidos_filt, K)
    cluster_names = get_cluster_names(K)

    # Elbow and silhouette charts.
    # NOTE(review): these are refit on every trigger, including dropdown-only
    # changes; consider caching if the callback feels slow.
    fig_codo, fig_silhouette = elbow_silhouette(X_scaled)

    # PCA scatter of customers coloured by cluster.
    fig_pca = px.scatter(df_plot, x='PCA1', y='PCA2', color=df_plot['cluster'].astype(str), symbol=df_plot['cluster'].astype(str),
                        title="Mapa visual de los clusters de clientes",
                        labels={"color": "Cluster"}, hover_data=['customer_id'])
    fig_pca.update_traces(marker=dict(line=dict(width=2, color='DarkSlateGrey')))

    # Bars: number of customers per cluster.
    cluster_counts = full_df['cluster'].value_counts().sort_index()
    fig_bar = px.bar(x=cluster_counts.index, y=cluster_counts.values,
                     labels={'x':'Cluster','y':'Cantidad de clientes'}, title='Clientes por Cluster',
                     color=cluster_counts.index.astype(str), color_discrete_sequence=px.colors.qualitative.Vivid)

    # Dropdown options; plain ints keep the values comparable with what the
    # browser sends back.
    valid_clusters = sorted(int(c) for c in full_df['cluster'].unique())
    cluster_options = [{"label": cluster_names[c], "value": c} for c in valid_clusters]

    # Drop selections left over from a larger K: looking them up in
    # cluster_profile would raise KeyError. Default to the first cluster.
    clusters_selected = [c for c in (clusters_selected or []) if c in valid_clusters]
    if not clusters_selected:
        clusters_selected = valid_clusters[:1]

    # Table and narrative summaries for the selected clusters.
    table_cols = ["recency_z","frequency_z","monetary_z","payment_entropy"]+list(customer_topics.columns)+["cluster"]
    filtered_df = full_df.reset_index()
    filtered_df = filtered_df[filtered_df['cluster'].isin(clusters_selected)]
    columns = [{"name": i.replace('_z',' (Z)').replace('_',' ').title(), "id": i} for i in table_cols]
    data = filtered_df[table_cols].to_dict('records')
    summaries = []
    for c in clusters_selected:
        info = cluster_summary_narrative(c, cluster_profile, cluster_names)
        summaries.append(html.Div([html.H4(f"Resumen Cluster {c}"), html.Pre(info)]))
    return (kpi_panel, fig_bar_segment, fig_corr, fig_delay, fig_conversion, top5_prod_panel,
            fig_codo, fig_silhouette, fig_pca, fig_bar, cluster_options, clusters_selected, columns, data, summaries)

## Exportación a CSV

In [None]:
@app.callback(
    Output('download-clients', 'data'),
    Input('export-btn', 'n_clicks'),
    State('clientes-table', 'data'),
    prevent_initial_call=True
)
def export_csv(n_clicks, table_data):
    """Send the rows currently held by the customer table as a CSV download.

    Returns ``no_update`` (no download) when the button has not been clicked
    or the table has no rows.
    """
    if not n_clicks:
        return no_update
    frame = pd.DataFrame(table_data)
    if frame.empty:
        return no_update
    return dcc.send_data_frame(frame.to_csv, "clientes_segmentados.csv", index=False)

## EJECUTA EL DASHBOARD FUERA DE JUPYTER

Abre automáticamente tu navegador en [http://127.0.0.1:8052/](http://127.0.0.1:8052/)

In [None]:
if __name__ == "__main__":
    import threading

    # With debug=True the Werkzeug reloader re-executes this script in a child
    # process (flagged by WERKZEUG_RUN_MAIN="true"). Open the browser only in
    # the parent process so a single tab is opened instead of two.
    if os.environ.get("WERKZEUG_RUN_MAIN") != "true":
        # Short delay to give the server time to start listening; the timer is
        # a daemon so it never keeps the process alive on shutdown.
        opener = threading.Timer(1, webbrowser.open, args=["http://127.0.0.1:8052/"])
        opener.daemon = True
        opener.start()
        print("Abriendo tu dashboard en http://127.0.0.1:8052/ ...")
    # Newer Dash releases replace `run_server` with `run`; use whichever exists.
    run = getattr(app, "run", None) or app.run_server
    run(debug=True, port=8052)