In [3]:
import pandas as pd
import io
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import ipywidgets as widgets

# --- helpers ---
def infer_type(series, max_discrete_levels=20):
    """Guess the statistical type of a column from its dtype and cardinality.

    Args:
        series: pandas Series holding the column values.
        max_discrete_levels: largest number of distinct non-missing values a
            numeric column may have and still be labelled discrete.

    Returns:
        One of "Categórica", "Numérica discreta" or "Numérica continua".
    """
    # pandas reports bool as a numeric dtype, but a True/False column is a
    # two-level categorical variable, not a quantity to average or bin.
    if pd.api.types.is_bool_dtype(series):
        return "Categórica"
    if not pd.api.types.is_numeric_dtype(series):
        return "Categórica"
    if series.nunique() <= max_discrete_levels:
        return "Numérica discreta"
    return "Numérica continua"

def summarize_variable(df, col, assigned_type):
    """Build one row of the descriptive summary table for a single column.

    Args:
        df: DataFrame containing the column.
        col: name of the column to summarize.
        assigned_type: variable type label chosen for the column; any label
            containing "Categórica" is summarized as categorical.

    Returns:
        A dict with the keys "Nombre", "Tipo de variable", "Vacios", "Únicos",
        "Categorías", "Media", "Mediana", "Desviación", "Min" and "Max".
        Fields that do not apply to the assigned type are ``pd.NA``.
    """
    series = df[col]
    out = {
        "Nombre": col,
        "Tipo de variable": assigned_type,
        "Vacios": int(series.isna().sum()),
        "Únicos": int(series.nunique(dropna=True)),
        "Categorías": pd.NA,
        "Media": pd.NA,
        "Mediana": pd.NA,
        "Desviación": pd.NA,
        "Min": pd.NA,
        "Max": pd.NA,
    }
    if "Categórica" in assigned_type:
        # Drop missing values before stringifying; otherwise they would be
        # counted as a "nan"/"None" category although "Vacios" already
        # reports them and "Únicos" excludes them.
        counts = series.dropna().astype(str).value_counts().head(5)
        out["Categorías"] = ", ".join(
            f"{label}: {count}" for label, count in counts.items()
        )
    elif pd.api.types.is_numeric_dtype(series):
        # A column declared numeric but stored as text keeps the NA defaults.
        out["Media"] = series.mean()
        out["Mediana"] = series.median()
        out["Desviación"] = series.std()
        out["Min"] = series.min()
        out["Max"] = series.max()
    return out

def leer_csv_flexible(raw_bytes, try_encodings=None):
    """Parse CSV bytes, trying several text encodings until one works.

    Args:
        raw_bytes: bytes-like object with the raw file content.
        try_encodings: optional list of encoding names to try, in order.
            When omitted, a default list is used and UTF-16 is tried first
            if the data starts with a UTF-16 byte-order mark.

    Returns:
        A ``(DataFrame, encoding)`` tuple. ``encoding`` is the name that
        succeeded, or "default" when only the final attempt without an
        explicit encoding worked.

    Raises:
        Exception: whatever ``pd.read_csv`` raises on the final attempt when
            every listed encoding failed.
    """
    if try_encodings is None:
        try_encodings = ["utf-8", "latin1", "cp1252", "iso-8859-1", "utf-16"]
        # latin1 can decode any byte sequence, so a UTF-16 file would be
        # "successfully" read as garbage before utf-16 is ever attempted.
        # A byte-order mark is a reliable hint, so honour it first.
        if bytes(raw_bytes[:2]) in (b"\xff\xfe", b"\xfe\xff"):
            try_encodings = ["utf-16"] + try_encodings[:-1]
    for enc in try_encodings:
        try:
            return pd.read_csv(io.BytesIO(raw_bytes), encoding=enc), enc
        except Exception:
            # Deliberate best effort: any failure moves on to the next
            # candidate; the last attempt below surfaces the real error.
            continue
    return pd.read_csv(io.BytesIO(raw_bytes)), "default"

def extract_content(upload_value):
    """Return the first uploaded entry from a FileUpload ``value``.

    Accepts either a dict (the first of its values is returned) or a
    tuple/list (its first element is returned).

    Args:
        upload_value: the ``value`` attribute of the upload widget.

    Returns:
        The first entry found in ``upload_value``.

    Raises:
        ValueError: if ``upload_value`` is empty or is not a dict, tuple
            or list.
    """
    if isinstance(upload_value, dict):
        entries = list(upload_value.values())
    elif isinstance(upload_value, (tuple, list)):
        entries = upload_value
    else:
        entries = ()
    # An empty dict used to escape as IndexError while an empty tuple raised
    # ValueError; report every unusable input the same way.
    if not entries:
        raise ValueError("Formato inesperado de upload.value")
    return entries[0]

# --- shared state ---
# "df" holds the loaded DataFrame (None until a file is read);
# "types" maps each column name to its assigned variable type label.
state = dict(df=None, types={})

# --- widgets ---
upload = widgets.FileUpload(
    accept=".csv",
    multiple=False,
    description="Sube tu CSV",
)
type_override_box = widgets.VBox()

# One output area each for the summary table, the plots and status messages.
summary_out, plot_out, status_out = (
    widgets.Output(),
    widgets.Output(),
    widgets.Output(),
)

# Controls for the per-variable visualization.
var_selector = widgets.Dropdown(
    description="Variable:",
    options=[],
    layout=widgets.Layout(width="400px"),
)
bins_slider = widgets.IntSlider(
    value=25,
    min=5,
    max=100,
    step=5,
    description="Bins:",
    layout=widgets.Layout(width="300px"),
)
log_scale_chk = widgets.Checkbox(
    value=False,
    description="Escala log",
    indent=False,
)

# --- funciones ---
def build_type_override_widgets():
    """Fill ``type_override_box`` with one type dropdown per known column.

    Each dropdown starts at the type currently stored in ``state["types"]``.
    Changing it stores the new type and refreshes the summary, the variable
    selector and the plot.
    """
    type_choices = ("Categórica", "Numérica discreta", "Numérica continua")

    def make_handler(column):
        # A factory gives every dropdown its own bound column name instead
        # of sharing the loop variable.
        def handler(change):
            state["types"][column] = change["new"]
            update_summary()
            refresh_variable_selector()
            plot_variable()
        return handler

    dropdowns = []
    for column, current_type in state["types"].items():
        selector = widgets.Dropdown(
            options=list(type_choices),
            value=current_type,
            description=column,
            layout=widgets.Layout(width="450px"),
        )
        selector.observe(make_handler(column), names="value")
        dropdowns.append(selector)
    type_override_box.children = dropdowns

def update_summary():
    """Re-render the per-variable summary table inside ``summary_out``.

    Clears the output area and, when a DataFrame is loaded, shows one row
    per column built by ``summarize_variable`` using the types stored in
    ``state["types"]``.
    """
    summary_out.clear_output()
    df = state["df"]
    if df is None:
        return
    cols_order = ["Nombre", "Tipo de variable", "Vacios", "Únicos", "Categorías",
                  "Media", "Mediana", "Desviación", "Min", "Max"]
    with summary_out:
        clear_output()
        print("Resumen de variables")
        summary_list = [
            summarize_variable(df, col, state["types"][col]) for col in df.columns
        ]
        # Passing ``columns`` fixes the column order and also yields a valid
        # empty table when there are no rows, instead of a KeyError.
        summary_df = pd.DataFrame(summary_list, columns=cols_order)
        # Scope the float format to this table only; assigning
        # pd.options.display.float_format would silently change how every
        # other DataFrame in the session is displayed.
        with pd.option_context("display.float_format", "{:.3f}".format):
            display(summary_df)

def refresh_variable_selector():
    """Synchronize the variable dropdown with the loaded DataFrame's columns.

    With no DataFrame loaded the dropdown is emptied. Otherwise its options
    become the column names, and the previously selected variable is kept
    whenever it still exists; if it does not, the first column is selected.
    """
    if state["df"] is None:
        var_selector.options = []
        return
    columns = list(state["df"].columns)
    previous = var_selector.value
    # Reassigning ``options`` can reset the current selection, so only do it
    # when the column list actually changed (e.g. not on a mere type change).
    if list(var_selector.options) != columns:
        var_selector.options = columns
    if not columns:
        return
    target = previous if previous in columns else columns[0]
    if var_selector.value != target:
        var_selector.value = target

def plot_variable(*args):
    """Redraw the plots for the currently selected variable in ``plot_out``.

    Categorical variables get a bar chart of value frequencies. Numeric
    variables get a histogram (bin count from ``bins_slider``, optional log
    y-axis from ``log_scale_chk``) followed by a horizontal boxplot.
    Missing values are dropped before plotting.

    Args:
        *args: ignored; accepted so the function can be used directly as a
            widget observer callback.
    """
    plot_out.clear_output()
    df = state["df"]
    var = var_selector.value
    # Compare against None rather than truthiness so that falsy but valid
    # column names (such as 0) are still plotted.
    if df is None or var is None or var not in df.columns:
        return
    assigned_type = state["types"].get(var, "Categórica")
    series = df[var].dropna()
    with plot_out:
        clear_output()
        if series.empty:
            # Plotting an empty series makes matplotlib/pandas raise.
            print(f"La variable '{var}' no tiene datos para graficar.")
            return
        if "Categórica" in assigned_type:
            counts = series.astype(str).value_counts()
            plt.figure(figsize=(6, 4))
            counts.plot.bar()
            plt.title(f"Gráfico de barras: {var}")
            plt.xlabel(var)
            plt.ylabel("Frecuencia")
            plt.tight_layout()
            plt.show()
            return
        if not pd.api.types.is_numeric_dtype(series):
            # A text column overridden to a numeric type cannot be binned
            # or box-plotted; say so instead of raising inside the callback.
            print(
                f"La variable '{var}' no es numérica; "
                "cambia su tipo a 'Categórica' para graficarla."
            )
            return
        # A plain float array keeps matplotlib away from boolean and
        # nullable extension dtypes.
        values = series.to_numpy(dtype=float)
        plt.figure(figsize=(6, 3))
        plt.hist(values, bins=bins_slider.value)
        plt.title(f"Histograma de {var}")
        plt.xlabel(var)
        plt.ylabel("Frecuencia")
        if log_scale_chk.value:
            plt.yscale("log")
        plt.tight_layout()
        plt.show()
        plt.figure(figsize=(6, 2))
        plt.boxplot(values, vert=False, whis=1.5, patch_artist=True)
        plt.title(f"Boxplot de {var}")
        plt.tight_layout()
        plt.show()

def on_upload(change):
    """Handle a change of the upload widget: load the CSV and refresh the UI.

    Clears all output areas, extracts the raw bytes of the uploaded file,
    parses them with ``leer_csv_flexible`` and, on success, stores the
    DataFrame and inferred types in ``state`` before rebuilding the type
    dropdowns, the summary, the variable selector and the plot. Every
    failure is reported in ``status_out`` and aborts the load.

    Args:
        change: change notification from the widget (not inspected).
    """
    def report(*parts):
        # Replace whatever the status area shows with a single message.
        with status_out:
            clear_output()
            print(*parts)

    for panel in (plot_out, summary_out, status_out):
        panel.clear_output()

    if not upload.value:
        report("No se ha subido ningún archivo.")
        return

    report("Procesando archivo...")

    # Step 1: pick the uploaded entry out of the widget value.
    try:
        content = extract_content(upload.value)
    except Exception as err:
        report("Error extrayendo el contenido del upload:", repr(err))
        return

    # Step 2: get the raw bytes out of that entry.
    try:
        raw = content.get("content", None)
        if raw is None:
            raise KeyError("No se encontró 'content' en el objeto recibido.")
    except Exception as err:
        report("Error accediendo al contenido bruto del upload:", repr(err))
        return

    # Step 3: parse the bytes as CSV.
    try:
        df, used_enc = leer_csv_flexible(raw)
        report(f"Archivo cargado con codificación: {used_enc}")
    except Exception:
        import traceback
        with status_out:
            clear_output()
            print("Error al leer el CSV con ninguna codificación probada. Detalle completo:")
            # Must run while the exception is still being handled.
            traceback.print_exc()
        with summary_out:
            clear_output()
            print("No se pudo cargar el CSV. Revisa su formato o codificación.")
        return

    # Step 4: publish the new data and redraw everything that depends on it.
    state["df"] = df
    state["types"] = {name: infer_type(df[name]) for name in df.columns}

    for refresh in (
        build_type_override_widgets,
        update_summary,
        refresh_variable_selector,
        plot_variable,
    ):
        refresh()

# --- wiring ---
# A new upload reloads the data; any plot control change redraws the plot.
upload.observe(on_upload, names="value")
for control in (var_selector, bins_slider, log_scale_chk):
    control.observe(plot_variable, names="value")

# --- interface ---
# Render the page top to bottom: title, upload step, type overrides,
# summary table and finally the visualization controls and plots.
for element in (
    widgets.HTML("<h1>Estadística descriptiva</h1>"),
    widgets.HTML("<p>Sube tu archivo CSV, indica los tipos de variable y conoce tus datos 😊</p>"),
    widgets.HTML("<b>1. Sube tu archivo CSV:</b>"),
    upload,
    status_out,
    widgets.HTML("<b>2. Indica los tipos de variable (opcional):</b>"),
    type_override_box,
    summary_out,
    widgets.HTML("<h3>Visualización de variable</h3>"),
    widgets.HBox([var_selector, bins_slider, log_scale_chk]),
    plot_out,
):
    display(element)


HTML(value='<h1>Estadística descriptiva</h1>')

HTML(value='<p>Sube tu archivo CSV, indica los tipos de variable y conoce tus datos 😊</p>')

HTML(value='<b>1. Sube tu archivo CSV:</b>')

FileUpload(value=(), accept='.csv', description='Sube tu CSV')

Output()

HTML(value='<b>2. Indica los tipos de variable (opcional):</b>')

VBox()

Output()

HTML(value='<h3>Visualización de variable</h3>')

HBox(children=(Dropdown(description='Variable:', layout=Layout(width='400px'), options=(), value=None), IntSli…

Output()