# Análisis interactivo de APL en la Región de Los Ríos
Este cuaderno consolida los datos procesados de adhesión y certificación de APL para explorar patrones relevantes mediante visualizaciones interactivas.

## 1. Cargar librerías y habilitar entorno interactivo con Plotly
Importamos las dependencias clave y configuramos Plotly para trabajar con renderers interactivos dentro del notebook.

In [7]:
from pathlib import Path
from typing import Dict, List, Tuple
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import ipywidgets as widgets
from IPython.display import display

# Notebook-wide Plotly defaults: figures are shown through the
# "notebook_connected" renderer and use the "plotly_white" template.
pio.renderers.default = "notebook_connected"
pio.templates.default = "plotly_white"

## 2. Ingestar y concatenar múltiples datasets heterogéneos
Se cargan los datasets preprocesados y se arma un diccionario para acceder a cada tabla. Luego se construyen vistas maestras que combinan métricas complementarias por año y por sector.

In [8]:
def resolve_project_root(current_path: Path) -> Path:
    """Return the project root for the given working directory.

    When ``current_path`` is a directory named ``notebooks`` its parent is
    returned; any other path is returned unchanged.
    """
    if current_path.name != "notebooks":
        return current_path
    return current_path.parent


# Project layout: processed CSV files are read from <root>/data/processed,
# while RAW_DATA_DIR points at the project root itself.
PROJECT_ROOT = resolve_project_root(Path.cwd())
RAW_DATA_DIR = PROJECT_ROOT
DATA_DIR = PROJECT_ROOT.joinpath("data", "processed")


# Fail fast with an explicit message when the processed-data folder is missing.
if not DATA_DIR.exists():
    raise FileNotFoundError(f"No se encontró el directorio con datos procesados: {DATA_DIR}")


# Load every processed table into memory, keyed by its file stem
# (each table is read from ``<name>.csv`` inside DATA_DIR).
DATASETS: Dict[str, pd.DataFrame] = {
    table_name: pd.read_csv(DATA_DIR / f"{table_name}.csv")
    for table_name in (
        "adhesion_by_year",
        "adhesion_by_sector",
        "adhesion_by_size",
        "certification_by_year",
        "certification_by_sector",
        "yearly_summary",
    )
}


# Tag each per-sector table with its origin before stacking them, so the rows
# stay distinguishable inside the combined frame.
adhesion_sector = DATASETS["adhesion_by_sector"].copy()
adhesion_sector["dataset"] = "Adhesión"
certification_sector = DATASETS["certification_by_sector"].copy()
certification_sector["dataset"] = "Certificación"
SECTOR_MASTER = pd.concat((adhesion_sector, certification_sector), ignore_index=True)


# Reshape the wide yearly summary into a long table: one row per year and
# indicator, later split into a "scope" and a "metric" label.
yearly_summary = DATASETS["yearly_summary"].copy()
yearly_long = pd.melt(
    yearly_summary,
    id_vars="year",
    value_vars=[
        "installations_adhesion",
        "companies_adhesion",
        "installations_certification",
        "companies_certification",
    ],
    var_name="indicator",
    value_name="value",
)
# Both labels are derived from the original column name: the scope from the
# "adhesion" substring and the metric from the "companies" substring.
yearly_long["scope"] = [
    "Adhesión" if "adhesion" in name else "Certificación"
    for name in yearly_long["indicator"]
]
yearly_long["metric"] = [
    "Empresas" if "companies" in name else "Instalaciones"
    for name in yearly_long["indicator"]
]
yearly_long = yearly_long.drop(columns=["indicator"])

## 3. Normalizar esquemas y resolver valores faltantes
Consolidamos el esquema de columnas, aplicamos castings de tipos y estrategias simples para completar o descartar valores faltantes antes de analizar.

In [9]:
def coerce_integer_columns(frame: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
    """Return a copy of ``frame`` with the given columns cast to integers.

    Each listed column is parsed as numeric; values that cannot be parsed
    and missing values become zero before the integer cast. The input frame
    is left untouched.
    """
    result = frame.copy()
    for name in columns:
        numeric = pd.to_numeric(result[name], errors="coerce")
        result[name] = numeric.fillna(0).astype(int)
    return result



def normalize_yearly(frame: pd.DataFrame) -> pd.DataFrame:
    """Return the yearly summary with integer counts, ordered by year.

    The year and the four adhesion/certification count columns go through
    ``coerce_integer_columns``; rows are then sorted by ``year`` and the
    index is rebuilt from zero.
    """
    count_columns = [
        "year",
        "installations_adhesion",
        "companies_adhesion",
        "installations_certification",
        "companies_certification",
    ]
    coerced = coerce_integer_columns(frame, count_columns)
    ordered = coerced.sort_values(by="year")
    return ordered.reset_index(drop=True)



def normalize_sector(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy with trimmed sector names and integer installation counts."""
    trimmed = frame.assign(sector=frame["sector"].str.strip())
    return coerce_integer_columns(trimmed, ["installations"])



def normalize_size(frame: pd.DataFrame) -> pd.DataFrame:
    """Return the company-size table with integer counts and title-cased sizes.

    Rows are ordered by ``companies`` from largest to smallest and the index
    is rebuilt from zero.
    """
    sized = coerce_integer_columns(frame, ["companies", "installations"])
    sized["company_size"] = sized["company_size"].str.title()
    ranked = sized.sort_values(by="companies", ascending=False)
    return ranked.reset_index(drop=True)


# Cleaned versions of the tables used by the charts below. The two per-year
# tables share the same integer columns, so they are coerced in one pass.
NORMALIZED_DATASETS: Dict[str, pd.DataFrame] = {
    "yearly_summary": normalize_yearly(DATASETS["yearly_summary"]),
    **{
        table_name: coerce_integer_columns(
            DATASETS[table_name], ["year", "installations", "companies"]
        )
        for table_name in ("adhesion_by_year", "certification_by_year")
    },
    "sector_master": normalize_sector(SECTOR_MASTER),
    "adhesion_by_size": normalize_size(DATASETS["adhesion_by_size"]),
}


# Preview the first rows of the normalized yearly summary.
NORMALIZED_DATASETS["yearly_summary"].head()

Unnamed: 0,year,installations_adhesion,companies_adhesion,unnamed:_3,unnamed:_4,installations_certification,companies_certification
0,2002,3,2,,,0,0
1,2004,12,9,,,0,0
2,2006,0,0,,,9,8
3,2007,14,7,,,0,0
4,2008,101,100,,,3,2


## 4. Explorar distribuciones con histogramas interactivos
Se preparan histogramas y boxplots que permiten comparar la distribución de empresas e instalaciones según distintas dimensiones.

In [10]:
def build_distribution_explorer() -> Tuple[widgets.VBox, go.Figure]:
    """Build an interactive panel for exploring histograms and boxplots.

    Three controls drive the chart: the dataset ("Conjunto"), the indicator
    to plot and the chart type (histogram or boxplot). The data is read from
    ``NORMALIZED_DATASETS``.

    Returns:
        A ``(widget, figure)`` tuple: the ``VBox`` holding the controls and
        the live output area, plus a standalone figure built from the
        current control values.
    """
    distribution_config = {
        "Adhesión anual": {
            "frame": NORMALIZED_DATASETS["adhesion_by_year"],
            "metrics": ["installations", "companies"],
            "label_map": {"installations": "Instalaciones", "companies": "Empresas"},
        },
        "Certificación anual": {
            "frame": NORMALIZED_DATASETS["certification_by_year"],
            "metrics": ["installations", "companies"],
            "label_map": {"installations": "Instalaciones", "companies": "Empresas"},
        },
        "Tamaño de empresa": {
            "frame": NORMALIZED_DATASETS["adhesion_by_size"],
            "metrics": ["companies", "installations"],
            "label_map": {"companies": "Empresas", "installations": "Instalaciones"},
            "categorical": "company_size",
        },
    }
    # Human-readable names for the optional categorical (color) dimension.
    dimension_labels = {
        "company_size": "Tamaño de empresa",
    }

    dataset_dropdown = widgets.Dropdown(
        options=list(distribution_config.keys()),
        value="Adhesión anual",
        description="Conjunto:",
    )
    chart_dropdown = widgets.ToggleButtons(
        options=[("Histograma", "hist"), ("Boxplot", "box")],
        description="Visualización:",
        value="hist",
    )
    metric_dropdown = widgets.Dropdown(description="Indicador:")
    output = widgets.Output()

    def update_metric_options(*_: object) -> None:
        """Offer the indicators available for the selected dataset."""
        config = distribution_config[dataset_dropdown.value]
        metric_dropdown.options = [
            (config["label_map"][metric], metric) for metric in config["metrics"]
        ]
        if metric_dropdown.value not in config["metrics"]:
            metric_dropdown.value = config["metrics"][0]

    def build_figure() -> go.Figure:
        """Create the figure matching the current state of the controls."""
        config = distribution_config[dataset_dropdown.value]
        frame = config["frame"]
        metric = metric_dropdown.value
        label = config["label_map"][metric]
        dimension_key = config.get("categorical")
        dimension_label = dimension_labels.get(dimension_key, "Categoría") if dimension_key else ""
        title = f"{label} - {dataset_dropdown.value}"
        if chart_dropdown.value == "hist":
            fig = px.histogram(
                frame,
                x=metric,
                nbins=10,
                color=dimension_key,
                title=title,
            )
            fig.update_layout(
                xaxis_title=label,
                yaxis_title="Frecuencia",
                legend_title_text=dimension_label or None,
            )
            for trace in fig.data:
                # The category line is only added when the dataset has a color dimension.
                categoria_linea = f"<br>{dimension_label}: {trace.name}" if dimension_label else ""
                trace.update(hovertemplate=f"{label}: %{{x}}<br>Frecuencia: %{{y}}{categoria_linea}<extra></extra>")
        else:
            fig = px.box(
                frame,
                y=metric,
                color=dimension_key,
                title=title,
            )
            fig.update_layout(
                yaxis_title=label,
                xaxis_title=dimension_label or "Observaciones",
                legend_title_text=dimension_label or None,
            )
            for trace in fig.data:
                categoria_linea = f"<br>{dimension_label}: {trace.name}" if dimension_label else ""
                trace.update(hovertemplate=f"{label}: %{{y}}{categoria_linea}<extra></extra>")
        return fig

    def render_plot(*_: object) -> None:
        """Redraw the chart inside the output area."""
        fig = build_figure()
        with output:
            # wait=True postpones the clear until the new figure is ready,
            # which avoids the output area flickering between redraws.
            output.clear_output(wait=True)
            fig.show()

    def on_dataset_change(*_: object) -> None:
        """Refresh the indicator list and always redraw for the new dataset.

        Observers of ``value`` are only notified when the value actually
        changes. Switching between datasets that share the selected indicator
        (e.g. "Adhesión anual" -> "Certificación anual") therefore would not
        trigger ``render_plot`` through the metric dropdown, leaving a stale
        chart. The redraw is done explicitly here; the metric observer is
        detached meanwhile so the chart is not drawn twice.
        """
        metric_dropdown.unobserve(render_plot, names="value")
        try:
            update_metric_options()
        finally:
            metric_dropdown.observe(render_plot, names="value")
        render_plot()

    # Populate the indicator list before wiring observers so the first draw
    # happens exactly once, through the explicit render_plot() call below.
    update_metric_options()

    dataset_dropdown.observe(on_dataset_change, names="value")
    chart_dropdown.observe(render_plot, names="value")
    metric_dropdown.observe(render_plot, names="value")

    render_plot()

    controls = widgets.HBox([dataset_dropdown, metric_dropdown, chart_dropdown])
    widget = widgets.VBox([controls, output])
    return widget, build_figure()


# Build the explorer once, display the interactive widget, and additionally
# render the standalone figure returned alongside it.
distribution_widget, distribution_fig = build_distribution_explorer()
display(distribution_widget)
distribution_fig.show()

VBox(children=(HBox(children=(Dropdown(description='Conjunto:', options=('Adhesión anual', 'Certificación anua…

## 5. Analizar relaciones temporales con series de líneas interactivas
Se aprovecha el tablero anual consolidado para visualizar tendencias y comparar la trayectoria de adhesiones versus certificaciones dentro del mismo gráfico.

In [14]:
def build_time_series_view() -> Tuple[widgets.VBox, go.Figure]:
    """Build the year-by-year line chart of company counts.

    A range slider limits the years shown and a multi-select picks the
    scopes to draw; the indicator is fixed to companies. Rows are read from
    the module-level ``yearly_long`` table.

    Returns:
        A ``(widget, figure)`` tuple: the ``VBox`` with the controls and the
        live output area, plus a standalone figure for the current selection.
    """
    metric_label = "Empresas"
    available_years = sorted(yearly_long["year"].unique())

    info_label = widgets.HTML("<b>Indicador fijo:</b> Empresas")
    scope_selector = widgets.SelectMultiple(
        options=[("Adhesión", "Adhesión"), ("Certificación", "Certificación")],
        value=("Adhesión", "Certificación"),
        description="Ámbitos:",
    )
    year_slider = widgets.SelectionRangeSlider(
        options=available_years,
        index=(0, len(available_years) - 1),
        description="Rango de años:",
        layout=widgets.Layout(width="70%"),
    )
    output = widgets.Output()

    def build_figure() -> go.Figure:
        """Create the line chart for the selected year range and scopes."""
        first_year, last_year = year_slider.value
        # Each condition is kept separate so the row filter reads step by step.
        in_range = yearly_long["year"].between(first_year, last_year)
        is_metric = yearly_long["metric"] == metric_label
        in_scope = yearly_long["scope"].isin(scope_selector.value)
        selection = yearly_long[in_range & is_metric & in_scope]

        fig = px.line(
            selection,
            x="year",
            y="value",
            color="scope",
            markers=True,
            title="Empresas por año",
        )
        fig.update_layout(
            xaxis_title="Año",
            yaxis_title=metric_label,
            legend_title_text="Ámbito",
        )
        hover_prefix = "Año: %{x}<br>Empresas: %{y}<br>Ámbito: "
        for trace in fig.data:
            trace.update(hovertemplate=hover_prefix + trace.name + "<extra></extra>")
        return fig

    def render(*_: object) -> None:
        """Redraw the chart inside the output area."""
        fig = build_figure()
        with output:
            output.clear_output()
            fig.show()

    for control in (year_slider, scope_selector):
        control.observe(render, names="value")

    render()
    controls = widgets.HBox([info_label, scope_selector])
    widget = widgets.VBox([controls, year_slider, output])
    return widget, build_figure()


# Build the time-series view, display its interactive widget, and also render
# the standalone figure returned alongside it.
time_series_widget, time_series_fig = build_time_series_view()
display(time_series_widget)
time_series_fig.show()

VBox(children=(HBox(children=(HTML(value='<b>Indicador fijo:</b> Empresas'), SelectMultiple(description='Ámbit…

## 6. Construir panel comparativo con ipywidgets y Plotly
Se combinan controles interactivos para contrastar el número de empresas mediante dos vistas —la serie temporal por año y la distribución por tamaño de empresa— dentro de un mini dashboard exploratorio.

In [15]:
def build_comparison_dashboard() -> Tuple[widgets.VBox, go.Figure]:
    """Build a panel that compares company counts over time or by company size.

    A toggle switches between a line chart of companies per year (from
    ``yearly_long``) and a bar chart of companies per company size (from the
    normalized ``adhesion_by_size`` table).

    Returns:
        A ``(widget, figure)`` tuple: the ``VBox`` with the toggle and the
        live output area, plus a standalone figure for the current view.
    """
    info_label = widgets.HTML("<b>Indicador fijo:</b> Empresas")
    view_selector = widgets.ToggleButtons(
        options=[
            ("Serie temporal", "time"),
            ("Tamaño de empresa", "size"),
        ],
        value="time",
        description="Vista:",
    )
    output = widgets.Output()

    def _time_figure() -> go.Figure:
        """Line chart of companies per year, one line per scope."""
        company_rows = yearly_long[yearly_long["metric"].str.lower() == "empresas"]
        fig = px.line(
            company_rows,
            x="year",
            y="value",
            color="scope",
            markers=True,
            title="Empresas por año",
        )
        fig.update_layout(
            xaxis_title="Año",
            yaxis_title="Empresas",
            legend_title_text="Ámbito",
        )
        hover_prefix = "Año: %{x}<br>Empresas: %{y}<br>Ámbito: "
        for trace in fig.data:
            trace.update(hovertemplate=hover_prefix + trace.name + "<extra></extra>")
        return fig

    def _size_figure() -> go.Figure:
        """Bar chart of companies per company size."""
        size_table = NORMALIZED_DATASETS["adhesion_by_size"].copy()
        fig = px.bar(
            size_table,
            x="company_size",
            y="companies",
            text="companies",
            title="Empresas por tamaño de empresa (Adhesión)",
        )
        fig.update_traces(
            textposition="outside",
            hovertemplate="Tamaño de empresa: %{x}<br>Empresas: %{y}<extra></extra>",
        )
        fig.update_layout(
            xaxis_title="Tamaño de empresa",
            yaxis_title="Empresas",
        )
        return fig

    def build_figure() -> go.Figure:
        """Create the figure for the selected view."""
        # Any view other than "time" falls through to the size chart.
        if view_selector.value == "time":
            return _time_figure()
        return _size_figure()

    def render(*_: object) -> None:
        """Redraw the chart inside the output area."""
        fig = build_figure()
        with output:
            output.clear_output()
            fig.show()

    view_selector.observe(render, names="value")

    render()
    controls = widgets.HBox([info_label, view_selector])
    widget = widgets.VBox([controls, output])
    return widget, build_figure()


# Build the comparison dashboard, display its interactive widget, and also
# render the standalone figure returned alongside it.
comparison_widget, comparison_fig = build_comparison_dashboard()
display(comparison_widget)
comparison_fig.show()

VBox(children=(HBox(children=(HTML(value='<b>Indicador fijo:</b> Empresas'), ToggleButtons(description='Vista:…

## 7. Sectores con instalaciones registradas
Visualizaciones específicas para destacar el número de instalaciones adheridas y certificadas por sector económico clave.

In [16]:
# Horizontal grouped bars: installations per sector, split by table of origin
# (the "dataset" column).
sector_summary = NORMALIZED_DATASETS["sector_master"].copy()
# Shorten the longest sector name. The pattern is a literal string, so
# regex=False is passed explicitly instead of relying on the pandas default,
# which differs between pandas versions.
sector_summary["sector"] = sector_summary["sector"].str.replace(
    "Agricultura, ganadería, pesca y silvicultura",
    "Agro, pesca y silvicultura",
    regex=False,
)

sector_fig = px.bar(
    sector_summary,
    x="installations",
    y="sector",
    color="dataset",
    barmode="group",
    orientation="h",
    text="installations",
    title="Instalaciones por sector económico",
    labels={"installations": "Instalaciones", "sector": "Sector", "dataset": "Tipo"},
)
sector_fig.update_traces(textposition="outside")
for trace in sector_fig.data:
    trace.update(hovertemplate="Sector: %{y}<br>Instalaciones: %{x}<br>Tipo: " + trace.name + "<extra></extra>")
# NOTE(review): on a horizontal chart the category axis is vertical, so
# "total descending" may place the largest sector at the bottom — confirm
# this is the intended reading order.
sector_fig.update_layout(yaxis=dict(categoryorder="total descending"))
sector_fig.show()

## 8. Distribución por tamaño de empresa
Comparativa directa entre el número de empresas e instalaciones según el tamaño declarado.

In [17]:
# Grouped bars comparing companies and installations for each company size.
size_summary = NORMALIZED_DATASETS["adhesion_by_size"].assign(
    company_size=lambda table: table["company_size"].astype(str)
)

# Long format: one row per (company size, indicator) pair.
size_long = pd.melt(
    size_summary,
    id_vars="company_size",
    value_vars=["companies", "installations"],
    var_name="indicador",
    value_name="valor",
)
indicator_names = {"companies": "Empresas", "installations": "Instalaciones"}
size_long["indicador"] = size_long["indicador"].map(indicator_names)

size_fig = px.bar(
    size_long,
    x="company_size",
    y="valor",
    color="indicador",
    text="valor",
    barmode="group",
    title="Empresas e instalaciones por tamaño de empresa",
    labels={"company_size": "Tamaño de empresa", "valor": "Cantidad", "indicador": "Tipo"},
)
size_fig.update_traces(textposition="outside")
# The trace name ("Empresas" / "Instalaciones") doubles as the hover value label.
for trace in size_fig.data:
    trace.update(hovertemplate="Tamaño de empresa: %{x}<br>" + trace.name + ": %{y}<extra></extra>")
size_fig.show()