In [None]:
import inspect
import json
from pprint import pprint

import numpy as np
import pandas as pd
import seaborn as sns
import tqdm
from IPython.display import display, Markdown
from matplotlib import pyplot as plt
from numpy.random import Generator, PCG64
from scipy import stats

In [None]:
# Never truncate columns when rendering wide DataFrames.
pd.options.display.max_columns = None

In [None]:
def get_sample_from_distribution(n_sample, random_state, params, expected=False):
    """
    Sample from a Johnson SU distribution, or return its expected value.

    Parameters
    ----------
    n_sample : number or None
        How many values to draw. Truncated with ``int()`` before sampling.
    random_state : seed, Generator or None
        Forwarded unchanged to ``rvs`` so that draws are reproducible.
    params : mapping
        Must provide the keys ``"a"``, ``"b"``, ``"loc"`` and ``"scale"``.
    expected : bool, default False
        When True, skip sampling and return the distribution mean.

    Returns
    -------
    float or numpy.ndarray
        With ``expected=True``: the mean obtained from ``expect()``, or
        ``0.0`` when that mean is not finite. Otherwise an array of
        ``int(n_sample)`` draws, or an empty array when ``n_sample`` is
        None, null, or not positive.
    """
    johnson_su = stats.johnsonsu(
        **{key: params[key] for key in ("a", "b", "loc", "scale")}
    )

    if expected:
        theoretical_mean = johnson_su.expect()
        if np.isfinite(theoretical_mean):
            return theoretical_mean
        # Non-finite mean: fall back to zero.
        return 0.0

    # An unusable sample size yields an empty array instead of an error.
    size_is_missing = n_sample is None or pd.isnull(n_sample)
    if size_is_missing or n_sample <= 0:
        return np.array([])

    # random_state is passed through so the caller controls reproducibility.
    return johnson_su.rvs(size=int(n_sample), random_state=random_state)

# Data
## Daily Demand

In [None]:
# Read the raw demand records and report how many rows were loaded.
raw = pd.read_csv("../../data/pixels/raw_pixels_with_drop_demand.csv")
print(f'Quantity of rows input {raw.shape[0]}')

display(Markdown("#### Quantity of customer by layer-period"))

# Count cod_customer entries per (year, month); transposed so each layer is a row.
raw.pivot_table(
    index=["year", "month"],
    columns=["layer"],
    values="cod_customer",
    aggfunc="count",
).T

## Distribution Parameters

In [None]:
# Load the fitted distribution parameters. Later cells index this mapping as
# params[layer][str(month)] and read the keys "a", "b", "loc" and "scale".
# The context manager closes the file handle once the JSON has been parsed.
with open(
    "../../data/scenarios/distribution/distribution_params.json",
    encoding="utf-8",
) as params_file:
    params = json.load(params_file)

## Customers per Pixel-Month, Based on 2022

In [None]:
display(Markdown("Considerando el año 2022, vamos a samplear tantas veces como clientes tengamos en cada pixel, por cada pixel se sampleara size(pixel) * 12 meses"))

# Distinct customers per pixel (rows) and layer (columns), restricted to 2022.
pivot_tbl = raw.loc[raw["year"] == 2022].pivot_table(
    index=["pixel"],
    columns=["layer"],
    values="cod_customer",
    aggfunc="nunique",
)
display(pivot_tbl.T)

# Process

In [None]:
# --- Config ---
# Work on a copy so the pivot table shown earlier is never touched.
pivot_tbl_copy = pivot_tbl.copy()

# Layers to simulate (only two).
LAYERS = ["A", "B"]
# Number of periods per scenario: 12 months.
N_PERIODS = 12
# Number of simulated scenarios to generate.
N_SCENARIO = 100
# Also generate the deterministic "expected" scenario.
GENERATE_EXPECTED = True

def build_layer_df(pivot_tbl: pd.DataFrame, layer: str) -> pd.DataFrame:
    """
    Extract one layer of the pivot table as a long DataFrame.

    The result has the pivot table's index as a regular column (``pixel`` in
    this notebook), followed by ``n_customers`` (the values of the ``layer``
    column) and a constant ``layer`` column. Rows whose customer count is
    missing are dropped.

    Raises
    ------
    KeyError
        If ``layer`` is not one of ``pivot_tbl.columns``.
    """
    if layer not in pivot_tbl.columns:
        available = list(pivot_tbl.columns)
        raise KeyError(f"El layer '{layer}' no está en pivot_tbl.columns: {available}")

    # Step by step: isolate the column, expose the index, rename, filter, tag.
    single_layer = pivot_tbl[[layer]]
    with_pixel_column = single_layer.reset_index()
    renamed = with_pixel_column.rename(columns={layer: "n_customers"})
    non_empty = renamed.dropna(subset=["n_customers"])
    return non_empty.assign(layer=layer)


all_scenarios = []

# --------------------------------------------------
# 1) SIMULATED SCENARIOS (Monte Carlo)
# --------------------------------------------------
# The per-layer customer tables depend only on pivot_tbl_copy, not on the
# scenario, so build them once up front instead of once per scenario.
layer_frames = {layer: build_layer_df(pivot_tbl_copy, layer) for layer in LAYERS}

for scenario_id in range(1, N_SCENARIO + 1):
    print(f"[scenario {scenario_id}] running...")

    # A dedicated generator per scenario, seeded from the scenario id, makes
    # each scenario file reproducible on its own.
    random_state = Generator(PCG64(12345 + scenario_id))

    pixels_list = []

    # The layer -> pixel -> period nesting fixes the order in which random
    # numbers are consumed; reordering it would change every scenario.
    for layer in LAYERS:
        for row in layer_frames[layer].itertuples(index=False):
            n_customers = int(row.n_customers)
            id_pixel = f"{row.layer}-{int(row.pixel)}"

            stop_by_period = [n_customers] * N_PERIODS
            drop_by_period = []

            for period in range(N_PERIODS):
                # Parameters are keyed by layer, then by 1-based period as a string.
                dist_params = params[layer][str(period + 1)]

                samples = get_sample_from_distribution(
                    n_sample=n_customers,
                    random_state=random_state,
                    params=dist_params,
                    expected=False,  # draw random samples
                )

                # Pixel drop for the period = sum of one draw per customer.
                drop_total = float(np.sum(samples))
                drop_by_period.append(round(drop_total, 2))

            # NOTE(review): drop_by_period already holds the pixel total (sum
            # over n_customers draws) and stop_by_period equals n_customers, so
            # this product scales with n_customers twice. Confirm that this is
            # the intended definition of demand before relying on it.
            demand_by_period = [s * d for s, d in zip(stop_by_period, drop_by_period)]

            pixels_list.append(
                {
                    "id_pixel": id_pixel,
                    "drop": drop_by_period,
                    "stop": stop_by_period,
                    "demand": demand_by_period,
                }
            )

    scenario_obj = {
        "id_scenario": scenario_id,
        "pixels": pixels_list,
        "type": "simulated"
    }

    all_scenarios.append(scenario_obj)

    print(f"[scenario {scenario_id}] saving file...")
    with open(f"../../data/scenarios/scenario_{scenario_id}.json", "w") as f:
        json.dump(scenario_obj, f, indent=2)


# --------------------------------------------------
# 2) EXPECTED SCENARIO (deterministic)
# --------------------------------------------------

if GENERATE_EXPECTED:
    print("[expected] generating scenario...")

    pixels_list = []

    for layer in LAYERS:
        df_layer = build_layer_df(pivot_tbl_copy, layer)

        # The theoretical mean depends only on (layer, period), so compute it
        # once per period here instead of once per pixel. With expected=True
        # the sampler ignores n_sample and random_state.
        expected_by_period = [
            float(
                get_sample_from_distribution(
                    n_sample=None,
                    random_state=None,
                    params=params[layer][str(period + 1)],
                    expected=True,  # theoretical mean, no sampling
                )
            )
            for period in range(N_PERIODS)
        ]

        for row in df_layer.itertuples(index=False):
            n_customers = int(row.n_customers)
            id_pixel = f"{row.layer}-{int(row.pixel)}"

            stop_by_period = [n_customers] * N_PERIODS

            # Total drop for the pixel = mean * number of customers.
            drop_by_period = [
                round(mean_drop * n_customers, 2) for mean_drop in expected_by_period
            ]

            # demand = stop * drop, the same formula as the simulated scenarios.
            # NOTE(review): drop is already a pixel total and stop equals
            # n_customers, so this scales with n_customers twice. Confirm
            # that this is the intended definition of demand.
            demand_by_period = [s * d for s, d in zip(stop_by_period, drop_by_period)]

            pixels_list.append(
                {
                    "id_pixel": id_pixel,
                    "drop": drop_by_period,
                    "stop": stop_by_period,
                    "demand": demand_by_period,
                }
            )

    expected_obj = {
        "id_scenario": "expected",
        "pixels": pixels_list,
        "type": "expected"
    }

    all_scenarios.append(expected_obj)

    print("[expected] saving file...")
    with open("../../data/scenarios/scenario_expected.json", "w") as f:
        json.dump(expected_obj, f, indent=2)


In [None]:
# Preview the first scenario. Only the first few pixels are printed: the full
# object holds one entry per pixel, each with three per-period lists.
if all_scenarios:
    first_scenario = all_scenarios[0]
    pprint({**first_scenario, "pixels": first_scenario["pixels"][:3]})
    print(f"... {len(first_scenario['pixels'])} pixels in total")