In [None]:
import os
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats
from numpy.random import Generator, PCG64

import tqdm
import json
import inspect
import warnings
from IPython.display import display, Markdown, Latex

warnings.filterwarnings('ignore')

In [None]:
import logging

# Configure the root logger first so INFO-level records are emitted,
# then grab the logger used throughout this notebook.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

##### Config

In [None]:
# Show every column when a DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

In [None]:
def get_sample_from_distribution(n_sample, random_state, params):
    """
    Draw a sample of size ``n_sample`` from a Johnson SU distribution.

    Parameters
    ----------
    n_sample : int or None/NaN
        Number of values to draw.
    random_state : numpy.random.Generator, numpy.random.RandomState, int or None
        Source of randomness forwarded to ``rvs``. Passing the same seeded
        generator state yields the same sample. ``None`` falls back to
        SciPy's default (the global NumPy random state).
    params : mapping
        Distribution parameters; must provide the keys ``"a"``, ``"b"``,
        ``"loc"`` and ``"scale"``.

    Returns
    -------
    numpy.ndarray
        Array with ``n_sample`` draws. When ``n_sample`` is null the scalar
        ``0`` is returned instead (kept for backward compatibility).

    Raises
    ------
    KeyError
        If ``params`` lacks one of the required keys.
    """
    scale, loc, b, a = params["scale"], params["loc"], params["b"], params["a"]
    if pd.isnull(n_sample):
        return 0
    distribution = stats.johnsonsu(a=a, b=b, loc=loc, scale=scale)
    # Forward the caller's generator: previously it was accepted but ignored,
    # so every draw came from the global RNG and runs were not reproducible.
    return distribution.rvs(size=n_sample, random_state=random_state)

In [None]:
def create_folder(path):
    """
    Create the directory ``path`` (including missing parents) unless it
    already exists, and report on stdout which of the two cases happened.
    """
    # Nothing to create when the folder is already there.
    if os.path.exists(path):
        print(f"La carpeta {path} ya existe.")
        return

    os.makedirs(path)
    print(f"La carpeta {path} ha sido creada.")

# Data
## Daily Demand

In [None]:
# Load the input table and log how many rows were read.
raw = pd.read_csv("../../data/output_layer_process.csv")
n_rows_input = len(raw.index)
logger.info(f'Quantity of rows input {n_rows_input}')

In [None]:
# Heading for the table rendered as this cell's output.
display(Markdown("#### Quantity of customer by layer-period"))
# Count the `cod_customer` entries per (year, month) row and per layer column.
raw.pivot_table(
    index=["year", "month"],
    columns=["layer"],
    values="cod_customer",
    aggfunc="count",
)

## Distribution Parameters

In [None]:
# Load the fitted distribution parameters. They are indexed later as
# params[layer][str(period)].
# The context manager closes the file handle, which the bare open() leaked.
with open('../../data/scenario-generation/distribution_params.json', encoding="utf-8") as params_file:
    params = json.load(params_file)

## Customers per Pixel-Month for 2022

In [None]:
# The note (in Spanish) tells the reader that, for 2022, one sample is drawn
# per customer in each pixel, for each of the 12 months.
display(Markdown("Considerando el año 2022, vamos a samplear tantas veces como clientes tengamos en cada pixel, por cada pixel se sampleara size(pixel) * 12 meses"))
# Number of distinct customers per pixel (rows) and layer (columns) in 2022.
pivot_tbl = raw[raw["year"] == 2022].pivot_table(
    index=["pixel"],
    columns=["layer"],
    values="cod_customer",
    aggfunc="nunique",
)
display(pivot_tbl.T)

## A) Layer Low

In [None]:
# Layer handled in this section and the period identifiers 1..12 to simulate.
LAYER = "low"
PERIODS = list(range(1, 13))

In [None]:
# Per-pixel customer counts for this layer (column 0 of the 2022 pivot).
# Pixels with no customer in the layer are NaN in the pivot; drop them, as the
# medium/high cells do, because int(NaN) in the sampling step would raise.
df_low = (
    pivot_tbl[0]
    .rename("n_customers")
    .reset_index()
    .dropna()
    .assign(layer=LAYER)
)

# Seed once for the whole layer. Re-creating the generator with the same seed
# on every period would restart the identical random stream for each month.
random_state = Generator(PCG64(12345))
for period in PERIODS:
    period_params = params[LAYER][str(period)]
    # One draw per customer in the pixel, summed into the pixel's total.
    df_low[period] = df_low["n_customers"].map(
        lambda n_customers: np.sum(
            get_sample_from_distribution(
                n_sample=int(n_customers),
                random_state=random_state,
                params=period_params,
            )
        )
    )
    logger.info(f"[{LAYER}][{period}] average sample {df_low[period].mean()}" )

## B) Layer Medium

In [None]:
# Layer handled in this section and the period identifiers 1..12 to simulate.
LAYER = "medium"
PERIODS = list(range(1, 13))

In [None]:
# Per-pixel customer counts for this layer (column 2 of the 2022 pivot).
# Pixels with no customer in the layer are NaN in the pivot and are dropped.
# Building the frame in one chain avoids assigning a column onto the result of
# dropna(), which can trigger pandas' SettingWithCopyWarning.
df_medium = (
    pivot_tbl[2]
    .rename("n_customers")
    .reset_index()
    .dropna()
    .assign(layer=LAYER)
)

# Seed once for the whole layer. Re-creating the generator with the same seed
# on every period would restart the identical random stream for each month.
random_state = Generator(PCG64(12345))
for period in PERIODS:
    period_params = params[LAYER][str(period)]
    # One draw per customer in the pixel, summed into the pixel's total.
    df_medium[period] = df_medium["n_customers"].map(
        lambda n_customers: np.sum(
            get_sample_from_distribution(
                n_sample=int(n_customers),
                random_state=random_state,
                params=period_params,
            )
        )
    )
    logger.info(f"[{LAYER}][{period}] average sample {df_medium[period].mean()}" )

## C) Layer High

In [None]:
# Layer handled in this section and the period identifiers 1..12 to simulate.
LAYER = "high"
PERIODS = list(range(1, 13))

In [None]:
# Per-pixel customer counts for this layer (column 1 of the 2022 pivot).
# Pixels with no customer in the layer are NaN in the pivot and are dropped.
# Building the frame in one chain avoids assigning a column onto the result of
# dropna(), which can trigger pandas' SettingWithCopyWarning.
df_high = (
    pivot_tbl[1]
    .rename("n_customers")
    .reset_index()
    .dropna()
    .assign(layer=LAYER)
)

# Seed once for the whole layer. Re-creating the generator with the same seed
# on every period would restart the identical random stream for each month.
random_state = Generator(PCG64(12345))
for period in PERIODS:
    period_params = params[LAYER][str(period)]
    # One draw per customer in the pixel, summed into the pixel's total.
    df_high[period] = df_high["n_customers"].map(
        lambda n_customers: np.sum(
            get_sample_from_distribution(
                n_sample=int(n_customers),
                random_state=random_state,
                params=period_params,
            )
        )
    )
    logger.info(f"[{LAYER}][{period}] average sample {df_high[period].mean()}" )

## Merge Data

In [None]:
# Stack the three per-layer tables into one frame with a fresh 0..n-1 index.
layer_frames = [df_low, df_medium, df_high]
df_output = pd.concat(layer_frames, ignore_index=True)
df_output

# Multiple-Scenario Creation

In [None]:
# Scenario-generation settings: number of scenario files to write, the period
# identifiers 1..12, and the layers to simulate in each scenario.
N_SCENARIO = 1000
PERIODS = list(range(1, 13))
LAYERS = ["low", "medium", "high"]

In [None]:
# Generate N_SCENARIO demand scenarios and write each one to its own CSV file.
path = '../../data/scenario-generation/scenarios'
create_folder(path)

# One independent, reproducible random stream per scenario. Previously this
# cell reused whatever `random_state` an earlier cell happened to leave in the
# kernel, so it raised NameError on a fresh kernel unless those cells had run.
scenario_seeds = np.random.SeedSequence(12345).spawn(N_SCENARIO)

# rename() already returns a new frame, so no extra copy is needed.
pivot_tbl_copy = pivot_tbl.rename(
    columns={0: "n_customers_low", 1: "n_customers_high", 2: "n_customers_medium"}
)
for scenario_id, scenario_seed in enumerate(scenario_seeds):
    logger.info(f"[scenario {scenario_id}] running...")
    random_state = Generator(PCG64(scenario_seed))

    output = []
    for layer in LAYERS:
        customers_col = f"n_customers_{layer}"
        # NOTE(review): reset_index(drop=True) discards the pixel index, so the
        # saved CSV has no pixel identifier. The schema is left unchanged here
        # because downstream readers are not visible - confirm whether the
        # pixel column should be kept.
        df = (
            pivot_tbl_copy[customers_col]
            .to_frame()
            .reset_index(drop=True)
            .dropna()
            .assign(layer=layer)
        )
        for period in PERIODS:
            period_params = params[layer][str(period)]
            # One draw per customer in the pixel, summed into the pixel's total.
            df[period] = df[customers_col].map(
                lambda n_customers: np.sum(
                    get_sample_from_distribution(
                        n_sample=int(n_customers),
                        random_state=random_state,
                        params=period_params,
                    )
                )
            )
            # DEBUG, not INFO: at INFO this line is emitted
            # N_SCENARIO * len(LAYERS) * len(PERIODS) times and floods the output.
            logger.debug(f"[{layer}][{period}] average sample {df[period].mean()}" )
        output.append(df)
    df_output = pd.concat(output)
    logger.info(f"[scenario {scenario_id}] saving file...")
    # Reuse `path` so the folder that was created is the folder written to.
    df_output.to_csv(os.path.join(path, f'scenario_{scenario_id}.csv'), index=False)