In [1]:
import os
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats
from numpy.random import Generator, PCG64

import tqdm
import json
import inspect
import warnings
from IPython.display import display, Markdown, Latex

warnings.filterwarnings('ignore')

In [2]:
import logging
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

##### Config

In [3]:
pd.set_option("display.max_columns", None)

In [4]:
def get_sample_from_distribution(n_sample, random_state, params):
    """
    Return a sample of size N from de distribution
    """
    scale, loc, b, a = params["scale"], params["loc"], params["b"], params["a"]
    #logger.info(f"param a: {a} \n param b: {b} \n param loc: {loc} \n param scale: {scale}")
    distribution = stats.johnsonsu(a=a, b=b, loc=loc, scale=scale)
    #distribution.random_state = random_state
    return distribution.rvs(size=n_sample) if not pd.isnull(n_sample) else 0

In [5]:
def create_folder(path):
    # Verificar si la carpeta existe
    if not os.path.exists(path):
        os.makedirs(path)
        print(f"La carpeta {path} ha sido creada.")
    else:
        print(f"La carpeta {path} ya existe.")

# Data
## Daily Demand

In [6]:
raw = pd.read_csv("../../data/output_layer_process.csv")
logger.info(f'Quantity of rows input {len(raw.index)}')

INFO:__main__:Quantity of rows input 71774


In [7]:
display(Markdown(f"#### Quantity of customer by layer-period"))
pd.pivot_table(data=raw, index=["year","month"], columns=["layer"], values="cod_customer", aggfunc="count")

#### Quantity of customer by layer-period

Unnamed: 0_level_0,layer,0,1,2
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,1,1421.0,9.0,42.0
2020,2,1253.0,14.0,46.0
2020,3,1057.0,4.0,29.0
2020,4,962.0,6.0,18.0
2020,5,1138.0,6.0,31.0
2020,6,1284.0,9.0,48.0
2020,7,1293.0,14.0,49.0
2020,8,1189.0,5.0,38.0
2020,9,1525.0,5.0,56.0
2020,10,1787.0,7.0,73.0


## Params Distribution

In [8]:
f = open('../../data/scenario-generation/distribution_params.json')
params = json.load(f)
#params

## Customer per Pixel-Month related to 2022

In [9]:
display(Markdown(f"Considerando el año 2022, vamos a samplear tantas veces como clientes tengamos en cada pixel, por cada pixel se sampleara size(pixel) * 12 meses"))
pivot_tbl = pd.pivot_table(data=raw[(raw.year==2022)], index=["pixel"], columns=["layer"], values="cod_customer", aggfunc="nunique")
display(pivot_tbl.T)

Considerando el año 2022, vamos a samplear tantas veces como clientes tengamos en cada pixel, por cada pixel se sampleara size(pixel) * 12 meses

pixel,26.0,42.0,71.0,72.0,73.0,87.0,88.0,104.0,105.0,106.0,107.0,108.0,109.0,111.0,115.0,116.0,119.0,120.0,121.0,122.0,123.0,124.0,125.0,126.0,127.0,130.0,131.0,132.0,133.0,134.0,135.0,136.0,137.0,138.0,139.0,140.0,141.0,142.0,143.0,146.0,147.0,148.0,149.0,150.0,151.0,152.0,153.0,154.0,155.0,156.0,157.0,158.0,161.0,162.0,163.0,164.0,165.0,166.0,167.0,168.0,169.0,170.0,171.0,172.0,173.0,177.0,178.0,179.0,180.0,181.0,182.0,183.0,184.0,185.0,188.0,189.0,192.0,193.0,194.0,195.0,196.0,197.0,198.0,199.0,200.0,201.0,208.0,209.0,210.0,211.0,212.0,213.0,214.0,215.0,216.0,217.0,218.0,224.0,226.0,227.0,228.0,229.0,230.0,234.0,235.0,241.0,242.0,244.0,245.0,246.0,258.0,259.0,261.0,262.0,263.0,274.0,275.0,279.0,280.0,290.0,291.0
layer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1
0,3.0,3.0,12.0,25.0,36.0,10.0,7.0,6.0,1.0,8.0,1.0,11.0,1.0,2.0,11.0,3.0,6.0,59.0,77.0,74.0,43.0,49.0,54.0,22.0,30.0,34.0,56.0,36.0,2.0,1.0,35.0,54.0,21.0,60.0,1.0,5.0,62.0,63.0,11.0,39.0,44.0,12.0,45.0,40.0,47.0,40.0,40.0,21.0,3.0,12.0,8.0,9.0,2.0,26.0,165.0,96.0,40.0,45.0,18.0,20.0,50.0,8.0,7.0,27.0,1.0,25.0,128.0,226.0,171.0,84.0,98.0,81.0,42.0,28.0,4.0,32.0,10.0,96.0,184.0,335.0,132.0,120.0,104.0,65.0,35.0,18.0,57.0,78.0,88.0,109.0,85.0,96.0,66.0,40.0,35.0,6.0,4.0,14.0,48.0,60.0,46.0,117.0,20.0,3.0,1.0,3.0,58.0,4.0,68.0,86.0,30.0,6.0,11.0,25.0,14.0,34.0,3.0,1.0,11.0,12.0,3.0
1,,2.0,,,2.0,,,,,,,,,,,,,1.0,2.0,3.0,,,,,1.0,,,,,,2.0,,1.0,1.0,,,3.0,,,1.0,,1.0,,,1.0,,,,,,,,1.0,2.0,1.0,1.0,,,1.0,,,,,,,,2.0,2.0,3.0,3.0,,1.0,,1.0,,,1.0,,2.0,2.0,,1.0,2.0,2.0,,,,,,2.0,,1.0,,,,,,,,,,1.0,,,,,,,,,,,,,,1.0,,,,,
2,,2.0,1.0,1.0,3.0,,1.0,1.0,,1.0,,,,1.0,1.0,1.0,,1.0,7.0,5.0,4.0,4.0,,,4.0,,,2.0,,,2.0,,3.0,5.0,,,6.0,4.0,,6.0,1.0,1.0,2.0,1.0,4.0,2.0,2.0,,,,,,1.0,2.0,4.0,5.0,1.0,,3.0,,,,,,,3.0,9.0,7.0,15.0,7.0,4.0,3.0,3.0,1.0,,1.0,2.0,8.0,22.0,41.0,19.0,5.0,7.0,6.0,1.0,,3.0,4.0,4.0,7.0,1.0,4.0,3.0,,1.0,1.0,1.0,,3.0,2.0,1.0,7.0,,,,,2.0,,4.0,5.0,,,,,1.0,4.0,,,1.0,,


## A) Layer Low

In [10]:
PERIODS = [i for i in range(1,13)]
LAYER="low"

In [11]:
df_low = pd.DataFrame(pivot_tbl[0]).reset_index()
df_low.rename(columns={
        0: "n_customers",
    }, inplace=True
)
df_low["layer"] = LAYER
for period in PERIODS:
    #logger.info(f'[PERIOD] {period}')
    # set RANDOM STATE
    random_state = Generator(PCG64(12345))
    # sampling by pixel
    df_low[period] = df_low.apply(lambda x: sum(
                                        get_sample_from_distribution(
                                            n_sample = int(x["n_customers"]), 
                                            random_state = random_state,
                                            params = params[LAYER][str(period)],
                                        )
                                    ),
                                    axis=1
                         )
    logger.info(f"[{LAYER}][{period}] average sample {df_low[period].mean()}" )

INFO:__main__:[low][1] average sample 126.90342837525975
INFO:__main__:[low][2] average sample 128.00755587454572
INFO:__main__:[low][3] average sample 128.07253021026162
INFO:__main__:[low][4] average sample 128.7380933051334
INFO:__main__:[low][5] average sample 134.37199442146695
INFO:__main__:[low][6] average sample 137.14394624263926
INFO:__main__:[low][7] average sample 127.30006382489985
INFO:__main__:[low][8] average sample 128.6870182139909
INFO:__main__:[low][9] average sample 132.25838210968334
INFO:__main__:[low][10] average sample 137.19295807961694
INFO:__main__:[low][11] average sample 136.24435462906376
INFO:__main__:[low][12] average sample 145.4273469449249


## B) Layer Medium

In [12]:
PERIODS = [i for i in range(1,13)]
LAYER="medium"

In [13]:
df_medium = pd.DataFrame(pivot_tbl[2]).reset_index()
df_medium.rename(columns={
        2: "n_customers",
    }, inplace=True
)
df_medium = df_medium.dropna()
df_medium["layer"] = LAYER
for period in PERIODS:
    #logger.info(f'[PERIOD] {period}')
    # set RANDOM STATE
    random_state = Generator(PCG64(12345))
    # sampling by pixel
    df_medium[period] = df_medium.apply(lambda x: sum(
                                        get_sample_from_distribution(
                                            n_sample = int(x["n_customers"]), 
                                            random_state = random_state,
                                            params = params[LAYER][str(period)],
                                        )
                                    ),
                                    axis=1
                         )
    logger.info(f"[{LAYER}][{period}] average sample {df_medium[period].mean()}" )

INFO:__main__:[medium][1] average sample 134.75651539660595
INFO:__main__:[medium][2] average sample 119.57855616148866
INFO:__main__:[medium][3] average sample 128.69350019148698
INFO:__main__:[medium][4] average sample 124.56519364717914
INFO:__main__:[medium][5] average sample 117.9021653815419
INFO:__main__:[medium][6] average sample 128.95462852406598
INFO:__main__:[medium][7] average sample 128.71953933204375
INFO:__main__:[medium][8] average sample 129.58216984088207
INFO:__main__:[medium][9] average sample 117.59461839845048
INFO:__main__:[medium][10] average sample 120.09425969772926
INFO:__main__:[medium][11] average sample 116.9182083394635
INFO:__main__:[medium][12] average sample 124.4583941137536


## C) Layer high

In [14]:
PERIODS = [i for i in range(1,13)]
LAYER="high"

In [15]:
df_high = pd.DataFrame(pivot_tbl[1]).reset_index()
df_high.rename(columns={
        1: "n_customers",
    }, inplace=True
)
df_high = df_high.dropna()
df_high["layer"] = LAYER
for period in PERIODS:
    #logger.info(f'[PERIOD] {period}')
    # set RANDOM STATE
    random_state = Generator(PCG64(12345))
    # sampling by pixel
    df_high[period] = df_high.apply(lambda x: sum(
                                        get_sample_from_distribution(
                                            n_sample = int(x["n_customers"]), 
                                            random_state = random_state,
                                            params = params[LAYER][str(period)],
                                        )
                                    ),
                                    axis=1
                         )
    logger.info(f"[{LAYER}][{period}] average sample {df_high[period].mean()}" )

INFO:__main__:[high][1] average sample 141.88480960989304
INFO:__main__:[high][2] average sample 161.16647786733435
INFO:__main__:[high][3] average sample 128.95738573191724
INFO:__main__:[high][4] average sample 135.4742570612636
INFO:__main__:[high][5] average sample 142.54109609718594
INFO:__main__:[high][6] average sample 148.71992621011032
INFO:__main__:[high][7] average sample 137.4079571608226
INFO:__main__:[high][8] average sample 155.06964295636735
INFO:__main__:[high][9] average sample 143.38613662860027
INFO:__main__:[high][10] average sample 127.61340681681935
INFO:__main__:[high][11] average sample 151.15614909768416
INFO:__main__:[high][12] average sample 142.74814635360144


## Merge Data

In [16]:
df_output = pd.concat([
    df_low,
    df_medium,
    df_high,
    ]
).reset_index(drop=True)
df_output

Unnamed: 0,pixel,n_customers,layer,1,2,3,4,5,6,7,8,9,10,11,12
0,26.0,3.0,low,6.298318,7.746922,3.281857,5.551407,8.057735,10.786291,13.789572,5.087591,17.882065,8.584004,6.524888,8.379531
1,42.0,3.0,low,8.452621,4.743004,8.970364,14.196209,3.874252,8.988068,12.541697,15.587404,6.723709,20.843722,10.653814,14.194566
2,71.0,12.0,low,43.401483,60.676925,30.782679,54.717055,60.770371,34.103745,31.331525,70.531120,32.505442,42.952394,20.147025,44.695553
3,72.0,25.0,low,106.845479,79.615464,60.303385,95.964422,57.054246,48.231496,58.586268,68.376043,67.161471,66.696053,66.289490,83.524875
4,73.0,36.0,low,102.194480,80.846655,139.203942,167.458367,160.684623,119.121980,99.507744,110.386296,98.216539,85.138868,88.512949,132.302555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
225,199.0,2.0,high,253.477296,168.108914,180.709728,170.924458,188.867675,201.241361,237.734771,180.347960,136.904322,160.654311,368.126569,196.343763
226,211.0,2.0,high,184.196691,222.088521,167.483717,155.820932,150.963518,166.186497,185.687947,177.359922,146.779210,138.896259,164.988639,133.729840
227,213.0,1.0,high,80.666332,66.327282,86.729864,75.719300,123.245643,135.165288,64.958308,66.361135,99.012290,151.539245,64.046919,77.726946
228,229.0,1.0,high,83.957693,118.229679,67.017521,59.388040,136.224426,98.235891,72.091039,90.897421,100.211850,77.699294,62.157054,68.488011


# Creation Multiple-Scenario

In [23]:
N_SCENARIO = 500
PERIODS = [i for i in range(1,13)]
LAYERS = ["low", "medium", "high"]

In [24]:
path=f'../../data/scenario-generation/scenarios'
create_folder(path)

pivot_tbl_copy = pivot_tbl.rename(columns={0: "low", 1:"high", 2:"medium"}).copy()
for id in range(N_SCENARIO):
    logger.info(f"[scenario {id}] running...")

    output = []
    for layer in LAYERS:
        df = pd.DataFrame(pivot_tbl_copy[layer]).reset_index().copy()
        df = df.rename(columns={layer: "n_customers"})
        df = df.dropna()
        df['layer'] = layer
        for period in PERIODS:
            df[period] = df.apply(lambda x: sum(
                                        get_sample_from_distribution(
                                            n_sample = int(x[f"n_customers"]),
                                            random_state = random_state,
                                            params = params[layer][str(period)],
                                        )
                                    ),
                                    axis=1
                         )
            logger.info(f"[{layer}][{period}] average sample {df[period].mean()}" )
        output.append(df)
    df_output = pd.concat(output)
    logger.info(f"[scenario {id}] saving file...")
    df_output.to_csv(f'../../data/scenario-generation/scenarios/scenario_{id}.csv', index=False)

INFO:__main__:[scenario 0] running...
INFO:__main__:[low][1] average sample 129.9654128116075
INFO:__main__:[low][2] average sample 130.8945728147574
INFO:__main__:[low][3] average sample 131.412320347121
INFO:__main__:[low][4] average sample 134.64955254044128


La carpeta ../../data/scenario-generation/scenarios ya existe.


INFO:__main__:[low][5] average sample 127.3056667212213
INFO:__main__:[low][6] average sample 147.06353676350545
INFO:__main__:[low][7] average sample 131.08971978551114
INFO:__main__:[low][8] average sample 126.90699106106914
INFO:__main__:[low][9] average sample 136.6937308494158
INFO:__main__:[low][10] average sample 139.83588620381428
INFO:__main__:[low][11] average sample 141.26557176980526
INFO:__main__:[low][12] average sample 151.68760440916242
INFO:__main__:[medium][1] average sample 125.4983006529024
INFO:__main__:[medium][2] average sample 119.07216473837562
INFO:__main__:[medium][3] average sample 127.30624210972023
INFO:__main__:[medium][4] average sample 139.10801855584538
INFO:__main__:[medium][5] average sample 119.31989017311852
INFO:__main__:[medium][6] average sample 130.99796097437132
INFO:__main__:[medium][7] average sample 121.22752204294403
INFO:__main__:[medium][8] average sample 126.21043579710577
INFO:__main__:[medium][9] average sample 118.99007058481024
INFO