In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import importlib

import itertools as it
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import statsmodels.api as sm

from itertools import product
from scipy.stats import gmean

from criteriaetl.utils.expansion_func import (get_value_counts_with_expansion_factor,
    get_percentage_table_with_expansion_factor)
from criteriaetl.utils.display_func import cdisplay, percentage_count_plot
from criteriaetl.utils.common_func import (get_weighted_complete_randomization_series_on_subset, 
    proportional_cut, weighted_qcut, get_partition_bool_columns_dict)
from criteriaetl.transformers.columns_base import (NameTransformer, 
    ReplaceTransformer, SelectTransformer, AssignTransformer)
from criteriaetl.transformers.rows_base import AggregateTransformer
from criteriaetl.transformers.fusion_base import MergeTransformer

from projectetl.utils.dataload import (load_survey_data_do, save_survey_with_pickle,
                                       load_survey_from_pickle)
from projectetl.utils.config import DATA_DIR

# Load databases

In [3]:
# Price-index groups that make up each basket.
canasta_alimentaria = ['alimentos y bebidas no alcohólicas']
canasta_no_alimentaria = [
    'bebidas alcohólicas y tabaco',
    'prendas de vestir y calzado',
    'vivienda',
    'muebles',
    'salud',
    'transporte',
    'comunicaciones',
    'recreación y cultura',
    'educación',
    'restaurantes y hoteles',
    'bienes y servicios diversos',
]

# The index column of every group is named '<group> indice'.
canasta_alimentaria_cols = [f'{group} indice' for group in canasta_alimentaria]
canasta_no_alimentaria_cols = [f'{group} indice' for group in canasta_no_alimentaria]
# Food column first, then the non-food columns, in the order listed above.
canasta_cols = [*canasta_alimentaria_cols, *canasta_no_alimentaria_cols]

## Load IPC base 2010 = 100

In [4]:
# Time columns as spelled in the base-2010 file (accented 'período').
time_cols = ['anno', 'período']

inflation_2010_raw = load_survey_data_do(DATA_DIR / 'inflation' / 'ipc_grupos_base_2010.csv',
                                         load_func=lambda path: pd.read_csv(str(path)))
# Keep only the time and basket index columns, and normalise the accented
# 'período' header to 'periodo' so both IPC files share the same column name.
# The rename is chained (no inplace=True) so the cell never mutates a frame
# that was just produced by a column selection.
inflation_2010_raw = (
    inflation_2010_raw[time_cols + canasta_alimentaria_cols + canasta_no_alimentaria_cols]
    .rename(columns={'período': 'periodo'})
)
inflation_2010_raw.head()

Unnamed: 0,anno,periodo,alimentos y bebidas no alcohólicas indice,bebidas alcohólicas y tabaco indice,prendas de vestir y calzado indice,vivienda indice,muebles indice,salud indice,transporte indice,comunicaciones indice,recreación y cultura indice,educación indice,restaurantes y hoteles indice,bienes y servicios diversos indice
0,2016,Enero,137.49,145.78,93.46,109.7,115.68,123.98,110.29,102.08,109.26,147.64,122.39,118.35
1,2016,Febrero,134.85,147.3,93.26,109.77,115.74,124.52,109.59,102.08,109.65,147.65,122.45,118.62
2,2016,Marzo,133.33,147.85,92.97,111.01,115.73,124.98,110.49,102.08,109.75,147.65,122.67,118.91
3,2016,Abril,132.13,148.12,92.24,111.66,115.86,125.33,111.09,102.08,109.23,147.65,122.88,119.09
4,2016,Mayo,131.61,148.38,92.65,112.35,115.93,125.83,112.51,102.74,108.85,147.59,123.04,119.38


## Load annual database IPC base 2019 - 2020 = 100

In [5]:
# Time columns as selected from the base 2019-2020 file (unaccented 'periodo').
time_cols = ['anno', 'periodo']

ipc_19_20_csv = DATA_DIR / 'inflation' / 'ipc_grupos_base_19_20.csv'
inflation_19_20_raw = load_survey_data_do(
    ipc_19_20_csv,
    load_func=lambda path: pd.read_csv(str(path)),
)

# Keep only the time columns and the basket index columns.
ipc_19_20_keep_cols = time_cols + canasta_alimentaria_cols + canasta_no_alimentaria_cols
inflation_19_20_raw = inflation_19_20_raw[ipc_19_20_keep_cols]
inflation_19_20_raw.head()

Unnamed: 0,anno,periodo,alimentos y bebidas no alcohólicas indice,bebidas alcohólicas y tabaco indice,prendas de vestir y calzado indice,vivienda indice,muebles indice,salud indice,transporte indice,comunicaciones indice,recreación y cultura indice,educación indice,restaurantes y hoteles indice,bienes y servicios diversos indice
0,1999,Enero,20.99,11.47,47.04,20.85,33.48,20.15,14.17,29.31,35.6,14.18,23.19,27.69
1,1999,Febrero,20.65,11.48,47.24,20.92,33.69,20.47,14.17,29.31,35.94,14.18,23.16,27.95
2,1999,Marzo,20.88,11.76,47.35,20.98,33.79,20.57,14.17,29.31,36.03,14.18,23.16,28.1
3,1999,Abril,20.94,12.04,47.44,21.04,33.91,20.68,14.17,29.31,36.16,14.18,23.16,28.21
4,1999,Mayo,20.79,12.08,47.53,21.08,34.01,20.8,14.18,29.31,36.3,14.18,23.29,28.28


#

# Processing databases

## Replace Transformer

* The `periodo` column is mapped from Spanish month names to the corresponding two-digit month numbers.
* A `date` column is created using `pd.to_datetime`.
* The `date` column is set as the index so that a given date can be selected easily with `.loc`.

In [6]:
# Spanish month names in calendar order; list position + 1 is the month number.
months = [
    'Enero', 'Febrero', 'Marzo', 'Abril', 'Mayo', 'Junio',
    'Julio', 'Agosto', 'Septiembre', 'Octubre', 'Noviembre', 'Diciembre',
]
# Replacement spec for the 'periodo' column: month name -> '01' .. '12'.
month_number_by_name = {
    name: f'{number:02d}' for number, name in enumerate(months, start=1)
}
replace_months_map = {'periodo': month_number_by_name}

# The same transformer instance is applied to both raw IPC tables.
replace_months_transformer = ReplaceTransformer(replace_months_map)
inflation_2010_replaced = replace_months_transformer.transform(inflation_2010_raw)
inflation_19_20_replaced = replace_months_transformer.transform(inflation_19_20_raw)

In [7]:
def _index_by_month_start(frame, value_cols):
    """Return ``frame[value_cols]`` indexed by the first day of each month.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain an ``anno`` column (year) and a ``periodo`` column that
        already holds zero-padded month strings ('01' .. '12').
    value_cols : list of str
        Columns to keep in the result.

    Returns
    -------
    pandas.DataFrame
        A new frame with a ``date`` index and only ``value_cols`` as columns.
        ``frame`` itself is not modified.
    """
    # 'YYYY' + 'MM' + '01' -> first day of the month, parsed with a fixed format.
    month_start = pd.to_datetime(frame['anno'].apply(str) + frame['periodo'] + '01',
                                 format='%Y%m%d')
    # assign() builds a new frame instead of adding the column in place.
    return frame.assign(date=month_start).set_index('date')[value_cols]


inflation_2010_replaced = _index_by_month_start(inflation_2010_replaced, canasta_cols)
inflation_19_20_replaced = _index_by_month_start(inflation_19_20_replaced, canasta_cols)

## Rebase IPC base 2010 database on September 2016

In [8]:
# Rebase the base-2010 indices so that September 2016 equals 1 in every column.
ipc_2010_at_2016_09 = inflation_2010_replaced.loc['2016-09-01'].astype(float)
inflation_2016_10_replaced = inflation_2010_replaced.astype(float).div(ipc_2010_at_2016_09)

In [9]:
# Check the rebasing: the 2016-09-01 row should be 1.0 in every column.
inflation_2016_10_replaced.head(10)

Unnamed: 0_level_0,alimentos y bebidas no alcohólicas indice,bebidas alcohólicas y tabaco indice,prendas de vestir y calzado indice,vivienda indice,muebles indice,salud indice,transporte indice,comunicaciones indice,recreación y cultura indice,educación indice,restaurantes y hoteles indice,bienes y servicios diversos indice
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2016-01-01,1.043013,0.978783,1.022315,0.971312,0.994327,0.973614,0.9634,0.993576,1.00165,0.955042,0.991333,0.981913
2016-02-01,1.022986,0.988989,1.020127,0.971932,0.994843,0.977855,0.957285,0.993576,1.005226,0.955107,0.991819,0.984153
2016-03-01,1.011455,0.992682,1.016955,0.982911,0.994757,0.981467,0.965147,0.993576,1.006142,0.955107,0.993601,0.986559
2016-04-01,1.002352,0.994494,1.00897,0.988667,0.995874,0.984215,0.970388,0.993576,1.001375,0.955107,0.995302,0.988053
2016-05-01,0.998407,0.99624,1.013454,0.994776,0.996476,0.988142,0.982792,1.0,0.997891,0.954719,0.996598,0.990459
2016-06-01,1.005083,0.99718,1.006673,0.996547,0.998367,0.991126,1.001048,1.0,0.998717,0.96287,0.997246,0.993695
2016-07-01,1.011607,0.997986,1.006673,0.996901,0.999656,0.993953,0.998602,1.0,1.000183,0.963646,0.997813,0.995437
2016-08-01,1.00789,0.99906,1.000875,0.994865,1.000688,0.997016,0.994235,1.0,1.000642,0.987386,0.998704,0.99693
2016-09-01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2016-10-01,0.996359,1.000739,0.996062,1.012307,0.999742,1.002749,1.013976,1.005645,0.999908,1.000065,1.001296,1.001079


In [10]:
# Inspect the last ten months of the rebased base-2010 series.
inflation_2016_10_replaced.tail(10)

Unnamed: 0_level_0,alimentos y bebidas no alcohólicas indice,bebidas alcohólicas y tabaco indice,prendas de vestir y calzado indice,vivienda indice,muebles indice,salud indice,transporte indice,comunicaciones indice,recreación y cultura indice,educación indice,restaurantes y hoteles indice,bienes y servicios diversos indice
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-12-01,1.145805,1.196791,0.89149,1.076235,1.050112,1.114418,1.142558,1.08721,1.037587,1.145029,1.069091,1.103045
2020-01-01,1.152557,1.200483,0.888099,1.06986,1.05269,1.118894,1.147013,1.08721,1.047671,1.150721,1.072574,1.107359
2020-02-01,1.153088,1.211562,0.887005,1.066053,1.053378,1.122899,1.134085,1.08721,1.04868,1.150721,1.074275,1.111922
2020-03-01,1.151419,1.218947,0.887771,1.053303,1.054324,1.125412,1.106394,1.08721,1.048863,1.150721,1.075085,1.115158
2020-04-01,1.145198,1.221029,0.88788,1.049761,1.054839,1.128946,1.065252,1.08721,1.04978,1.138754,1.075733,1.11773
2020-05-01,1.124412,1.225594,0.888646,1.062157,1.066701,1.136014,1.083071,1.08721,1.054914,1.090109,1.080269,1.125446
2020-06-01,1.147246,1.230496,0.887005,1.087037,1.07564,1.14261,1.136618,1.086529,1.064723,1.056019,1.085534,1.13142
2020-07-01,1.18146,1.23486,0.884489,1.09536,1.09223,1.154233,1.179245,1.086529,1.071782,1.061065,1.094687,1.138389
2020-08-01,1.189956,1.239895,0.88438,1.106517,1.095238,1.164756,1.197065,1.092856,1.071507,1.06928,1.09825,1.145607
2020-09-01,1.202625,1.243588,0.885145,1.110944,1.097903,1.172059,1.201695,1.092856,1.070132,1.08888,1.101733,1.148843


## Rebase IPC base 2019 - 2020 database on September 2016

In [11]:
# Chain-link the base 2019-2020 series onto the September-2016 base: express it
# relative to its own 2020-09-01 value, then scale by the level the rebased
# base-2010 series has on that same date.
ipc_19_20_relative = (
    inflation_19_20_replaced.astype(float)
    / inflation_19_20_replaced.loc['2020-09-01'].astype(float)
)
inflation_2016_19_20_replaced = ipc_19_20_relative * inflation_2016_10_replaced.loc['2020-09-01']

In [12]:
# Inspect the last ten months; the 2020-09-01 row should equal the rebased
# base-2010 series on that date, since it is the linking point.
inflation_2016_19_20_replaced.tail(10)

Unnamed: 0_level_0,alimentos y bebidas no alcohólicas indice,bebidas alcohólicas y tabaco indice,prendas de vestir y calzado indice,vivienda indice,muebles indice,salud indice,transporte indice,comunicaciones indice,recreación y cultura indice,educación indice,restaurantes y hoteles indice,bienes y servicios diversos indice
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-04-01,1.14518,1.221008,0.887907,1.049747,1.05481,1.128939,1.065274,1.087197,1.04976,1.138688,1.075656,1.117699
2020-05-01,1.124365,1.225524,0.88862,1.062073,1.06666,1.135956,1.083088,1.087197,1.054932,1.090103,1.080201,1.125457
2020-06-01,1.147157,1.230406,0.887016,1.087049,1.075602,1.142633,1.136645,1.086436,1.064749,1.055971,1.085503,1.131415
2020-07-01,1.181461,1.2348,0.884522,1.095266,1.092193,1.154177,1.179284,1.086436,1.071821,1.060975,1.094592,1.138386
2020-08-01,1.18995,1.239804,0.884344,1.106511,1.095209,1.164703,1.197097,1.092856,1.071504,1.069202,1.098163,1.145582
2020-09-01,1.202625,1.243588,0.885145,1.110944,1.097903,1.172059,1.201695,1.092856,1.070132,1.08888,1.101733,1.148843
2020-10-01,1.226928,1.247738,0.88372,1.11289,1.102104,1.177944,1.204223,1.092856,1.07256,1.08888,1.105953,1.152553
2020-11-01,1.24472,1.2586,0.878909,1.117215,1.105659,1.182019,1.20928,1.092747,1.074882,1.089881,1.111364,1.156938
2020-12-01,1.24158,1.265069,0.881582,1.124675,1.109215,1.185753,1.228818,1.094814,1.084171,1.089881,1.116774,1.163234
2021-01-01,1.254837,1.277396,0.883898,1.137109,1.112447,1.191073,1.254332,1.094488,1.085332,1.094662,1.126837,1.168968


## Append databases

In [13]:
# Extend the rebased base-2010 series with the months from 2020-10-01 to
# 2021-01-01 taken from the chain-linked base 2019-2020 series.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
inflation_2016_appended = pd.concat([
    inflation_2016_10_replaced,
    inflation_2016_19_20_replaced.loc['2020-10-01':'2021-01-01'],
])
inflation_2016_appended.tail(10)

Unnamed: 0_level_0,alimentos y bebidas no alcohólicas indice,bebidas alcohólicas y tabaco indice,prendas de vestir y calzado indice,vivienda indice,muebles indice,salud indice,transporte indice,comunicaciones indice,recreación y cultura indice,educación indice,restaurantes y hoteles indice,bienes y servicios diversos indice
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-04-01,1.145198,1.221029,0.88788,1.049761,1.054839,1.128946,1.065252,1.08721,1.04978,1.138754,1.075733,1.11773
2020-05-01,1.124412,1.225594,0.888646,1.062157,1.066701,1.136014,1.083071,1.08721,1.054914,1.090109,1.080269,1.125446
2020-06-01,1.147246,1.230496,0.887005,1.087037,1.07564,1.14261,1.136618,1.086529,1.064723,1.056019,1.085534,1.13142
2020-07-01,1.18146,1.23486,0.884489,1.09536,1.09223,1.154233,1.179245,1.086529,1.071782,1.061065,1.094687,1.138389
2020-08-01,1.189956,1.239895,0.88438,1.106517,1.095238,1.164756,1.197065,1.092856,1.071507,1.06928,1.09825,1.145607
2020-09-01,1.202625,1.243588,0.885145,1.110944,1.097903,1.172059,1.201695,1.092856,1.070132,1.08888,1.101733,1.148843
2020-10-01,1.226928,1.247738,0.88372,1.11289,1.102104,1.177944,1.204223,1.092856,1.07256,1.08888,1.105953,1.152553
2020-11-01,1.24472,1.2586,0.878909,1.117215,1.105659,1.182019,1.20928,1.092747,1.074882,1.089881,1.111364,1.156938
2020-12-01,1.24158,1.265069,0.881582,1.124675,1.109215,1.185753,1.228818,1.094814,1.084171,1.089881,1.116774,1.163234
2021-01-01,1.254837,1.277396,0.883898,1.137109,1.112447,1.191073,1.254332,1.094488,1.085332,1.094662,1.126837,1.168968


## Calculate inflation for basic basket

In [15]:
# One callable per output column. Each lambda binds its column list through a
# default argument, so the list is fixed when the dict is built.
# - 'alimentaria': the food index column(s), passed through unchanged
#   (canasta_alimentaria_cols holds a single column).
# - 'no_alimentaria' / 'ampliada': row-wise (axis=1) geometric mean of the
#   non-food columns / of all basket columns; no weights are passed to gmean.
inflation_basket_2016_assign_map = {
    'alimentaria': lambda df, cols=canasta_alimentaria_cols: df[cols],
    'no_alimentaria': lambda df, cols=canasta_no_alimentaria_cols: gmean(df[cols], axis=1),
    'ampliada': lambda df, cols=canasta_cols: gmean(df[cols], axis=1)
}

# NOTE(review): AssignTransformer presumably adds each key as a new column by
# calling the mapped function on the frame (DataFrame.assign-like) — confirm
# against criteriaetl.
inflation_basket_2016_assign_transformer = AssignTransformer(inflation_basket_2016_assign_map)
inflation_basket_2016_assigned = inflation_basket_2016_assign_transformer.transform(inflation_2016_appended)

In [16]:
# Keep only the three aggregated basket indices, dropping the per-group columns.
basket_cols = [
    'alimentaria',
    'no_alimentaria',
    'ampliada',
]
inflation_basket_2016_final = inflation_basket_2016_assigned.loc[:, basket_cols]

In [17]:
# Preview the latest rows of the three basket indices.
inflation_basket_2016_final.tail()

Unnamed: 0_level_0,alimentaria,no_alimentaria,ampliada
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-09-01,1.202625,1.106626,1.114324
2020-10-01,1.226928,1.109014,1.118391
2020-11-01,1.24472,1.112003,1.1225
2020-12-01,1.24158,1.117881,1.127701
2021-01-01,1.254837,1.125047,1.13533


# Save IPC base 2016

In [19]:
# Persist the basket indices as a pickle under DATA_DIR/inflation/object.
inflation_basket_2016_pkl = DATA_DIR / 'inflation' / 'object' / 'inflation_basket_2016.pkl'
save_survey_with_pickle(inflation_basket_2016_final, inflation_basket_2016_pkl)

In [None]:
# Basket index levels for October 2020 (one value per basket column).
# The row has to be selected with .loc: plain [] on a DataFrame looks up a
# column label, so inflation_basket_2016_final['2020-10-01'] raises KeyError on
# pandas >= 2.0 (earlier versions only fell back to row selection with a
# deprecation warning).
inflation = inflation_basket_2016_final.loc['2020-10-01']
# NOTE(review): 2167.16 and 2076.49 look like urban / rural base-period amounts
# being updated with the index — confirm their source and units.
urbana = inflation * 2167.16
rural = inflation * 2076.49
print(f'urbana - {urbana}\nrural - {rural}')