In [70]:
from functools import partial, reduce
from os import rename, makedirs
import urllib.request
import pandas as pd
import os.path
import shutil
import json
import csv
import re

def gsheet_download_csv(id: str, target: str, url_template="https://docs.google.com/spreadsheets/d/{id}/export?format=csv"):
    """Download a Google Sheets document as CSV into a local file.

    Args:
        id: Spreadsheet identifier substituted into ``url_template``.
        target: Local path the downloaded content is written to.
        url_template: URL pattern; ``{id}`` is replaced by ``id``.

    Returns:
        The ``(filename, headers)`` tuple returned by
        ``urllib.request.urlretrieve``.
    """
    export_url = url_template.format(id=id)
    download_result = urllib.request.urlretrieve(export_url, target)
    return download_result

def match_relocate(src, target, pattern, file_list=None):
    """Move every file in ``src`` whose name matches ``pattern`` into ``target``.

    Args:
        src: Directory the files are moved from.
        target: Destination directory; created (with parents) when missing.
        pattern: Regular expression, as a string or a compiled pattern. It is
            applied with ``re.match``, so it is anchored at the start of the
            file name only.
        file_list: Optional explicit list of file names (relative to ``src``)
            to consider. ``None`` means "everything listed in ``src``"; an
            empty list means "nothing".

    Returns:
        None. A one-line summary is printed to stdout.
    """
    # Only fall back to the directory listing when no list was given at all:
    # `file_list or ...` would treat an explicit empty list as "move everything".
    files_to_move = os.listdir(src) if file_list is None else file_list

    # Keep the compiled regex under its own name so the summary below reports
    # the pattern text rather than the `re.compile(...)` repr.
    regex = re.compile(pattern)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    makedirs(target, exist_ok=True)

    for filename in files_to_move:
        if regex.match(filename):
            shutil.move(os.path.join(src, filename), os.path.join(target, filename))

    print(f"Files matching the pattern '{regex.pattern}' moved to '{target}'.")

def compose2(f, g):
    """Compose two callables: the result calls ``g`` first, then ``f``.

    Args:
        f: Outer callable; receives the single return value of ``g``.
        g: Inner callable; receives all positional and keyword arguments.

    Returns:
        A function equivalent to ``lambda *a, **kw: f(g(*a, **kw))``.
    """
    def composed(*a, **kw):
        # Run the inner step with the caller's arguments, then hand its
        # result to the outer step.
        inner_result = g(*a, **kw)
        return f(inner_result)

    return composed

def compose(*fs):
    """Compose any number of callables, applied right-to-left.

    ``compose(f, g, h)(x)`` evaluates ``f(g(h(x)))``. A single callable is
    returned unchanged.

    Args:
        *fs: Callables to compose; the last one is applied first.

    Returns:
        The composed callable. With no callables at all, a one-argument
        identity function is returned, so an empty pipeline is a no-op
        instead of an error.
    """
    if not fs:
        # reduce() raises TypeError on an empty sequence when no initial
        # value is given; the identity is the neutral element of composition.
        return lambda x: x
    return reduce(compose2, fs)

def chain(*fs):
    """Build a pipeline that applies the callables left-to-right.

    ``chain(f, g, h)(x)`` evaluates ``h(g(f(x)))``: the argument order is
    reversed and handed to ``compose``.
    """
    right_to_left = fs[::-1]
    return compose(*right_to_left)

def exportar_definitivo(archivo: str, folder:str, df: pd.DataFrame, nuevo_nombre=None):
    """Replace ``{folder}/definitivos/{archivo}.csv`` with ``df``, keeping a backup.

    The current file is first renamed to ``nuevo_nombre`` and ``df`` is then
    written to the original path as a comma-separated UTF-8 CSV with every
    field quoted, ``\\n`` line endings and no index column.

    Args:
        archivo: Dataset file name without the ``.csv`` extension.
        folder: Directory that contains the ``definitivos`` sub-directory.
        df: Frame to write to the original path.
        nuevo_nombre: Path the existing file is renamed to. Defaults to
            ``{folder}/definitivos/{archivo}_old.csv``.

    Returns:
        Tuple ``(original_file, nuevo_nombre)`` with both paths.

    Raises:
        OSError: If the existing file cannot be renamed (e.g. it is missing).
        Exception: Whatever ``df.to_csv`` raises; in that case the backup is
            moved back to the original path before re-raising.
    """
    if not nuevo_nombre:
        nuevo_nombre = f"{folder}/definitivos/{archivo}_old.csv"

    original_file = f'{folder}/definitivos/{archivo}.csv'

    rename(original_file, nuevo_nombre)

    try:
        df.to_csv(original_file,
                  encoding='utf-8',
                  sep=',',
                  quoting=csv.QUOTE_ALL,
                  quotechar='"',
                  lineterminator='\n',
                  decimal='.',
                  index=False)
    except BaseException:
        # A failed (or interrupted) write must not leave the dataset missing
        # from its original path: drop any partial output and put the backup
        # back where it was, then let the error propagate.
        if os.path.exists(original_file):
            os.remove(original_file)
        rename(nuevo_nombre, original_file)
        raise

    return original_file, nuevo_nombre

def drop_col(df: pd.DataFrame, col, axis=1):
    """Return ``df`` without the label(s) ``col`` along ``axis``.

    Args:
        df: Input frame; it is not modified.
        col: Label or list of labels to drop.
        axis: Axis the labels belong to; the default ``1`` drops columns.
    """
    trimmed = df.drop(labels=col, axis=axis)
    return trimmed

def drop_colx(col, axis=1):
    """Return a one-argument pipeline step that applies ``drop_col`` to a frame."""
    def step(df):
        return drop_col(df, col, axis=axis)

    return step

def normalize_countries(df: pd.DataFrame, code_col:str, new_col:str, map: dict):
    """Return a copy of ``df`` with ``new_col`` holding the mapped value of ``code_col``.

    Args:
        df: Input frame; it is left untouched.
        code_col: Column whose values are looked up in ``map``.
        new_col: Column that receives the looked-up values (added, or
            overwritten if it already exists).
        map: Lookup table from code to description.

    Returns:
        A new DataFrame with ``new_col`` set.

    Raises:
        KeyError: If a value of ``code_col`` is not a key of ``map``. The
            lookup is strict on purpose so unmapped codes are not silently
            turned into missing values.
    """
    # Work on a copy: the other pipeline steps (drop/rename/replace/melt) all
    # return new frames, and assigning into `df` would change the caller's
    # frame as a side effect of running the pipeline.
    result = df.copy()
    result[new_col] = result[code_col].apply(map.__getitem__)
    return result

def normalize_countriesx(code_col, new_col, map):
    """Return a one-argument pipeline step that applies ``normalize_countries``."""
    def step(df):
        return normalize_countries(df, code_col, new_col, map)

    return step

def wide_to_long(df: pd.DataFrame, primary_keys, value_name='valor', var_name='indicador'):
    """Melt ``df`` into long format, keeping ``primary_keys`` as identifier columns.

    Args:
        df: Wide input frame.
        primary_keys: Column(s) kept as identifiers (``id_vars``).
        value_name: Name of the column that receives the melted values.
        var_name: Name of the column that receives the former column names.

    Returns:
        The melted DataFrame.
    """
    melt_options = {
        'id_vars': primary_keys,
        'var_name': var_name,
        'value_name': value_name,
    }
    return df.melt(**melt_options)

def wide_to_longx(primary_keys, value_name='valor', var_name='indicador'):
    """Return a one-argument pipeline step that applies ``wide_to_long``."""
    def step(df):
        return wide_to_long(df, primary_keys, value_name=value_name, var_name=var_name)

    return step

def rename_cols(df: pd.DataFrame, map):
    """Return ``df`` with its columns renamed according to ``map``.

    Args:
        df: Input frame; it is not modified.
        map: Value passed as ``columns=`` to ``DataFrame.rename``
            (e.g. a dict of old name -> new name).
    """
    return df.rename(columns=map)

def rename_colsx(map):
    """Return a one-argument pipeline step that applies ``rename_cols`` with ``map``."""
    def step(df):
        return rename_cols(df, map)

    return step

def replace_value(df:pd.DataFrame, col:str, curr_value:str, new_value:str):
    """Return ``df`` with ``curr_value`` replaced by ``new_value`` in column ``col``.

    Other columns are left as they are, even if they contain ``curr_value``.
    The input frame is not modified.
    """
    # A {column: value} dict restricts the replacement to that column.
    column_target = {col: curr_value}
    return df.replace(column_target, new_value)

def replace_valuex(col, curr_value, new_value):
    """Return a one-argument pipeline step that applies ``replace_value``."""
    def step(df):
        return replace_value(df=df, col=col, curr_value=curr_value, new_value=new_value)

    return step

In [65]:
# Download the country nomenclature sheet and build an iso3 -> description lookup.
gsheet_download_csv('1kK1Yu6gz5kEWe_i0vamiGttkXUH5H90e', 'paises.csv')
paises = pd.read_csv('./paises.csv')
# Pair the two columns directly. Iterating `.iloc` only works through Python's
# legacy index-based iteration fallback and builds one Series per row.
pais = dict(zip(paises['iso3'], paises['iso3_desc_fundar']))
# Extra codes that are not rows of the sheet: 'SUD' reuses the 'SDN' entry and
# 'OHI' gets a fixed label.
pais['SUD'] = pais['SDN']
pais['OHI'] = "Otros países de ingreso alto"

In [118]:
# Dataset selection: sub-topic, delivery number and chart number.
SUBTOP = 'POBREZ'
entrega = 1
folder = f"../output/{SUBTOP}{entrega}"
grafico_n = 22

archivo = f'{SUBTOP}_g{grafico_n}'
df = pd.read_csv(f"{folder}/definitivos/{archivo}.csv")

# internal_mapping.csv is consumed positionally and must have exactly five
# columns: the 4th becomes the key and the 3rd the value of the lookup.
# The raw frame keeps its own name so `mapping` is only ever the dict, and the
# comprehension variables no longer shadow `id` or the outer `archivo`.
mapping_df = pd.read_csv(f'{folder}/internal_mapping.csv')
mapping = {
    dataset_id: dataset_file
    for (_, _, dataset_file, dataset_id, _) in mapping_df.itertuples(index=False, name=None)
}

# Template workbook; the header row is the 7th row of the sheet (header=6).
plantilla = pd.read_excel(f'../tmp/ArgenData-{SUBTOP}.xlsx', header=6)
plantilla_slice = plantilla[plantilla.dataset_archivo == mapping[archivo]]

# Variable names flagged as primary key for this dataset, without duplicates.
primary_keys = plantilla_slice.variable_nombre[plantilla_slice.primary_key].unique().tolist()
print(*primary_keys)
df.head()

year semester poverty_line pov_type


Unnamed: 0,year,semester,poverty_line,pov_type,poverty_rate
0,2003,II,Indigencia,with_transfers,32.06144
1,2003,II,Indigencia,without_transfers,34.679829
2,2003,II,Indigencia,difference,2.618388
3,2004,I,Indigencia,with_transfers,29.734375
4,2004,I,Indigencia,without_transfers,31.940569


In [111]:
# Distinct pov_type codes present in the dataset.
df['pov_type'].unique()

array(['with_transfers', 'without_transfers', 'difference'], dtype=object)

In [119]:
# Short aliases for the step factories used to assemble the pipeline.
drop = drop_colx
to_long = wide_to_longx
renombrar = rename_colsx
nomenclar_paises = partial(normalize_countriesx, map=pais)
replace = replace_valuex


def exportar(df):
    """Export ``df`` as the definitive CSV for the current ``archivo`` and ``folder``."""
    return exportar_definitivo(archivo=archivo, folder=folder, df=df, nuevo_nombre=None)


# pov_type code -> label written to the exported dataset, in application order.
etiquetas_pov_type = {
    'with_transfers': 'Con transferencias',
    'without_transfers': 'Sin transferencias',
    'difference': 'Diferencia',
}

pipeline = chain(
    to_long(primary_keys),
    *(
        replace(col='pov_type', curr_value=codigo, new_value=etiqueta)
        for codigo, etiqueta in etiquetas_pov_type.items()
    ),
    # Steps left disabled for this dataset:
    # drop('region_name'),
    # renombrar({'region_code': 'cod_area'}),
    # nomenclar_paises(code_col='cod_area', new_col="desc_area"),
    # renombrar({'iso3': 'pais'}),
)

_df = pipeline(df)
_df.head()

Unnamed: 0,year,semester,poverty_line,pov_type,indicador,valor
0,2003,II,Indigencia,Con transferencias,poverty_rate,32.06144
1,2003,II,Indigencia,Sin transferencias,poverty_rate,34.679829
2,2003,II,Indigencia,Diferencia,poverty_rate,2.618388
3,2004,I,Indigencia,Con transferencias,poverty_rate,29.734375
4,2004,I,Indigencia,Sin transferencias,poverty_rate,31.940569


In [122]:
# Show the composed pipeline object; its repr is the innermost `composed`
# closure created by compose2.
pipeline

<function __main__.compose2.<locals>.composed(*a, **kw)>

In [120]:
# Write the transformed frame over the definitive CSV; the previous file is
# renamed to `<archivo>_old.csv` first.
# NOTE: not idempotent — running this cell again renames the file that was just
# written to `_old`.
exportar(_df)

('../output/POBREZ1/definitivos/POBREZ_g22.csv',
 '../output/POBREZ1/definitivos/POBREZ_g22_old.csv')

In [121]:
# Move only the `<archivo>_old.csv` backups produced by exportar_definitivo.
# The previous pattern '.*old.*' matched any file name containing "old"
# anywhere (e.g. 'household.csv'), which could relocate live datasets.
match_relocate(f'{folder}/definitivos', f'{folder}/old', r'.*_old\.csv$')

Files matching the pattern 're.compile('.*old.*')' moved to '../output/POBREZ1/old'.


In [128]:
def f(x, resultado: list) -> int:
    """Return ``x + 1`` and append a ``'x -> x+1'`` trace entry to ``resultado``."""
    siguiente = x + 1
    resultado.append(f'{x} -> {siguiente}')
    return siguiente

def g(y, resultado: list) -> int:
    """Return ``y * 2`` and append a ``'y*2 = result'`` trace entry to ``resultado``."""
    doble = y * 2
    traza = f'{y}*2 = {doble}'
    resultado.append(traza)
    return doble

In [131]:
# Shared list that both helpers append their trace entries to.
a = []

f(3, a)
# Last expression of the cell, so only g's return value is displayed.
g(4, a)

8

In [132]:
# Trace entries accumulated by the calls to f and g.
a

['3 -> 4', '4*2 = 8']