In [49]:
from functools import partial, reduce
from os import rename, makedirs
import urllib.request
import pandas as pd
import os.path
import shutil
import json
import csv
import re

def gsheet_download_csv(id: str, target: str, url_template="https://docs.google.com/spreadsheets/d/{id}/export?format=csv"):
    """Download a spreadsheet as CSV and store it at ``target``.

    Parameters
    ----------
    id : str
        Value substituted for the ``{id}`` placeholder of ``url_template``.
    target : str
        Local path the downloaded content is written to.
    url_template : str
        URL pattern containing an ``{id}`` placeholder. The default points at
        the Google Sheets CSV export endpoint.

    Returns
    -------
    tuple
        Whatever ``urllib.request.urlretrieve`` returns: ``(filename, headers)``.
    """
    export_url = url_template.format(id=id)
    download_result = urllib.request.urlretrieve(export_url, target)
    return download_result

def match_relocate(src, target, pattern, file_list=None):
    """Move the files of ``src`` whose name matches ``pattern`` into ``target``.

    Parameters
    ----------
    src : str
        Directory the files are moved from.
    target : str
        Directory the files are moved to; created (with parents) if missing.
    pattern : str or compiled pattern
        Regular expression tested with ``match`` (anchored at the start of the
        file name, not at the end).
    file_list : iterable of str, optional
        File names (relative to ``src``) to consider. When ``None`` the whole
        content of ``src`` is listed. An explicitly empty list moves nothing.

    Returns
    -------
    None
        A summary line is printed once all matching files were moved.
    """
    # Compare against None instead of relying on truthiness: an empty list
    # must mean "no candidates", not "fall back to every file in src".
    files_to_move = os.listdir(src) if file_list is None else file_list

    regex = re.compile(pattern)

    # exist_ok avoids the separate existence check (and the race it implies).
    makedirs(target, exist_ok=True)

    for filename in files_to_move:
        if regex.match(filename):
            src_path = os.path.join(src, filename)
            dest_path = os.path.join(target, filename)

            shutil.move(src_path, dest_path)

    # Report the pattern text; formatting the compiled object would print
    # its repr, i.e. "re.compile('...')".
    print(f"Files matching the pattern '{regex.pattern}' moved to '{target}'.")

def compose2(f, g):
    """Compose two callables: the result computes ``f(g(*args, **kwargs))``.

    ``g`` receives every positional and keyword argument; ``f`` receives the
    single value returned by ``g``.
    """
    def composed(*a, **kw):
        inner_result = g(*a, **kw)
        return f(inner_result)

    return composed

def compose(*fs):
    """Compose callables right-to-left: ``compose(f, g, h)(x) == f(g(h(x)))``.

    The last callable receives all positional and keyword arguments; every
    other callable receives the single value produced by the one after it.

    Edge cases
    ----------
    * No callables: returns the identity function instead of raising the
      ``TypeError`` that a bare ``reduce`` raises on an empty sequence.
    * One callable: that callable is returned unchanged.
    """
    if not fs:
        return lambda value: value
    if len(fs) == 1:
        return fs[0]

    *outer, innermost = fs

    def composed(*a, **kw):
        result = innermost(*a, **kw)
        # Apply the remaining callables from the innermost outwards.
        for fn in reversed(outer):
            result = fn(result)
        return result

    return composed

def chain(*fs):
    """Compose callables left-to-right: ``chain(f, g, h)(x) == h(g(f(x)))``.

    The first callable receives the original arguments; each following one
    receives the previous result. Delegates to ``compose`` with the order
    reversed.
    """
    right_to_left = fs[::-1]
    return compose(*right_to_left)

def exportar_definitivo(archivo: str, folder:str, df: pd.DataFrame, nuevo_nombre=None):
    """Replace ``{folder}/definitivos/{archivo}.csv`` with ``df``, keeping a backup.

    The existing CSV is first renamed to ``nuevo_nombre`` and ``df`` is then
    written to the original path (UTF-8, comma separated, every field quoted,
    ``\\n`` line endings, no index column).

    Parameters
    ----------
    archivo : str
        File name without the ``.csv`` extension.
    folder : str
        Directory that contains the ``definitivos`` sub-directory.
    df : pd.DataFrame
        Data written to the original path.
    nuevo_nombre : str, optional
        Path the previous file is renamed to. Defaults to
        ``{folder}/definitivos/{archivo}_old.csv``.

    Returns
    -------
    tuple of (str, str)
        ``(path written, backup path)``.

    Raises
    ------
    OSError
        If the original file cannot be renamed (for instance it does not exist).
    Exception
        Any error raised while writing the CSV is re-raised after the original
        file has been moved back into place.

    Notes
    -----
    ``os.rename`` silently replaces an existing backup on Unix and raises
    ``FileExistsError`` on Windows, so running this twice for the same file
    overwrites (or fails on) the first backup.
    """
    if not nuevo_nombre:
        nuevo_nombre = f"{folder}/definitivos/{archivo}_old.csv"

    original_file = f'{folder}/definitivos/{archivo}.csv'

    rename(original_file, nuevo_nombre)

    try:
        df.to_csv(original_file,
                  encoding='utf-8',
                  sep=',',
                  quoting=csv.QUOTE_ALL,
                  quotechar='"',
                  lineterminator='\n',
                  decimal='.',
                  index=False)
    except Exception:
        # Roll back: without this a failed export leaves no file at the
        # original path (the data would only survive under the backup name).
        if os.path.exists(original_file):
            os.remove(original_file)  # drop a partially written file
        rename(nuevo_nombre, original_file)
        raise

    return original_file, nuevo_nombre

def drop_col(df: pd.DataFrame, col, axis=1):
    """Return a new frame without the label(s) ``col`` along ``axis``.

    ``axis=1`` (the default) drops columns; ``axis=0`` drops rows by index
    label. The input frame is left untouched.
    """
    trimmed = df.drop(labels=col, axis=axis)
    return trimmed

def drop_colx(col, axis=1):
    """Build a one-argument pipeline step that applies ``drop_col`` to a frame.

    The returned callable takes a DataFrame and drops ``col`` along ``axis``.
    """
    def _step(df):
        return drop_col(df, col, axis=axis)

    return _step

def normalize_countries(df: pd.DataFrame, code_col:str, new_col:str, map: dict):
    """Add ``new_col`` holding the ``map`` lookup of every value in ``code_col``.

    Parameters
    ----------
    df : pd.DataFrame
        Input frame; it is not modified.
    code_col : str
        Column whose values are used as lookup keys.
    new_col : str
        Name of the column that receives the looked-up values.
    map : dict
        Lookup table (the name shadows the builtin but is kept because callers
        may pass it by keyword).

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` with ``new_col`` added (or replaced).

    Raises
    ------
    KeyError
        If any value of ``code_col`` is not a key of ``map``; the lookup is
        deliberately strict so unknown codes are not silently turned into NaN.
    """
    # Work on a copy so the caller's frame is not mutated as a side effect.
    out = df.copy()
    out[new_col] = out[code_col].apply(map.__getitem__)
    return out

def normalize_countriesx(code_col, new_col, map):
    """Build a one-argument pipeline step around ``normalize_countries``.

    The returned callable takes a DataFrame and adds ``new_col`` with the
    ``map`` lookup of ``code_col``.
    """
    def _step(df):
        return normalize_countries(df, code_col, new_col, map)

    return _step

def wide_to_long(df: pd.DataFrame, primary_keys, value_name='valor', var_name='indicador'):
    """Unpivot ``df`` to long format, keeping ``primary_keys`` as identifiers.

    Every column not listed in ``primary_keys`` becomes one row per original
    row: its name goes to the ``var_name`` column and its value to the
    ``value_name`` column.
    """
    melt_options = {
        'id_vars': primary_keys,
        'var_name': var_name,
        'value_name': value_name,
    }
    return df.melt(**melt_options)

def wide_to_longx(primary_keys, value_name='valor', var_name='indicador'):
    """Build a one-argument pipeline step around ``wide_to_long``.

    The returned callable takes a DataFrame and unpivots it, keeping
    ``primary_keys`` as identifier columns.
    """
    def _step(df):
        return wide_to_long(df, primary_keys, value_name=value_name, var_name=var_name)

    return _step

def rename_cols(df: pd.DataFrame, map):
    """Return a new frame with columns renamed according to ``map``.

    ``map`` is forwarded to ``DataFrame.rename(columns=...)``; columns that
    are not mentioned keep their name.
    """
    return df.rename(columns=map)

def rename_colsx(map):
    """Build a one-argument pipeline step around ``rename_cols``.

    The returned callable takes a DataFrame and renames its columns with ``map``.
    """
    def _step(df):
        return rename_cols(df, map)

    return _step

def replace_value(df:pd.DataFrame, col:str, curr_value:str, new_value:str):
    """Return a new frame where ``curr_value`` in column ``col`` becomes ``new_value``.

    Only the column named ``col`` is searched; other columns keep their values
    even if they contain ``curr_value``.
    """
    per_column_target = {col: curr_value}
    return df.replace(to_replace=per_column_target, value=new_value)

def replace_valuex(col, curr_value, new_value):
    """Build a one-argument pipeline step around ``replace_value``.

    The returned callable takes a DataFrame and replaces ``curr_value`` with
    ``new_value`` in column ``col``.
    """
    def _step(df):
        return replace_value(df=df, col=col, curr_value=curr_value, new_value=new_value)

    return _step

def sort_vals_asc(df:pd.DataFrame, prim_keys:list):
    """Return ``df`` sorted ascending by ``prim_keys`` with a fresh 0..n-1 index."""
    ordered = df.sort_values(by=prim_keys)
    renumbered = ordered.reset_index(drop=True)
    return renumbered

def sort_vals_ascx(prim_keys):
    """Build a one-argument pipeline step around ``sort_vals_asc``.

    The returned callable takes a DataFrame and sorts it ascending by
    ``prim_keys``.
    """
    def _step(df):
        return sort_vals_asc(df=df, prim_keys=prim_keys)

    return _step

In [2]:
# Download the nomenclator sheet and load it from the local copy.
gsheet_download_csv('1kK1Yu6gz5kEWe_i0vamiGttkXUH5H90e', 'nomenclador.csv')
paises = pd.read_csv('./nomenclador.csv')

# Lookup table: iso3 code -> 'iso3_desc_fundar' description.
# Built column-wise with zip; iterating the `.iloc` indexer only works through
# the legacy __getitem__ iteration protocol and builds one Series per row.
pais = dict(zip(paises['iso3'], paises['iso3_desc_fundar']))

# Manual overrides, currently disabled:
# pais['SUD'] = pais['SDN']
# pais['OHI'] = "Otros países de ingreso alto"

In [51]:
# Change log: maps each dataset name (`archivo`) to the list of changes applied to it.
diccionario_cambios = {}

COMEXT_g1.csv

In [50]:
# Dataset selection: these values build the folder and file names used below.
SUBTOP = 'COMEXT'
entrega = 1
folder = f"../output/{SUBTOP}{entrega}"
grafico_n = 1

# Load the current CSV for this dataset from the `definitivos` folder.
archivo = f'{SUBTOP}_g{grafico_n}'
df = pd.read_csv(f"{folder}/definitivos/{archivo}.csv")

# Build a lookup from the 4th column of internal_mapping.csv to its 3rd column.
# Rows are unpacked positionally, so the file must have exactly five columns.
# The comprehension variables `id` and `archivo` are local to the comprehension
# and do not rebind the outer `archivo`.
# NOTE(review): presumably the 4th column is the output file id and the 3rd the
# source dataset file name — confirm against internal_mapping.csv.
mapping = pd.read_csv(f'{folder}/internal_mapping.csv')
mapping = {id:archivo for (_,_,archivo,id,_) in mapping.iloc}

# header=6: the sheet's column names are read from row index 6 (0-based).
plantilla = pd.read_excel(f'../tmp/ArgenData-{SUBTOP}.xlsx', header=6)
# Keep only the template rows that describe this dataset.
plantilla_slice = plantilla[plantilla.dataset_archivo == mapping[archivo]]

# Variable names flagged as primary key in the template (used as id columns later).
primary_keys = plantilla_slice.variable_nombre[plantilla_slice.primary_key].unique().tolist()
print(*primary_keys)
df.head()

anio


Unnamed: 0,anio,cantidades_exportacion_ferreres,cantidades_exportacion_indec
0,1810,0.019382,
1,1811,0.024362,
2,1812,0.010767,
3,1813,0.01373,
4,1814,0.018324,


In [52]:
# Short aliases for the pipeline-step factories.
drop, to_long, renombrar = drop_colx, wide_to_longx, rename_colsx
replace, sort = replace_valuex, sort_vals_ascx
nomenclar_paises = partial(normalize_countriesx, map=pais)


def exportar(df):
    """Export ``df`` for the dataset currently selected by ``archivo``/``folder``.

    Both globals are read at call time, not when this function is defined.
    """
    return exportar_definitivo(archivo=archivo, folder=folder, df=df, nuevo_nombre=None)


# Steps run top to bottom: wide -> long on the primary keys, then sort by them.
pipeline = chain(
    to_long(primary_keys),
    sort(primary_keys),
)

_df = pipeline(df)

# Record the changes applied to this dataset.
diccionario_cambios[archivo] = [
    "Se pasó a long estricto",
    "Se ordenó dataset ascendentemente por las primary_keys",
]

_df.head()

Unnamed: 0,anio,indicador,valor
0,1810,cantidades_exportacion_ferreres,0.019382
1,1810,cantidades_exportacion_indec,
2,1811,cantidades_exportacion_ferreres,0.024362
3,1811,cantidades_exportacion_indec,
4,1812,cantidades_exportacion_indec,


In [53]:
# Write the transformed frame over the dataset CSV; the previous file is kept
# under the `_old` name. Not idempotent: a second run renames the already
# transformed file onto the backup.
exportar(_df)

('../output/COMEXT1/definitivos/COMEXT_g1.csv',
 '../output/COMEXT1/definitivos/COMEXT_g1_old.csv')

COMEXT_g2.csv

In [54]:
# Dataset selection: these values build the folder and file names used below.
SUBTOP = 'COMEXT'
entrega = 1
folder = f"../output/{SUBTOP}{entrega}"
grafico_n = 2

# Load the current CSV for this dataset from the `definitivos` folder.
archivo = f'{SUBTOP}_g{grafico_n}'
df = pd.read_csv(f"{folder}/definitivos/{archivo}.csv")

# Build a lookup from the 4th column of internal_mapping.csv to its 3rd column.
# Rows are unpacked positionally, so the file must have exactly five columns.
# The comprehension variables `id` and `archivo` are local to the comprehension
# and do not rebind the outer `archivo`.
# NOTE(review): presumably the 4th column is the output file id and the 3rd the
# source dataset file name — confirm against internal_mapping.csv.
mapping = pd.read_csv(f'{folder}/internal_mapping.csv')
mapping = {id:archivo for (_,_,archivo,id,_) in mapping.iloc}

# header=6: the sheet's column names are read from row index 6 (0-based).
plantilla = pd.read_excel(f'../tmp/ArgenData-{SUBTOP}.xlsx', header=6)
# Keep only the template rows that describe this dataset.
plantilla_slice = plantilla[plantilla.dataset_archivo == mapping[archivo]]

# Variable names flagged as primary key in the template (used as id columns later).
primary_keys = plantilla_slice.variable_nombre[plantilla_slice.primary_key].unique().tolist()
print(*primary_keys)
df.head()

time iso3


Unnamed: 0,time,iso3,countryname,exportsconstant_goods_v2,exportsconstant_servi_v2
0,2000,ABW,Aruba,,
1,1966,ABW,Aruba,,
2,2005,ABW,Aruba,,
3,2020,ABW,Aruba,,
4,2012,ABW,Aruba,,


In [55]:
# Share of missing values per column among the rows whose iso3 is "INX".
df.loc[df.iso3 == "INX"].isna().mean()

time                        0.0
iso3                        0.0
countryname                 0.0
exportsconstant_goods_v2    1.0
exportsconstant_servi_v2    1.0
dtype: float64

In [56]:
# Remove the "INX" rows (both value columns are entirely missing for them)
# and renumber the index.
df = df.loc[df.iso3 != "INX"].reset_index(drop=True)

In [57]:
# Short aliases for the pipeline-step factories.
drop = drop_colx
to_long = wide_to_longx
renombrar = rename_colsx
nomenclar_paises = partial(normalize_countriesx, map=pais)
replace = replace_valuex
sort = sort_vals_ascx


def exportar(df):
    """Export ``df`` for the dataset currently selected by ``archivo``/``folder``.

    Both globals are read at call time, not when this function is defined.
    """
    return exportar_definitivo(archivo=archivo, folder=folder, df=df, nuevo_nombre=None)


# Steps run top to bottom. Sorting happens before the renames, so it still
# uses the original primary-key column names.
pipeline = chain(
    drop(col="countryname"),
    to_long(primary_keys),
    sort(primary_keys),
    renombrar({'iso3': 'cod_area'}),
    nomenclar_paises(code_col='cod_area', new_col="desc_area"),
    renombrar({'time': 'anio'}),
)

# Record the changes applied to this dataset. Every entry needs its trailing
# comma: adjacent string literals are silently concatenated into one entry.
# NOTE(review): the sort and the two renames performed above are not listed
# here — confirm whether they should be logged too.
diccionario_cambios[archivo] = [
    "Se sacan las filas donde iso3 == INX por no poseer datos y no estar en la lista de nomencladores",
    "Se saca la variable 'countryname'",
    "Se pasó a long estricto",
    "Se utilizó nomenclador de paíse/regiones y se creó la columna 'desc_area'",
]

_df = pipeline(df)
_df.head()

Unnamed: 0,anio,cod_area,indicador,valor,desc_area
0,1960,ABW,exportsconstant_goods_v2,,Aruba
1,1960,ABW,exportsconstant_servi_v2,,Aruba
2,1960,AFE,exportsconstant_goods_v2,,África Oriental y del Sur
3,1960,AFE,exportsconstant_servi_v2,,África Oriental y del Sur
4,1960,AFG,exportsconstant_goods_v2,,Afganistán


In [58]:
# Write the transformed frame over the dataset CSV; the previous file is kept
# under the `_old` name. Not idempotent: a second run renames the already
# transformed file onto the backup.
exportar(_df)

('../output/COMEXT1/definitivos/COMEXT_g2.csv',
 '../output/COMEXT1/definitivos/COMEXT_g2_old.csv')

In [121]:
# Move the backups created by the export step out of `definitivos`.
# The pattern targets the `_old.csv` suffix only: the looser '.*old.*' would
# also move any dataset whose name merely contains "old".
# Uses the `folder` currently set in the kernel — re-run the dataset cell
# first if it may be stale.
match_relocate(f'{folder}/definitivos', f'{folder}/old', r'.*_old\.csv$')

Files matching the pattern 're.compile('.*old.*')' moved to '../output/POBREZ1/old'.


In [128]:
def f(x, resultado: list) -> int:
    """Return ``x + 1`` and append a trace message of the step to ``resultado``."""
    siguiente = x+1
    resultado.append( f'{x} -> {siguiente}' )
    return siguiente

def g(y, resultado: list) -> int:
    """Return ``y * 2`` and append a trace message of the step to ``resultado``."""
    doble = y*2
    mensaje = f'{y}*2 = {doble}'
    resultado.append( mensaje )
    return doble

In [131]:
# Shared list that f and g append their trace messages to.
a = []

f(3, a)
# Last expression of the cell, so its return value is what gets displayed.
g(4, a)

8

In [132]:
# Display the messages accumulated by the calls to f and g.
a

['3 -> 4', '4*2 = 8']