In [16]:
from functools import partial, reduce
from os import rename, makedirs
import urllib.request
import pandas as pd
import os.path
import shutil
import json
import csv
import re

def gsheet_download_csv(id: str, target: str, url_template="https://docs.google.com/spreadsheets/d/{id}/export?format=csv"):
    url = url_template.format(id=id)
    return urllib.request.urlretrieve(url, target)

def match_relocate(src, target, pattern, file_list=None):
    files_to_move = file_list or os.listdir(src)

    pattern = re.compile(pattern)

    if not os.path.exists(target):
        makedirs(target)

    for filename in files_to_move:
        if pattern.match(filename):
            src_path = os.path.join(src, filename)
            dest_path = os.path.join(target, filename)

            shutil.move(src_path, dest_path)

    print(f"Files matching the pattern '{pattern}' moved to '{target}'.")

def compose2(f, g):
    def composed(*a, **kw):
        return f(g(*a, **kw))

    return composed

def compose(*fs):
    return reduce(compose2, fs)

def chain(*fs):
    return compose(*reversed(fs))

def exportar_definitivo(archivo: str, df: pd.DataFrame, nuevo_nombre=None):
    if not nuevo_nombre:
        nuevo_nombre = archivo+'_old.csv'

    original_file = f'./{archivo}.csv'

    rename(original_file, nuevo_nombre)

    df.to_csv(original_file, 
              encoding='utf-8', 
              sep=',', 
              quoting=csv.QUOTE_ALL, 
              quotechar='"', 
              lineterminator='\n', 
              decimal='.', 
              index=False)
    
    return original_file, nuevo_nombre

def drop_col(df: pd.DataFrame, col, axis=1):
    return df.drop(col, axis=axis)

def drop_colx(col, axis=1):
    return lambda df: drop_col(df, col, axis=axis)

def normalize_countries(df: pd.DataFrame, col, map: dict):
    df[col] = df[col].apply(map.__getitem__)
    return df

def normalize_countriesx(col, map):
    return lambda df: normalize_countries(df, col, map)

def wide_to_long(df: pd.DataFrame, primary_keys, value_name='valor', var_name='indicador'):
    return df.melt(id_vars=primary_keys, value_name=value_name, var_name=var_name)

def wide_to_longx(primary_keys, value_name='valor', var_name='indicador'):
    return lambda df: wide_to_long(df, primary_keys, value_name=value_name, var_name=var_name)

def rename_cols(df: pd.DataFrame, map):
    df = df.rename(columns=map)
    return df

def rename_colsx(map):
    return lambda df: rename_cols(df, map)

In [23]:
gsheet_download_csv('1kK1Yu6gz5kEWe_i0vamiGttkXUH5H90e', 'paises.csv')
paises = pd.read_csv('./paises.csv')
pais = {k:v for k,v in paises[['iso3', 'iso3_desc_fundar']].iloc}
pais['SUD'] = pais['SDN']

archivo = 'ACECON_g17'
df = pd.read_csv(archivo+'.csv')

mapping = pd.read_csv('./internal_mapping.csv')
mapping = {id:archivo for (_,_,archivo,id,_) in mapping.iloc}

plantilla = pd.read_excel('./ArgenData-ACECON.xlsx', header=6)
plantilla_slice = plantilla[plantilla.dataset_archivo == mapping[archivo]]

primary_keys = plantilla_slice.variable_nombre[plantilla_slice.primary_key].unique().tolist()
print(*primary_keys)
df.head()

trimestre anio


Unnamed: 0,trimestre,anio,indicador,valor
0,1q,2004,pbi,460369.4422
1,2q,2004,pbi,514395.6818
2,3q,2004,pbi,481151.9799
3,4q,2004,pbi,484543.6769
4,1q,2005,pbi,493602.5306


In [112]:
drop = drop_colx
to_long = wide_to_longx
renombrar = rename_colsx
nomenclar_paises = partial(normalize_countriesx, map=pais)

exportar = lambda df: exportar_definitivo(archivo=archivo, df=df, nuevo_nombre=None)

pipeline = chain(
    #drop('pais'), 
    #nomenclar_paises('iso3'), 
    to_long(primary_keys), 
    #renombrar({'iso3': 'pais'})
)

_df = pipeline(df)
_df.head()

Unnamed: 0,trimestre,anio,indicador,valor
0,1q,2004,pbi,460369.4422
1,2q,2004,pbi,514395.6818
2,3q,2004,pbi,481151.9799
3,4q,2004,pbi,484543.6769
4,1q,2005,pbi,493602.5306


In [113]:
exportar(_df)

('./ACECON_g17.csv', 'ACECON_g17_old.csv')

In [117]:
match_relocate('./', './old', '.*old.*')

Files matching the pattern 're.compile('.*old.*')' moved to './old'.
