# main.ipynb

## modules

### common

In [4]:
import os
import shutil
import json
from uuid import uuid4
from pathlib import Path

### requirements

In [6]:
!pip3 install requests

Collecting requests
  Using cached requests-2.27.1-py2.py3-none-any.whl (63 kB)
Collecting charset-normalizer~=2.0.0
  Using cached charset_normalizer-2.0.12-py3-none-any.whl (39 kB)
Collecting urllib3<1.27,>=1.21.1
  Downloading urllib3-1.26.9-py2.py3-none-any.whl (138 kB)
Collecting idna<4,>=2.5
  Using cached idna-3.3-py3-none-any.whl (61 kB)
Installing collected packages: urllib3, idna, charset-normalizer, requests
Successfully installed charset-normalizer-2.0.12 idna-3.3 requests-2.27.1 urllib3-1.26.9


In [8]:
!pip3 install pendulum

Collecting pendulum
  Downloading pendulum-2.1.2.tar.gz (81 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
    Preparing wheel metadata: started
    Preparing wheel metadata: finished with status 'done'
Collecting pytzdata>=2020.1
  Downloading pytzdata-2020.1-py2.py3-none-any.whl (489 kB)
Building wheels for collected packages: pendulum
  Building wheel for pendulum (PEP 517): started
  Building wheel for pendulum (PEP 517): finished with status 'done'
  Created wheel for pendulum: filename=pendulum-2.1.2-cp39-cp39-win_amd64.whl size=109761 sha256=6b8d04e24936df3553b4d32b2b764c5ca1a748c509b6b1dca94f9c5552a939a7
  Stored in directory: c:\users\piero\appdata\local\pip\cache\wheels\c9\c7\29\3cf0033406b18958d1a7b107c38be2405ef9c081b64a264fd5
Successfully built pendulum
Installing collected packages: pytzdata, pendulum


In [10]:
!pip3 install pandas

Collecting pandas
  Downloading pandas-1.4.2-cp39-cp39-win_amd64.whl (10.5 MB)
Collecting numpy>=1.18.5
  Downloading numpy-1.22.4-cp39-cp39-win_amd64.whl (14.7 MB)
Collecting pytz>=2020.1
  Downloading pytz-2022.1-py2.py3-none-any.whl (503 kB)
Installing collected packages: pytz, numpy, pandas
Successfully installed numpy-1.22.4 pandas-1.4.2 pytz-2022.1


In [11]:
!pip3 install fire

Collecting fire
  Downloading fire-0.4.0.tar.gz (87 kB)
Collecting termcolor
  Downloading termcolor-1.1.0.tar.gz (3.9 kB)
Building wheels for collected packages: fire, termcolor
  Building wheel for fire (setup.py): started
  Building wheel for fire (setup.py): finished with status 'done'
  Created wheel for fire: filename=fire-0.4.0-py2.py3-none-any.whl size=115942 sha256=56f496767481003e7b9a544c902db60c9e91da29b2ccb42922385b35e84e0b56
  Stored in directory: c:\users\piero\appdata\local\pip\cache\wheels\2a\93\86\8cd17bc6c40fb605c3ac549d0b860ef7e84ee5f67bf01a3287
  Building wheel for termcolor (setup.py): started
  Building wheel for termcolor (setup.py): finished with status 'done'
  Created wheel for termcolor: filename=termcolor-1.1.0-py3-none-any.whl size=4848 sha256=5d4f93cade6297c2534ca6a6f48053684a24d520c0ed8e8429b20c1559ee06da
  Stored in directory: c:\users\piero\appdata\local\pip\cache\wheels\b6\0d\90\0d1bbd99855f99cb2f6c2e5ff96f8023fad8ec367695f7d72d
Successfully built fire

In [12]:
import requests
import pendulum
import pandas as pd
import fire

## 1. pendulum

In [13]:
# > timezone name passed to pendulum when computing the current date
region_city: str = 'America/Lima'

In [14]:
# > current date string in the configured timezone; `rc` is a short alias
rc = region_city
c_date = pendulum.now(rc).to_date_string()

print(c_date, '---', rc)

2022-05-28 --- America/Lima


## 2. requests

In [15]:
# > request settings: series code, endpoint built from it, and request headers
serie = 'PM04902AA'
headers = {'Content-Type': 'application/json'}
api_url = (
    'https://estadisticas.bcrp.gob.pe/estadisticas/series/api/'
    f'{serie}/json/2020/2022/'
)

In [17]:
# > request + try-except
# Fetch the series and decode the JSON body into `api_data`.
# Any failure (network, HTTP status, JSON decoding) is announced and re-raised.
try:
    response = requests.get(url=api_url, headers=headers, timeout=60)
    # Fail fast on 4xx/5xx responses instead of trying to parse an error page as JSON.
    response.raise_for_status()
    response.encoding = 'utf-8'
    api_data = json.loads(response.text)

    # error  # example: reference an undefined variable here to force an error
except Exception:
    print('[INFO] something went wrong...')
    raise  # re-raise the exception caught from the try block

#print("passed")

In [18]:
# > show the decoded payload as indented JSON text
print(json.dumps(obj=api_data, indent=2))

{
  "config": {
    "title": "Ingreso nacional disponible (millones S/ 2007)",
    "series": [
      {
        "name": "Ingreso nacional disponible (millones S/ 2007) - Renta de Factores",
        "dec": "0"
      }
    ]
  },
  "periods": [
    {
      "name": "2020",
      "values": [
        "-14748.8775165815"
      ]
    },
    {
      "name": "2021",
      "values": [
        "-46048.2056891399"
      ]
    }
  ]
}


## 3. pandas: dataframes

In [19]:
# > key holding the records, and the column labels to apply to them
record_path = 'periods'
columnas = dict(name='YEAR', values=serie.upper())

In [20]:
# > build a dataframe with one row per record found under `record_path`
df = pd.json_normalize(data=api_data, record_path=record_path)

df.head()

Unnamed: 0,name,values
0,2020,[-14748.8775165815]
1,2021,[-46048.2056891399]


In [21]:
# > relabel the columns using the `columnas` mapping
df = df.rename(columnas, axis='columns')

df.head()

Unnamed: 0,YEAR,PM04902AA
0,2020,[-14748.8775165815]
1,2021,[-46048.2056891399]


In [22]:
# > each cell holds a one-element list: take that element and cast it to float
df[serie] = df[serie].str[0].astype(float)

df.head()

Unnamed: 0,YEAR,PM04902AA
0,2020,-14748.877517
1,2021,-46048.205689


In [24]:
# underlying pandas values, shown without the dataframe's display formatting
df.values

array([['2020', -14748.8775165815],
       ['2021', -46048.2056891399]], dtype=object)

## 4. pandas: exporting data

In [None]:
# > parts of the output location: run date, per-execution id, series code
curr_date, s = c_date, serie
exec_uuid = str(uuid4())

# > CSV path nested by date, then by execution id
save_path = f'./data/current/{curr_date}/{exec_uuid}/{s}.csv'

print(save_path)

In [None]:
# > folder part of save_path: everything before the last '/'
save_folder = save_path.rpartition('/')[0]

print(save_folder)

In [None]:
# > create the folder and any missing parents; an existing folder is not an error
Path(save_folder).mkdir(exist_ok=True, parents=True)

In [None]:
# > write a deep copy of the dataframe as ';'-separated text, without the index
dfc = df.copy()
dfc.to_csv(path_or_buf=save_path, index=False, sep=';', encoding='iso-8859-1')

## 5. main scenario

### 5.1. multiple requests

In [None]:
# > fixed
# Series codes to download; every file of this run shares one execution id.
series = ['PM04901AA', 'PM04902AA', 'PM04903AA', 'PM04904AA', 'PM04905AA', 'PM04906AA', 'PM04907AA']
exec_uuid = str(uuid4())

# > loop-invariant settings (hoisted: they do not depend on the series)
headers = {'Content-Type': 'application/json'}
record_path = 'periods'
curr_date = c_date
save_folder = f'./data/current/{curr_date}/{exec_uuid}'

# > make save directory if not exists (once per run, not once per series)
Path(save_folder).mkdir(parents=True, exist_ok=True)

for serie in series:
    # > variables
    api_url = f'https://estadisticas.bcrp.gob.pe/estadisticas/series/api/{serie}/json/2020/2022/'

    # > request + try-except
    try:
        response = requests.get(url=api_url, headers=headers, timeout=60)
        # Fail fast on 4xx/5xx responses instead of parsing an error page as JSON.
        response.raise_for_status()
        response.encoding = 'utf-8'
        api_data = json.loads(response.text)
    except Exception:
        print('[INFO] something went wrong...')
        raise  # re-raise so a failed download stops the run

    # > variables
    columnas = {'name': 'YEAR', 'values': serie.upper()}
    # Index the value column by the label it was renamed to, so a lower-case
    # series code cannot cause a KeyError after the rename.
    value_column = columnas['values']

    # > pandas from json
    df = pd.json_normalize(api_data, record_path=record_path)
    df = df.rename(columns=columnas)
    # Each cell is a one-element list: take that element and cast it to float.
    df[value_column] = df[value_column].str[0].astype('float')

    # > variables
    s = serie
    save_path = f'{save_folder}/{s}.csv'

    # > export data from dataframe to csv
    dfc = df.copy(deep=True)
    dfc.to_csv(save_path, sep=';', encoding='iso-8859-1', index=False)

### 5.2. dataframe auxiliar

In [None]:
# > variables
curr_date = c_date
year = curr_date.partition('-')[0]  # text before the first '-'
read_path = save_path.rpartition('/')[0]  # folder part of save_path

In [None]:
# > dummy dataframe: one row per year from 1940 through `year`, indexed by YEAR,
# > with the run date repeated in LOAD_DATE
tdf = pd.DataFrame(
    {'YEAR': range(1940, int(year) + 1), 'LOAD_DATE': curr_date}
).set_index('YEAR')

tdf.head()

In [None]:
# > elements to join: show the entries found under read_path, one per line
files = os.listdir(read_path)
if files:
    print('\n'.join(files))

In [None]:
# > complete table
# Sort the listing: os.listdir() returns entries in arbitrary order, which would
# make the column order of the joined table vary between runs.
files = sorted(os.listdir(read_path))
for f in files:
    # Read one per-series CSV, using its YEAR column as the index to join on.
    df = pd.read_csv(f'{read_path}/{f}', sep=';', encoding='iso-8859-1', index_col='YEAR')
    # Join only columns not already present, so re-running this cell does not
    # fail on overlapping column names.
    tdf = tdf.join(df[df.columns.difference(tdf.columns)])

In [None]:
# > null count per column after joining onto the dummy table
print(tdf.isna().sum(axis=0))
tdf.head()

In [None]:
# > turn YEAR back into a column, then drop rows with a null in any column
# > from the third one onwards
tdf = tdf.reset_index()
tdf = tdf.dropna(subset=list(tdf.columns[2:]))

In [None]:
# > output: null count per column, then the first rows of the table
print(tdf.isna().sum(axis=0))
tdf.head()

### 5.3. exportar datos

In [None]:
# > export settings: field separator, text encoding, target file and its folder
sep, encoding = '|', 'iso-8859-1'
export_path = f'./data/output/{curr_date}/output_{exec_uuid}.csv'
save_folder = export_path.rpartition('/')[0]

In [None]:
# > export full table
Path(save_folder).mkdir(parents=True, exist_ok=True)
# Write `tdf`, the joined table. `df` only holds the last per-series file read
# while building it, so exporting `df` would drop every other series (and, with
# index=False, its YEAR index as well).
tdf.to_csv(export_path, sep=sep, encoding=encoding, index=False)

In [None]:
# > archive this run: same path with 'current' swapped for 'historic'
current_path = read_path
historic_path = current_path.replace('current', 'historic')
shutil.move(src=current_path, dst=historic_path)