In [5]:
%run "utils.ipynb"
import sqlite3 
import numpy as np
import pandas as pd 
from fredapi import Fred 
from collections import defaultdict
from contextlib import closing 

In [6]:
# Harvard Dataverse file-access URL; used as the default workbook source
# (`uri`) of update_sector_data().
klems_uri_ = 'https://dataverse.harvard.edu/api/access/datafile/697683'

def create_update_map_df(x, columns, idx, func, to_field, from_field, f_map):
    """Build an indexed DataFrame from raw rows and add a mapped label column.

    Parameters
    ----------
    x : row data accepted by ``pd.DataFrame`` (e.g. a 2-D array or list of rows).
    columns : column labels for ``x``.
    idx : column label or list of labels; these columns are converted with
        ``func`` and then become the index of the result.
    func : callable handed to ``.apply`` on the ``idx`` column(s).
    to_field : name of the column that receives the mapped values.
    from_field : name of the column whose values are looked up in ``f_map``.
    f_map : mapping (or callable) passed to ``Series.map``.

    Returns
    -------
    pandas.DataFrame sorted by the ``'year'`` column and indexed by ``idx``.
    """
    frame = pd.DataFrame(x, columns=columns)
    frame[idx] = frame[idx].apply(func)
    frame[to_field] = frame[from_field].map(f_map)

    # Pull the last column (the freshly added ``to_field`` when it is new)
    # forward so that it sits right after the first two columns.
    names = frame.columns.tolist()
    leading, last, middle = names[:2], names[-1], names[2:-1]
    frame = frame[leading + [last] + middle]

    # The sort key is the literal 'year' column; mergesort keeps the original
    # relative order of rows that share a year.
    frame = frame.sort_values(by='year', kind='mergesort')
    return frame.set_index(idx)

def retrieve_from_fred(series, called, as_of, key=None):
    """Download one FRED series as known on a given date.

    Parameters
    ----------
    series : FRED series identifier passed to ``Fred.get_series_as_of_date``.
    called : name given to the value column in the returned frame.
    as_of : vintage date passed to ``Fred.get_series_as_of_date``.
    key : FRED API key. When omitted, the ``FRED_API_KEY`` environment
        variable is used instead.

    Returns
    -------
    pandas.DataFrame indexed by ``date`` with a single column named ``called``.

    Raises
    ------
    ValueError
        If no API key is supplied and ``FRED_API_KEY`` is not set.
    """
    # Local import: the notebook's import cell does not bring in ``os``.
    import os

    # NOTE(review): credentials must not live in the notebook. Any key that
    # was previously committed here should be treated as exposed and rotated.
    api_key = key or os.environ.get('FRED_API_KEY')
    if not api_key:
        raise ValueError(
            "No FRED API key: pass key=... or set the FRED_API_KEY "
            "environment variable."
        )

    fred = Fred(api_key=api_key)
    releases = fred.get_series_as_of_date(series, as_of).dropna()
    # Build the result by chaining instead of mutating a column slice in
    # place, which avoids SettingWithCopyWarning.
    df = (
        releases[['date', 'value']]
        .rename(columns={'value': called})
        .assign(date=lambda d: pd.to_datetime(d['date'], format='%Y%m%d'))
    )
    return df.set_index(['date'])

def retrieve_from_xls(uri, called=None, **kwargs):
    """Read a multi-sheet Excel workbook and pair each sheet with a name.

    Parameters
    ----------
    uri : path or URL handed to ``pd.read_excel``.
    called : iterable of names, one per sheet in the order the sheets are
        returned. When omitted, the keys of the mapping returned by
        ``pd.read_excel`` are used.
    **kwargs : forwarded to ``pd.read_excel``. They must request several
        sheets so that a mapping (not a single DataFrame) comes back,
        because the result's ``.values()`` is called.

    Returns
    -------
    A ``zip`` of ``(name, sheet)`` pairs.
    """
    sheets = pd.read_excel(uri, **kwargs)
    # The old default ``called=None`` made ``zip(None, ...)`` raise TypeError;
    # fall back to the workbook's own sheet keys instead.
    names = sheets.keys() if called is None else called
    return zip(names, sheets.values())

def retrieve_sector_data_raw(uri, sheets, idx, mapper=None, as_int=lambda x: pd.to_numeric(x, downcast='unsigned')):
    """Load sector data from an Excel workbook into three DataFrames.

    The workbook is read without a header row; sheets are looked up under
    the keys 1, 2, 3 and 4, so ``sheets`` must include those:

    * sheet 1 - rows of ``(sector id, sector name)``,
    * sheet 2 - its first row supplies the column header,
    * sheet 3 - values returned as ``'nominal'``,
    * sheet 4 - values returned as ``'prices'``.

    Parameters
    ----------
    uri : path or URL of the workbook.
    sheets : sheet selector passed to ``pd.read_excel``.
    idx : column label(s) converted with ``as_int`` and used as the index.
    mapper : mapping from raw header labels to replacement labels. It is
        indexed with ``[]``, so it should be a ``defaultdict`` or cover every
        header label; falsy results leave the label unchanged. When omitted,
        the header labels are used as they are.
    as_int : callable applied to the ``idx`` column(s).

    Returns
    -------
    dict with keys ``'nominal'``, ``'prices'`` and ``'real'``, each a
    DataFrame built by ``create_update_map_df``.
    """
    # The old default ``mapper=None`` crashed on ``mapper[x]``; an empty
    # defaultdict turns the header mapping into an identity.
    if mapper is None:
        mapper = defaultdict(lambda: None)

    # The sheet selector is passed positionally because its keyword name
    # differs between pandas versions (``sheetname`` vs ``sheet_name``).
    # ``.values`` replaces ``as_matrix()``, which newer pandas removed.
    workbook = pd.read_excel(uri, sheets, header=None)
    dfs = {k: v.dropna().values for k, v in workbook.items()}

    sectors = defaultdict(lambda: None)
    sectors.update({row[0]: transform_name(row[1].lstrip()) for row in dfs[1]})

    header = [mapper[x] or x for x in dfs[2][0]]

    # 'real' is nominal divided element-wise by the output of as_units().
    # NOTE(review): as_units is defined outside this cell; presumably it
    # rescales prices while leaving columns 0 and 1 usable as divisors -
    # confirm against its definition.
    data = [dfs[3], dfs[4], dfs[3] / as_units(dfs[4], (0, 1))]
    out = [
        create_update_map_df(values, header, idx, as_int, 'sector', 'sector_id', sectors)
        for values in data
    ]
    return {'nominal': out[0], 'prices': out[1], 'real': out[2]}

In [9]:
# SQLite database file that update_fred() and update_sector_data() write to.
db_name = 'macrodata.db'
# Vintage date handed to retrieve_from_fred() as `as_of` for every series.
fred_series_as_of = '12/31/2016'
def update_fred(series_from_fred):
    """Fetch FRED series and store them in the ``fred`` table.

    Parameters
    ----------
    series_from_fred : dict mapping a FRED series id to the column name its
        values should get.

    Every series is retrieved as of ``fred_series_as_of``. The frames are
    stacked row-wise, so when the column names differ each row holds a value
    in only one of them. Any existing ``fred`` table in ``db_name`` is
    replaced.
    """
    with closing(sqlite3.connect(db_name)) as connection:
        frames = []
        for series_id, column_name in series_from_fred.items():
            frames.append(retrieve_from_fred(series_id, column_name, fred_series_as_of))
        stacked = pd.concat(frames, axis=0)
        stacked.to_sql(
            'fred',
            connection,
            index=True,
            if_exists='replace',
            index_label='date',
        )
        
def update_sector_data(uri=klems_uri_, n_sectors=88, field_update=None):
    """Load the sector workbook and write it to the ``klems_*`` tables.

    Parameters
    ----------
    uri : workbook location; defaults to ``klems_uri_``.
    n_sectors : number of ``USEcom<i>`` header labels to rename to the
        integer ``i`` (for ``i`` from 1 to ``n_sectors``).
    field_update : extra header renames applied on top of the ``USEcom``
        ones; a built-in set is used when this is empty or omitted.

    One table per entry returned by ``retrieve_sector_data_raw`` is written
    to ``db_name`` as ``klems_<name>``, replacing any existing table.
    """
    with closing(sqlite3.connect(db_name)) as connection:
        if not field_update:
            field_update = {'industry': 'sector_id', 'industry output': 'output', 'USEnci': '99'}

        # Header labels that are not listed map to None, i.e. they are kept as they are.
        mapper = defaultdict(lambda: None)
        for sector_no in range(1, n_sectors + 1):
            mapper['USEcom%s' % sector_no] = sector_no
        mapper.update(field_update)

        idx = ['year', 'sector_id']
        tables = retrieve_sector_data_raw(uri=uri, sheets=[1, 2, 3, 4], idx=idx, mapper=mapper)
        for table_name, rows in tables.items():
            print('Writing [%s] into the database.' % (table_name))
            rows.to_sql(
                'klems_%s' % (table_name),
                connection,
                index=True,
                if_exists='replace',
                index_label=idx,
            )

In [40]:
# Disabled FRED refresh: uncomment to rebuild the `fred` table with the
# GDPC1 (as real_gdp) and GDP (as nominal_gdp) series.
# update_fred({'GDPC1': 'real_gdp', 'GDP': 'nominal_gdp'})

In [10]:
# Rebuild the klems_nominal, klems_prices and klems_real tables from the
# default workbook; existing tables are replaced.
update_sector_data()

Writing [nominal] into the database.
Writing [prices] into the database.
Writing [real] into the database.
