# Import

In [1]:
%load_ext autoreload
%autoreload 2

import os
import pickle
import zipfile
import urllib.request
from datetime import datetime
import pandas as pd
import numpy as np

from fredapi import Fred

**Global settings:**

In [2]:
# True: fetch the FRED series through the API and overwrite data/data_fred.pickle
# False: load the previously saved data/data_fred.pickle instead
DOWNLOAD_FRED = False # to re-download FRED data

# Auxiliary functions

**Functions:**

In [3]:
def download_and_unzip(url,name):
    """ download a zip archive and extract it to data/{name}/

    Args:

        url (str): address of the zip archive
        name (str): base name; the archive is stored as data/{name}.zip and
            extracted to data/{name}/ (relative to the working directory)

    Nothing is downloaded when the folder data/{name}/ already exists.

    """

    zipfilename = f'{os.getcwd()}/data/{name}.zip'
    folder = f'{os.getcwd()}/data/{name}/'

    # a. skip if already extracted
    if os.path.isdir(folder):
        print(f'data already downloaded and extracted to data/{name}/')
        return

    # b. download (create data/ first, urlretrieve does not create missing folders)
    os.makedirs(os.path.dirname(zipfilename),exist_ok=True)
    urllib.request.urlretrieve(url,zipfilename)

    # c. unzip
    with zipfile.ZipFile(zipfilename) as file:
        file.extractall(folder)

    print(f'downloaded and extracted to data/{name}/')
        

In [4]:
def resample_to_M(varQ,varname=None):
    """ resample from quarterly to monthly series

    Args:

        varQ (pd.Series): series with a datetime index; missing values are
            dropped before resampling
        varname (str, optional): label used in the printed summary,
            defaults to varQ.name

    Returns:

        varM (pd.Series): series indexed by the first day of each month,
            months without an observation are filled by linear interpolation

    """

    # the label used to be read from a global variable called varname,
    # which printed whatever name an earlier loop happened to leave behind
    if varname is None: varname = varQ.name

    # a. resample
    # 'MS' bins by calendar month and labels each bin by its first day; this
    # replaces the month-end alias 'M' with label='left' plus a one-day shift
    # ('M' is deprecated since pandas 2.2)
    varM = varQ[~varQ.isnull()].resample('MS').first()
    varM = varM.interpolate()

    # b. print
    Q_index = varQ.index
    M_index = varM.index

    Q_first,Q_last = Q_index[0].strftime('%Y-%m'),Q_index[-1].strftime('%Y-%m')
    M_first,M_last = M_index[0].strftime('%Y-%m'),M_index[-1].strftime('%Y-%m')

    print(f'{varname}: [{Q_first}:{Q_last}] -> [{M_first}:{M_last}]')

    return varM


In [5]:
def print_range(varname,df=None):
    """ print the first and last month with a non-missing observation

    Args:

        varname (str): column name
        df (pd.DataFrame, optional): frame with a datetime index holding the
            column, defaults to the notebook-level data

    """

    if df is None: df = data

    # notna also works for non-float columns, where np.isnan raises
    I = df[varname].notna().values
    index = df.index[I]

    # an all-missing column has no first/last observation to report
    if index.size == 0:
        print(f'{varname}: no observations')
        return

    start = index[0].strftime('%Y-%m')
    end = index[-1].strftime('%Y-%m')

    print(f'{varname}: {start}:{end}')
        

# Create data

In [6]:
# monthly grid from January 1948 up to and including the current calendar month
first = np.datetime64('1948-01')
last = np.datetime64(f'{datetime.today():%Y-%m}')

# empty frame (no columns yet); all series are later aligned to this index
data = pd.DataFrame(index=np.arange(first,last + np.timedelta64(1,'M')))

## FRED

In [7]:
# FRED series to load, one tuple per series:
#   (FRED id,)                 -> stored under the FRED id
#   (FRED id, notebook name)   -> stored under the notebook name
fredvars = [
    ('PCEPI',),         # personal consumption deflator
    ('UNRATE','u'),     # unemployment rate
    ('FEDFUNDS','R'),   # federal funds rate
    ('INDPRO',),        # industrial production
    ('USREC',),         # recession indicators
]

In [8]:
if DOWNLOAD_FRED:

    # a. setup API
    # the key is taken from the environment variable FRED_API_KEY so that no
    # credential is stored in the notebook (Fred raises if no key is available)
    fred = Fred(api_key=os.environ.get('FRED_API_KEY'))

    # b. setup data dict, one sub-dict per frequency code (monthly, quarterly, annual)
    data_fred = {}
    data_fred['M'] = {}
    data_fred['Q'] = {}
    data_fred['A'] = {}

    # c. load each variable
    for fredvar in fredvars:

        # i. unpack: (FRED id,) or (FRED id, name used in the notebook)
        assert type(fredvar) is tuple
        fredname = fredvar[0]
        varname = fredname if len(fredvar) == 1 else fredvar[1]

        # ii. print info
        info = fred.get_series_info(fredname)

        title = info['title']
        freq = info['frequency_short']
        start = info['observation_start']
        end = info['observation_end']

        print(f'{fredname:15s} {varname:25s} {title:90s} {start[:-3]} : {end[:-3]} [{freq}]')

        # iii. load data
        # setdefault: a series with another frequency code than M/Q/A gets its own sub-dict
        data_fred.setdefault(freq,{})[varname] = fred.get_series(fredname).rename(varname)

    # d. save to disc
    with open('data/data_fred.pickle','wb') as handle:
        pickle.dump(data_fred,handle)

else:

    # NOTE: pickle.load can execute arbitrary code, only load a pickle written by this notebook
    with open('data/data_fred.pickle','rb') as handle:
        data_fred = pickle.load(handle)
        

**Add FRED data:**

In [9]:
# add each monthly FRED series as a column named after the series, aligned on the index of data
# (column assignment instead of join: re-running the cell overwrites the columns rather than failing on overlap)
for series in data_fred['M'].values():
    data[series.name] = series

**Ex post real interest rate:**

In [10]:
# annualized monthly inflation in percent: one-month gross change in PCEPI compounded over 12 months
data['Pi'] = 100*(data['PCEPI'].div(data['PCEPI'].shift(1)).pow(12) - 1)

# ex post real rate: R today minus the inflation realized between this month and the next
data['RealR'] = data['R'].sub(data['Pi'].shift(-1))

**Logarithms:**

In [11]:
# log levels scaled by 100, stored as log_INDPRO and log_PCEPI
# note: the loop variable varname stays defined at notebook level after the loop
for varname in ['INDPRO','PCEPI']:
    data[f'log_{varname}'] = 100*np.log(data[varname])

## Labor market flows data

The data construction is documented in the repository [CPS-labor-market-flows](https://github.com/JeppeDruedahl/CPS-labor-market-flows).

**Download:**

In [12]:
# a. download (only when the file is not on disk yet)
# isfile, not isdir: the target is a file, so an isdir check is always False
# and the file would be downloaded again on every run
filename = 'data/Q.p'
if not os.path.isfile(filename):
    url = 'https://github.com/JeppeDruedahl/CPS-labor-market-flows/raw/main/Q.p'
    urllib.request.urlretrieve(url,filename)
    print(f'data/Q.p has been download')
else:
    print(f'data/Q.p has already been downloaded')

# b. open
# NOTE: pickle.load can execute arbitrary code, this trusts the content of the downloaded file
with open(filename, 'rb') as f:
    Q = pickle.load(f)

data/Q.p has been download


**Transfer to data:**

In [13]:
for flow,rate in Q.items():

    # a. keep the series as loaded under the prefix l_
    data[f'l_{flow}'] = rate

    # b. monthly transition event probability in percent
    data[flow] = (1.0 - np.exp(-rate))*100

    # c. report the months covered
    print_range(flow)

EE: 1967-06:2019-12
EU: 1967-06:2019-12
EI: 1967-06:2019-12
UE: 1967-06:2019-12
UU: 1967-06:2019-12
UI: 1967-06:2019-12
IE: 1967-06:2019-12
IU: 1967-06:2019-12
II: 1967-06:2019-12


**Implied durations:**

In [14]:
# implied duration = inverse of the corresponding exit rate (UE for u, EU for e)
for state,flow in [('u','l_UE'),('e','l_EU')]:
    data[f'dur_{state}'] = 1/data[flow]

## Data from Coibion

**Note:** Original file downloaded from [http://doi.org/10.3886/E114243V1](http://doi.org/10.3886/E114243V1).

In [15]:
# a. load from excel
# monthly index from 1960-01 to 2008-08 (inclusive); its length fixes how many rows are read
index = np.arange(np.datetime64('1960-01'),np.datetime64('2008-08')+1)

# read column H of the sheet MonthlyData as a single series named CRBCPI
# NOTE(review): skiprows=6 assumes the data block starts right below row 6 - confirm against the spreadsheet
CRBCPI = pd.read_excel(
    'data/114243-V1/AEJ_2010_0129_Data/Data_AEJ.xlsx',
    sheet_name='MonthlyData',names=['CRBCPI'],
    usecols='H',skiprows=6,nrows=index.size).squeeze("columns")

# the sheet is read without dates, so the monthly index is attached by position
CRBCPI.index = index

# b. add to data
data['CRBCPI'] = CRBCPI
print_range('CRBCPI')

CRBCPI: 1960-01:2008-08


**Logarithm:**

In [16]:
# log level scaled by 100
data['log_CRBCPI'] = 100*np.log(data['CRBCPI'])

## Data from Miranda-Agrippino (Romer and Romer)

**Download:**

In [17]:
# fetch the zip archive and extract it to data/RomerRomer/ (skipped when that folder already exists)
url = 'http://silviamirandaagrippino.com/s/Narrative-MP.zip'
download_and_unzip(url,'RomerRomer')

data already downloaded and extracted to data/RomerRomer/


**Process and transfer to data:**

In [18]:
# a. load (first spreadsheet row skipped) and parse the dates, which use the format '%YM%m'
data_RR_df = pd.read_excel(f'{os.getcwd()}/data/RomerRomer/Narrative MP/NarrativeRomerRomerShock.xlsx',skiprows=[0])
data_RR_df['date'] = pd.to_datetime(data_RR_df['date'],format='%YM%m')

# b. update data: one date-indexed series per shock, aligned to the index of data on assignment
for shockname,column in [('shock_RR','Romer & Romer (2004)'),('shock_MA','Miranda-Agrippino (2014)')]:
    data[shockname] = pd.Series(data=data_RR_df[column].values,index=data_RR_df['date'])

# c. report the months covered
for shockname in ['shock_RR','shock_MA']:
    print_range(shockname)

shock_RR: 1969-01:1996-12
shock_MA: 1969-01:2007-12


## Data from Regis Barnichon

**Download:**

In [19]:
filepath = 'data/data_RB.pickle'
if os.path.isfile(filepath):

    # cached version exists: load it
    with open(filepath,'rb') as f:
        data_RB = pickle.load(f)

    print(f'data loaded from {filepath}')

else:

    # i. read the local text file (tab separated, first 7 lines skipped, no header row)
    # a remote copy, not used here, is at
    # https://drive.google.com/file/d/1s9yGoAt6wfpKaBGkP7xV7Hvs7RVV9deS/view?usp=sharing
    data_RB_df = pd.read_csv('data/HWI_index.txt',header=None,skiprows=7,delimiter='\t')

    # ii. structure: name the two columns and parse dates given in the format '%YM%m'
    data_RB_df.columns = ['date','v']
    data_RB_df['date'] = pd.to_datetime(data_RB_df['date'],format='%YM%m')

    data_RB = {'v': pd.Series(data=data_RB_df['v'].values,index=data_RB_df['date'])}

    # iii. save
    with open(filepath,'wb') as handle:
        pickle.dump(data_RB,handle)

    del data_RB_df

    print(f'data downloaded and saved to {filepath}')

data loaded from data/data_RB.pickle


**Transfer to data:**

In [20]:
# add the series as column v, aligned on the index of data
data['v'] = data_RB['v']

**Logarithm:**

In [21]:
# log level scaled by 100
data['log_v'] = 100*np.log(data['v'])

## Tightness

In [22]:
# tightness: ratio of v to u, plus its log level scaled by 100
data['theta'] = data['v'].div(data['u'])
data['log_theta'] = np.log(data['theta'])*100

# Data from Ramey and Fernald (qtr.)

## Ramey

In [23]:
# fetch the zip archive and extract it to data/Ramey_HOM_technology/ (skipped when that folder already exists)
url = 'https://econweb.ucsd.edu/~vramey/research/Ramey_HOM_technology.zip'
download_and_unzip(url,'Ramey_HOM_technology')

data already downloaded and extracted to data/Ramey_HOM_technology/


In [24]:
# quarterly data set: sheet techdat of the extracted workbook
data_Q = pd.read_excel('data/Ramey_HOM_technology/Technology_data.xlsx',sheet_name='techdat')

In [25]:
# the column quarter holds a decimal date: integer part = year, fractional part*4 + 1 = quarter number
# NOTE(review): presumably encoded as year + (q-1)/4 - confirm against the spreadsheet
year = np.floor(data_Q['quarter']).astype('int').astype('str')
quarter = ((data_Q['quarter'] - np.floor(data_Q['quarter']))*4 + 1).astype('int').astype('str')

# parse strings of the form '<year>-Q<quarter>' and use the dates as index
data_Q = data_Q.assign(date=pd.to_datetime(year + '-Q' + quarter)).set_index('date')

Rename the column `pop` to `poplev` (the name `pop` clashes with the `DataFrame.pop` method):

In [26]:
# move the column pop to poplev: DataFrame.pop removes the column and returns it
data_Q['poplev'] = data_Q.pop('pop')

## Fernald

In [27]:
# download the workbook once; an existing local copy is never refreshed
url = 'https://drive.google.com/u/0/uc?id=1ezc7H70Rlnc7tGhB062ua0FG-eLmDJV6&export=download'
filename = f'{os.getcwd()}/data/fenald.xlsx'
if not os.path.isfile(filename):
    urllib.request.urlretrieve(url,filename)

In [28]:
# a. load the sheet quarterly (first row skipped, 292 rows read)
df = pd.read_excel(f'{os.getcwd()}/data/fenald.xlsx',sheet_name='quarterly',skiprows=1,nrows=292)

# b. dates: keep the first 4 and the last 2 characters of the date string, parse and use as index
df['date'] = pd.to_datetime(df['date'].str[:4] + df['date'].str[-2:])
df = df.set_index('date')

# c. transfer, aligned on the index of data_Q
data_Q['dtfp_util'] = df['dtfp_util']

## Data from Haltiwanger (qtr.)

**Download:**

In [29]:
filepath = 'data/data_haltiwanger.pickle'
if os.path.isfile(filepath):

    # cached version exists: load it
    with open(filepath,'rb') as f:
        data_haltiwanger = pickle.load(f)

    print(f'data loaded from {filepath}')

else:

    # a. read the sheet Final Estimates directly from the web (first 4 rows skipped)
    data_haltiwanger = pd.read_excel(
        'http://econweb.umd.edu/~haltiwan/download/DFH_JEP_JobFlows/DFH_JEP_2006_Job_Flows.xls',
        skiprows=4,sheet_name='Final Estimates')

    # b. dates: QTR as text, characters 1-4 = year, character 5 = quarter number
    data_haltiwanger['year'] = data_haltiwanger['QTR'].astype('str').str[:4]
    data_haltiwanger['quarter'] = data_haltiwanger['QTR'].astype('str').str[4]
    data_haltiwanger['date'] = pd.to_datetime(data_haltiwanger['year'] + '-Q' + data_haltiwanger['quarter'])

    # c. save to disc
    with open(filepath,'wb') as handle:
        pickle.dump(data_haltiwanger,handle)

    print(f'data downloaded and saved to {filepath}')

data loaded from data/data_haltiwanger.pickle


**Process:**

In [30]:
# date-indexed series built from the columns JD and JC (presumably job destruction / job creation - confirm)
# the series are named so that anything reporting on them downstream can identify them
jd = pd.Series(data=data_haltiwanger.JD.values,index=data_haltiwanger.date,name='jd')
jc = pd.Series(data=data_haltiwanger.JC.values,index=data_haltiwanger.date,name='jc')

# add to the quarterly data, aligned on the index of data_Q
data_Q['jd'] = jd
data_Q['jc'] = jc

**Resample to monthly**

In [31]:
# convert the quarterly series to monthly and add them to data; each call also prints a one-line summary
data['jd'] = resample_to_M(jd)
data['jc'] = resample_to_M(jc)

PCEPI: [1947-01:2005-01] -> [1947-01:2005-01]
PCEPI: [1947-01:2005-01] -> [1947-01:2005-01]


In [32]:
# report the first and last month with data for each series
for varname in ['jc','jd']: print_range(varname)

jc: 1948-01:2005-01
jd: 1948-01:2005-01


**Logarithms**

In [33]:
# log levels scaled by 100, stored as log_jc and log_jd
for varname in ['jc','jd']:
    data[f'log_{varname}'] = 100*np.log(data[varname])

## Add monthly data to quarterly data

In [34]:
# quarterly averages of the monthly data, labelled by the first day of each quarter
# 'QS' bins by calendar quarter and labels each bin by its first day; this replaces the
# quarter-end alias 'Q' with label='left' plus a one-day shift ('Q' is deprecated since pandas 2.2)
data_Q_from_m = data.resample('QS').mean()

In [35]:
# copy only the columns that data_Q does not already have, existing columns are left untouched
for varname in data_Q_from_m:
    if varname in data_Q: continue
    data_Q[varname] = data_Q_from_m[varname]

## Dump

In [36]:
# save the monthly data set
with open('data/data.p','wb') as f:
    pickle.dump(data,f)

In [37]:
# save the quarterly data set
with open('data/data_Q.p','wb') as f:
    pickle.dump(data_Q,f)