In [352]:
import pandas as pd 
import numpy as np

## 1 Exercise: Business cycle correlations

#### 1.1

In [353]:
import glob

data = []

DATA_PATH = '/Users/andreas/NHH/tech2-local/TECH2-H24/data/FRED'

files = glob.glob(f'{DATA_PATH}/FRED_monthly_*.csv')

for file in files:
    data.append(pd.read_csv(file, parse_dates=['DATE']))

#### 1.2

In [354]:
df = pd.concat(data, axis = 0).set_index('DATE')
df.head()

Unnamed: 0_level_0,CPI,UNRATE,FEDFUNDS,REALRATE,LFPART,INFLATION
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1950-01-01,23.5,6.5,,,58.9,
1950-02-01,23.6,6.4,,,58.9,
1950-03-01,23.6,6.3,,,58.8,
1950-04-01,23.6,5.8,,,59.2,
1950-05-01,23.8,5.5,,,59.1,


#### 1.3

In [355]:
import os.path

fn = os.path.join(DATA_PATH, 'GDP.csv')
GDP = pd.read_csv(fn, parse_dates=['DATE'], index_col=['DATE'])

df = df.merge(GDP, on='DATE', how='inner')
df.head()

Unnamed: 0_level_0,CPI,UNRATE,FEDFUNDS,REALRATE,LFPART,INFLATION,GDP
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1950-01-01,23.5,6.5,,,58.9,,2346.1
1950-04-01,23.6,5.8,,,59.2,,2417.7
1950-07-01,24.1,5.0,,,59.1,,2511.1
1950-10-01,24.5,4.2,,,59.4,,2559.2
1951-01-01,25.4,3.7,,,59.1,,2594.0


#### 1.4

In [356]:
df_changes = df[['CPI', 'GDP']].pct_change()*100

variables = ['UNRATE', 'FEDFUNDS', 'REALRATE', 'LFPART']

df_changes[variables] = df[variables].diff()

df_changes.head()

Unnamed: 0_level_0,CPI,GDP,UNRATE,FEDFUNDS,REALRATE,LFPART
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1950-01-01,,,,,,
1950-04-01,0.425532,3.051873,-0.7,,,0.3
1950-07-01,2.118644,3.863176,-0.8,,,-0.1
1950-10-01,1.659751,1.915495,-0.8,,,0.3
1951-01-01,3.673469,1.3598,-0.5,,,-0.3


In [357]:
df_changes.corr().loc['GDP']

CPI         0.970083
GDP         1.000000
UNRATE     -0.158591
FEDFUNDS   -0.008052
REALRATE   -0.137800
LFPART      0.803695
Name: GDP, dtype: float64

## 3 Exercise: Decade averages of macro time series

#### 3.1

In [358]:
files = ['INFLATION', 'FEDFUNDS','LFPART','REALRATE','UNRATE']

df = None

for file in files:
    fn = os.path.join(DATA_PATH, f'{file}.csv')
    d = pd.read_csv(fn, parse_dates=['DATE'], index_col=['DATE'])

    if df is None:
       df = d

    else:
        df = df.join(d, how='outer')
df.head(5)

Unnamed: 0_level_0,INFLATION,FEDFUNDS,LFPART,REALRATE,UNRATE
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1948-01-01,10.2,,58.6,,3.4
1948-02-01,9.5,,58.9,,3.8
1948-03-01,6.8,,58.5,,4.0
1948-04-01,8.3,,59.0,,3.9
1948-05-01,9.4,,58.3,,3.5


#### 3.2

In [359]:
files = ['INFLATION', 'FEDFUNDS','LFPART','REALRATE','UNRATE']

data = []

for file in files:
    fn = os.path.join(DATA_PATH, f'{file}.csv')
    data.append(pd.read_csv(fn, parse_dates=['DATE'], index_col=['DATE']))

df2 = pd.concat(data, axis=1, join='outer')

df2.head(5)

Unnamed: 0_level_0,INFLATION,FEDFUNDS,LFPART,REALRATE,UNRATE
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1948-01-01,10.2,,58.6,,3.4
1948-02-01,9.5,,58.9,,3.8
1948-03-01,6.8,,58.5,,4.0
1948-04-01,8.3,,59.0,,3.9
1948-05-01,9.4,,58.3,,3.5


In [360]:
df.compare(df2)

DATE


#### 3.3

In [361]:
year = df.index.year
decade = (year // 10)*10

df['Decade'] = decade
df['Decade'].value_counts()

Decade
1950    120
1960    120
1970    120
1980    120
1990    120
2000    120
2010    120
2020     58
1940     24
Name: count, dtype: int64

In [362]:
def num_missing(x):
    n = x.isna().sum()
    return n

df_miss = df.groupby('Decade').agg(num_missing)
df_miss

Unnamed: 0_level_0,INFLATION,FEDFUNDS,LFPART,REALRATE,UNRATE
Decade,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1940,0,24,0,24,0
1950,0,54,0,120,0
1960,0,0,0,120,0
1970,0,0,0,120,0
1980,0,0,0,24,0
1990,0,0,0,0,0
2000,0,0,0,0,0
2010,0,0,0,0,0
2020,2,1,1,0,1


In [363]:
no_miss = df_miss.sum(axis=1)==0

no_miss = no_miss.to_frame('NotMissing')
no_miss = no_miss.reset_index()

df = df.merge(no_miss, how='left', on='Decade')
df.head(5)

Unnamed: 0,INFLATION,FEDFUNDS,LFPART,REALRATE,UNRATE,Decade,NotMissing
0,10.2,,58.6,,3.4,1940,False
1,9.5,,58.9,,3.8,1940,False
2,6.8,,58.5,,4.0,1940,False
3,8.3,,59.0,,3.9,1940,False
4,9.4,,58.3,,3.5,1940,False


####  3.4

In [364]:
df_no_miss = df.loc[df['NotMissing']].copy()

print(f'Final number of observations: {len(df_no_miss)}')

Final number of observations: 360


In [365]:
# drop NotMissing, don't want averages of these 
df_no_miss = df_no_miss.drop(columns=['NotMissing'])

# Compute decade means
df_no_miss.groupby('Decade').mean()

Unnamed: 0_level_0,INFLATION,FEDFUNDS,LFPART,REALRATE,UNRATE
Decade,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990,3.006667,5.14,66.668333,2.206667,5.7625
2000,2.568333,2.9525,66.236667,1.023333,5.541667
2010,1.771667,0.618333,63.295,-0.7325,6.220833
