In [1]:
import pandas as pd
import numpy as np
from datetime import date
from pathlib import Path

In [3]:
# source: https://data.bls.gov/pdq/SurveyOutputServlet
# monthly unemployment rate
raw_unemployment_data = pd.read_csv(Path("raw_data/USUnemployment.csv"), index_col="Year")

first_year = raw_unemployment_data.index[0]
last_year = raw_unemployment_data.index[-1]

mean_unemployment = pd.Series(
    (raw_unemployment_data.mean(axis="columns") / 100).array,
    name = "mean_unemployment",
    index = pd.date_range(start=date(year=first_year,month=12,day=31), end=date(year=last_year,month=12,day=31), freq="YE")
)
mean_unemployment

1948-12-31    0.037500
1949-12-31    0.060500
1950-12-31    0.052083
1951-12-31    0.032833
1952-12-31    0.030250
                ...   
2015-12-31    0.052750
2016-12-31    0.048750
2017-12-31    0.043417
2018-12-31    0.038917
2019-12-31    0.036667
Freq: YE-DEC, Name: mean_unemployment, Length: 72, dtype: float64

In [4]:
# source: https://fred.stlouisfed.org/series/CPIAUCNS 
# monthly CPI
raw_inflation_data = pd.read_csv(
    Path("raw_data/CPIAUCNS.csv"), 
    parse_dates=["observation_date"], 
    date_format="%Y-%m-%d",
    index_col="observation_date"
).squeeze()


inflation_rate = (
    raw_inflation_data
    .resample("YE")
    .apply(lambda x: x.iat[-1] / x.iat[0] - 1)
)

inflation_rate.name = "inflation_rate"
inflation_rate

observation_date
1913-12-31    0.020408
1914-12-31    0.010000
1915-12-31    0.019802
1916-12-31    0.115385
1917-12-31    0.170940
                ...   
2020-12-31    0.009703
2021-12-31    0.065830
2022-12-31    0.055661
2023-12-31    0.025323
2024-12-31    0.022943
Freq: YE-DEC, Name: inflation_rate, Length: 112, dtype: float64

In [5]:
#The reason why the Phillips-curve was stable up until 1970 are the stable inflation expectations. 
#Considering that people are neither dump nor blind it is safe to assume that the inflation expectations from 1900 to 1970 were the mean of the inflation in the said time period

# https://fred.stlouisfed.org/series/MICH
# Median expected price change next 12 months, Surveys of Consumers.
raw_inflation_expectations = pd.read_csv(
    Path("raw_data/michigan_inflation_expectations.csv"),
    parse_dates=["observation_date"],
    date_format="%Y-%m-%d",
    index_col="observation_date"
).squeeze()

expected_inflation = pd.Series(
    (raw_inflation_expectations / 100), 
    name="expected_inflation",
    index=pd.date_range(start="1913-01-01", end="2024-12-31", freq="YS")
)

# expected inflation for the current year as a mean of the expectations in the first 3 months of the year
expected_inflation = expected_inflation.resample("YS").apply(lambda x: x.loc[x.index.month<4].mean())

# TODO: figure out the actual expectations
# assumption: the inflation expectation for the next year is the mean inflation in the previous year
day = pd.tseries.offsets.DateOffset(1)
expected_inflation_before_1978 = pd.Series(inflation_rate.array, inflation_rate.index + day).loc[:"1977-01-01"]
expected_inflation = expected_inflation.fillna(value=expected_inflation_before_1978).dropna()
expected_inflation

1914-01-01    0.020408
1915-01-01    0.010000
1916-01-01    0.019802
1917-01-01    0.115385
1918-01-01    0.170940
                ...   
2020-01-01    0.025000
2021-01-01    0.030000
2022-01-01    0.049000
2023-01-01    0.039000
2024-01-01    0.029000
Freq: YS-JAN, Name: expected_inflation, Length: 111, dtype: float64

In [144]:
dataset = pd.concat(
    [inflation_rate, mean_unemployment, expected_inflation.resample("YE").ffill()], 
    axis="columns", 
    join="inner"
)
dataset.index.name = "year"
dataset

Unnamed: 0_level_0,inflation_rate,mean_unemployment,expected_inflation
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1948-12-31,0.016878,0.037500,0.088372
1949-12-31,-0.016667,0.060500,0.016878
1950-12-31,0.063830,0.052083,-0.016667
1951-12-31,0.043307,0.032833,0.063830
1952-12-31,0.007547,0.030250,0.043307
...,...,...,...
2015-12-31,0.012058,0.052750,0.025000
2016-12-31,0.019062,0.048750,0.025000
2017-12-31,0.015175,0.043417,0.026000
2018-12-31,0.013580,0.038917,0.027000


In [145]:
dataset.to_csv("dataset.csv")