In [1]:
import pandas as pd
import numpy as np

In [2]:
# Monthly unemployment rate.
# source: https://data.bls.gov/pdq/SurveyOutputServlet
# The CSV is indexed by its "Year" column.
raw_unemployment_data = pd.read_csv("USUnemployment.csv", index_col="Year")

# Average across the columns of each row, divide by 100 (presumably
# percent -> fraction) and keep four decimals.
row_means = raw_unemployment_data.mean(axis="columns")
mean_unemployment = row_means.div(100).round(4).rename("mean_unemployment")

In [3]:
# Monthly CPI (series CPIAUCNS).
# source: https://fred.stlouisfed.org/series/CPIAUCNS
raw_inflation_data = pd.read_csv(
    "CPIAUCNS.csv",
    index_col="observation_date",
    parse_dates=["observation_date"],
    date_format="%Y-%m-%d",
)


def _first_to_last_change(cpi_in_year):
    """Return the relative change from the first to the last value of a series."""
    first_value = cpi_in_year.iat[0]
    last_value = cpi_in_year.iat[-1]
    return last_value / first_value - 1


# One value per calendar year: the change between that year's first and last
# observation. NOTE: this is a within-year change, not a
# December-over-December rate.
cpi_by_year = raw_inflation_data["CPIAUCNS"].groupby(raw_inflation_data.index.year)
inflation_rate = cpi_by_year.apply(_first_to_last_change).rename("inflation_rate")

In [4]:
# Assumption: the Phillips curve was stable up to 1970 because inflation
# expectations were stable. People are neither dumb nor blind, so the expected
# inflation for those years is taken to be the mean of the realised inflation
# over the same period.
STABLE_EXPECTATION_YEARS = range(1900, 1970)  # 1900..1969 inclusive

# https://fred.stlouisfed.org/series/MICH
# Median expected price change next 12 months, Surveys of Consumers.
raw_inflation_expectations = pd.read_csv(
    "michigan_inflation_expectations.csv",
    parse_dates=["observation_date"],
    date_format="%Y-%m-%d",
    index_col="observation_date"
)


# `.loc` slices by label and INCLUDES the end label, so slice up to the last
# stable year (1969); slicing to 1970 would average over a year that is not
# part of the period the constant is assigned to.
mean_inflation_until_1970 = (
    inflation_rate.loc[:STABLE_EXPECTATION_YEARS[-1]].mean().round(5)
)
# Broadcast the single mean over every stable year.
expected_inflation_until_1970 = pd.Series(
    mean_inflation_until_1970, index=STABLE_EXPECTATION_YEARS
)

# Survey-based expectations: take the first observation of each calendar year
# and divide by 100 (presumably percent -> fraction). Built in one groupby
# expression instead of growing an empty Series inside a Python loop.
expected_inflation_after_1977 = (
    raw_inflation_expectations["MICH"]
    .groupby(raw_inflation_expectations.index.year)
    .apply(lambda monthly: monthly.iat[0])
    .round(4)
    / 100
)

# Stitch both periods together; years covered by neither source stay NaN
# after the reindex.
expected_inflation = (
    pd.concat([expected_inflation_until_1970, expected_inflation_after_1977])
    .reindex(range(1900, 2025))
    .rename("expected_inflation")
)

In [5]:
# Per-column quantile above which observations are blanked out. With a value
# of 1 the cut-off is the column maximum, so nothing is removed.
LIMITING_QUANTILE = 1

# Keep only the years present in all three series.
dataset = pd.concat(
    [inflation_rate, mean_unemployment, expected_inflation],
    axis="columns",
    join="inner",
)
quantile = dataset.quantile(LIMITING_QUANTILE)

# Replace every value that exceeds its own column's cut-off with NaN.
for column in ("inflation_rate", "mean_unemployment", "expected_inflation"):
    above_cutoff = dataset[column] > quantile[column]
    dataset.loc[above_cutoff, column] = np.nan

In [13]:
# Write the assembled dataset to a CSV file in the working directory.
OUTPUT_PATH = "dataset.csv"
dataset.to_csv(OUTPUT_PATH)