In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import os
from datetime import datetime
from datetime import timedelta
import matplotlib as mt

## Create Function to Import Data from Yahoo Finance

In [2]:
def get_yfinance_data(ticker_name, period):
    ticker = ticker_name
    period = period
    
    data = yf.Ticker(ticker)
    historical_close = pd.DataFrame(data.history(period = period))["Close"]
    return(historical_close)

In [3]:
yahoo_data_list = ["SPY", "DJI"]

In [4]:
for ticker in yahoo_data_list:
    data = pd.DataFrame(get_yfinance_data(ticker, "max"))

## Read CSV Files in the Folder

In [8]:
# Adding working directory
directory = os.listdir("C:/Users/peter/Desktop/python-recession-model")

In [9]:
csv_file_list = []
csv_name = []
for file in directory:
    if file.endswith(".csv"):
        data_name = file.split(".")[0]
        data = pd.read_csv(file)
        csv_name.append(data_name)
        csv_file_list.append(data)

In [10]:
csv_name1 = pd.DataFrame(csv_name).apply(lambda x: x.str.lower().str.replace('(', '').str.replace(')', '').str.replace('-', '').str.replace(',', ' ').str.replace(' ', '_').str.replace('__', '_'))

  csv_name1 = pd.DataFrame(csv_name).apply(lambda x: x.str.lower().str.replace('(', '').str.replace(')', '').str.replace('-', '').str.replace(',', ' ').str.replace(' ', '_').str.replace('__', '_'))


In [11]:
csv_name1

Unnamed: 0,0
0,capacity_utilization_total_index
1,consumer_price_index_for_all_urban_consumers_a...
2,cpi
3,crude_oil_prices_west_texas_intermediate_wti_c...
4,dji
5,economic_policy_uncertainty_index_for_united_s...
6,federal_funds
7,federal_funds_effective_rate_daily
8,fed_model_data
9,industrial_production_total_index


In [12]:
table_dict = {}

for i in range(30):
        table_name = csv_name1.at[i, 0]
        data = pd.DataFrame(csv_file_list[i])
        table_dict[table_name] = data

In [13]:
# Format the date column name
for keys, values in table_dict.items():
    table_dict[keys] = values.rename(str.lower, axis = "columns")

In [14]:
table_count = 0
for keys, values in table_dict.items():
    number_row = len(values)
    top_two = values.head(2)
    bottom_two = values.tail(2)
    table_count = table_count + 1
    print("Table Number: " + str(table_count))
    print("Table Name: " + keys)
    print("Number of Row: "  + str(number_row))
    print(top_two)
    print(bottom_two)
    print("____________________________________________________________________________________________")

Table Number: 1
Table Name: capacity_utilization_total_index
Number of Row: 681
         date      tcu
0  1967-01-01  89.3902
1  1967-02-01  87.9750
           date      tcu
679  2023-08-01  79.5385
680  2023-09-01  79.6786
____________________________________________________________________________________________
Table Number: 2
Table Name: consumer_price_index_for_all_urban_consumers_all_items_in_u
Number of Row: 921
         date  cpiaucsl
0  1947-01-01     21.48
1  1947-02-01     21.62
           date  cpiaucsl
919  2023-08-01   306.269
920  2023-09-01   307.481
____________________________________________________________________________________________
Table Number: 3
Table Name: cpi
Number of Row: 914
         date  cpiaucsl
0  1947-01-01     21.48
1  1947-02-01     21.62
           date  cpiaucsl
912  2023-01-01   300.536
913  2023-02-01   301.648
____________________________________________________________________________________________
Table Number: 4
Table Name: crude_oil_p

# Format Date Object

In [15]:
def guess_date(string):
    for date_format in ["%Y-%m-%d", "%m/%d/%Y", "%d-%b-%y"]:
        try:
            return datetime.strptime(string, date_format).date()
        except ValueError:
            continue
    raise ValueError(string)

In [16]:
for keys, values in table_dict.items():
    date_string = values["date"]
    date_format = ["%Y-%m-%d", "%m/%d/%Y", "%d-%b-%Y"]
    values["date"] = date_string.apply(lambda x: guess_date(x)).apply(lambda x: x.replace(year = x.year - 100) if x.year > 2030 else x )

In [17]:
# Edge Case Fed Model Date Handling
delta = timedelta(days = 1)
table_dict["fed_model_data"]["date"] = table_dict["fed_model_data"]["date"].apply(lambda x: x + delta)

## Basic Information of each dataset

In [18]:
# Function to provide basic information about a list of dataset
def data_basic_infor(table_dict):
    table_count = 0
    for keys, values in table_dict.items():
        number_row = len(values)
        start_date = min(values["date"])
        end_date = max(values["date"])
        table_count = table_count + 1
        column_name = values.columns
        frequency = values["date"][1] - values["date"][0]
        print("Table Number: " + str(table_count))
        print("Table Name: " + keys)
        print("Column Names: " + str(column_name[0]) + ", " + str(column_name[1]))
        print("Number of Row: "  + str(number_row))
        print("Frequency: " + str(frequency))
        print("Start Date: " + str(start_date))
        print("End Date: " + str(end_date))
        print("____________________________________________________________________________________________")

In [19]:
# Overview of basic information about each dataset
data_basic_infor(table_dict)

Table Number: 1
Table Name: capacity_utilization_total_index
Column Names: date, tcu
Number of Row: 681
Frequency: 31 days, 0:00:00
Start Date: 1967-01-01
End Date: 2023-09-01
____________________________________________________________________________________________
Table Number: 2
Table Name: consumer_price_index_for_all_urban_consumers_all_items_in_u
Column Names: date, cpiaucsl
Number of Row: 921
Frequency: 31 days, 0:00:00
Start Date: 1947-01-01
End Date: 2023-09-01
____________________________________________________________________________________________
Table Number: 3
Table Name: cpi
Column Names: date, cpiaucsl
Number of Row: 914
Frequency: 31 days, 0:00:00
Start Date: 1947-01-01
End Date: 2023-02-01
____________________________________________________________________________________________
Table Number: 4
Table Name: crude_oil_prices_west_texas_intermediate_wti_cushing_oklahoma
Column Names: date, dcoilwtico
Number of Row: 9863
Frequency: 1 day, 0:00:00
Start Date: 1986-0

## Filter the Data and Create Updated List of Dataset

In [20]:
# Filter the list to delete based on the following requirements:
# 1. Has start date before 1975
# 2. Has end date after 2023
# 3. The update frequency is less than a month

list_to_delete = []
delta = timedelta(days = 32)
for keys, values in table_dict.items():
    frequency = values["date"][1] - values["date"][0]
    if min(values["date"]).year > 1975:
        list_to_delete.append(keys)
    elif frequency > delta:
        list_to_delete.append(keys)
    elif max(values["date"]).year < 2023:
        list_to_delete.append(keys)
        
        
print("The delete list has " + str(len(list_to_delete)) + " and has following items: ")
print("")
for table_name in list_to_delete:
    print(table_name)

The delete list has 10 and has following items: 

crude_oil_prices_west_texas_intermediate_wti_cushing_oklahoma
dji
economic_policy_uncertainty_index_for_united_states
inflation_consumer_prices_for_the_united_states
leading_index_for_the_united_states
t10y2y_daily
t10y3m_daily
university_of_michigan_inflation_expectation
velocity_of_m2_money_stock
vix


In [21]:
updated_table_dict = table_dict

for i in list_to_delete:
    del updated_table_dict[i]


In [22]:
data_basic_infor(updated_table_dict)

Table Number: 1
Table Name: capacity_utilization_total_index
Column Names: date, tcu
Number of Row: 681
Frequency: 31 days, 0:00:00
Start Date: 1967-01-01
End Date: 2023-09-01
____________________________________________________________________________________________
Table Number: 2
Table Name: consumer_price_index_for_all_urban_consumers_all_items_in_u
Column Names: date, cpiaucsl
Number of Row: 921
Frequency: 31 days, 0:00:00
Start Date: 1947-01-01
End Date: 2023-09-01
____________________________________________________________________________________________
Table Number: 3
Table Name: cpi
Column Names: date, cpiaucsl
Number of Row: 914
Frequency: 31 days, 0:00:00
Start Date: 1947-01-01
End Date: 2023-02-01
____________________________________________________________________________________________
Table Number: 4
Table Name: federal_funds
Column Names: date, fedfunds
Number of Row: 828
Frequency: 31 days, 0:00:00
Start Date: 1954-07-01
End Date: 2023-06-01
_______________________

# Merge for final table

In [23]:
final_data = updated_table_dict["capacity_utilization_total_index"]

for keys, values in updated_table_dict.items():
    print(keys)
    final_data = final_data.merge(values, how = "left", on = "date")

capacity_utilization_total_index
consumer_price_index_for_all_urban_consumers_all_items_in_u
cpi
federal_funds
federal_funds_effective_rate_daily
fed_model_data
industrial_production_total_index
initial_claims
labor_force_participation_rate
m1_monthly
m2_monthly
m3_monthly
market_yield_on_u
monetary_aggregates_and_their_components_broad_money_and_components_m3_for_united_states
monetary_base_total
nber_based_recession_indicators_for_the_united_states_from_the_period_following_the_peak_through_the_trough
sp500
unemployment_rate
university_of_michigan_consumer_sentiment


In [24]:
updated_final_data = final_data.drop(labels = ["icsa", "dgs10"], axis = 1)

In [25]:
updated_final_data["spread"] != None

0      True
1      True
2      True
3      True
4      True
       ... 
676    True
677    True
678    True
679    True
680    True
Name: spread, Length: 681, dtype: bool

In [26]:
updated_final_data = updated_final_data[-(updated_final_data["spread"].isna())].dropna().reset_index()

In [27]:
updated_final_data.isna().sum()

index                                             0
date                                              0
tcu_x                                             0
tcu_y                                             0
cpiaucsl_x                                        0
cpiaucsl_y                                        0
fedfunds                                          0
dff                                               0
10 year treasury yield                            0
3 month treasury yield                            0
3 month treasury yield (bond equivalent basis)    0
spread                                            0
rec_prob                                          0
nber_rec                                          0
indpro                                            0
civpart                                           0
m1sl                                              0
m2sl                                              0
mabmm301usm189s_x                                 0
mabmm301usm1

In [28]:
updated_final_data

Unnamed: 0,index,date,tcu_x,tcu_y,cpiaucsl_x,cpiaucsl_y,fedfunds,dff,10 year treasury yield,3 month treasury yield,...,civpart,m1sl,m2sl,mabmm301usm189s_x,mabmm301usm189s_y,bogmbase,usrec,value,unrate,umcsent
0,0,1967-01-01,89.3902,89.3902,32.900,32.900,4.94,5.00,4.84,4.96,...,59.5,171.9,481.6,4.816000e+11,4.816000e+11,63800.0,0,84.45,3.9,.
1,1,1967-02-01,87.9750,87.9750,33.000,33.000,5.00,4.00,4.58,4.72,...,59.3,173.0,485.1,4.851000e+11,4.851000e+11,63300.0,0,87.36,3.8,94.1
2,2,1967-03-01,87.0835,87.0835,33.000,33.000,4.53,3.75,4.63,4.56,...,59.1,174.8,489.7,4.897000e+11,4.897000e+11,63200.0,0,89.42,3.8,.
3,3,1967-04-01,87.5089,87.5089,33.100,33.100,4.05,4.50,4.54,4.26,...,59.4,174.2,492.1,4.921000e+11,4.921000e+11,63300.0,0,90.96,3.8,.
4,4,1967-05-01,86.3566,86.3566,33.100,33.100,3.94,4.00,4.59,3.84,...,59.3,175.7,497.2,4.972000e+11,4.972000e+11,63400.0,0,92.59,3.8,95.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
655,669,2022-10-01,80.6377,80.6377,297.987,297.987,3.08,3.08,3.52,3.13,...,62.2,20098.1,21432.1,2.143130e+13,2.143270e+13,5340200.0,0,3726.05,3.7,59.9
656,670,2022-11-01,80.2626,80.2626,298.598,298.598,3.78,3.08,3.98,3.72,...,62.2,19964.4,21398.7,2.139690e+13,2.139870e+13,5419400.0,0,3917.49,3.6,56.8
657,671,2022-12-01,78.9195,78.9195,298.990,298.990,4.10,3.83,3.89,4.15,...,62.3,19821.0,21358.1,2.135770e+13,2.135760e+13,5406000.0,0,3912.38,3.5,59.7
658,672,2023-01-01,79.6374,79.6374,300.536,300.536,4.33,4.33,3.62,4.25,...,62.4,19560.1,21212.6,2.121200e+13,2.122170e+13,5329000.0,0,3960.66,3.4,64.9


In [29]:
updated_final_data["12m_rec"] = 0

for i in updated_final_data.index:
    
    if i > 648:
        break
    
    rec_count = 0
    for j in range(12):
        rec_count = rec_count + updated_final_data.loc[i + j, "usrec"]
    
    if rec_count != 0:
        rec_count = 1
    
    print(rec_count)
    updated_final_data.loc[i , "12m_rec"] = rec_count

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0


In [30]:
list_to_drop = ["usrec", "10 year treasury yield", "3 month treasury yield", "3 month treasury yield (bond equivalent basis)"]

updated_final_data.drop(labels = list_to_drop, axis = 1)

Unnamed: 0,index,date,tcu_x,tcu_y,cpiaucsl_x,cpiaucsl_y,fedfunds,dff,spread,rec_prob,...,civpart,m1sl,m2sl,mabmm301usm189s_x,mabmm301usm189s_y,bogmbase,value,unrate,umcsent,12m_rec
0,0,1967-01-01,89.3902,89.3902,32.900,32.900,4.94,5.00,-0.25,26.92%,...,59.5,171.9,481.6,4.816000e+11,4.816000e+11,63800.0,84.45,3.9,.,0
1,1,1967-02-01,87.9750,87.9750,33.000,33.000,5.00,4.00,-0.26,31.88%,...,59.3,173.0,485.1,4.851000e+11,4.851000e+11,63300.0,87.36,3.8,94.1,0
2,2,1967-03-01,87.0835,87.0835,33.000,33.000,4.53,3.75,-0.05,28.40%,...,59.1,174.8,489.7,4.897000e+11,4.897000e+11,63200.0,89.42,3.8,.,0
3,3,1967-04-01,87.5089,87.5089,33.100,33.100,4.05,4.50,0.17,26.25%,...,59.4,174.2,492.1,4.921000e+11,4.921000e+11,63300.0,90.96,3.8,.,0
4,4,1967-05-01,86.3566,86.3566,33.100,33.100,3.94,4.00,0.66,29.46%,...,59.3,175.7,497.2,4.972000e+11,4.972000e+11,63400.0,92.59,3.8,95.9,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
655,669,2022-10-01,80.6377,80.6377,297.987,297.987,3.08,3.08,0.32,8.46%,...,62.2,20098.1,21432.1,2.143130e+13,2.143270e+13,5340200.0,3726.05,3.7,59.9,0
656,670,2022-11-01,80.2626,80.2626,298.598,298.598,3.78,3.08,0.17,6.66%,...,62.2,19964.4,21398.7,2.139690e+13,2.139870e+13,5419400.0,3917.49,3.6,56.8,0
657,671,2022-12-01,78.9195,78.9195,298.990,298.990,4.10,3.83,-0.36,6.83%,...,62.3,19821.0,21358.1,2.135770e+13,2.135760e+13,5406000.0,3912.38,3.5,59.7,0
658,672,2023-01-01,79.6374,79.6374,300.536,300.536,4.33,4.33,-0.74,7.70%,...,62.4,19560.1,21212.6,2.121200e+13,2.122170e+13,5329000.0,3960.66,3.4,64.9,0


In [82]:
def get_monthly_change_rate(columns):
    monthly_change = []
    
    target_index = (columns.index + 1)
    target_index = target_index.drop([len(target_index)])
    
    for row in target_index:
        change = (columns[row]/columns[row - 1]) - 1
        monthly_change.append(change)
        
    return(monthly_change)
            

In [84]:
spread_change = get_monthly_change_rate(updated_final_data["spread"])

  change = (columns[row]/columns[row - 1]) - 1


In [None]:
def change_momentum(columns):
    