In [1]:
#import sys
#import subprocess

#subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas", "numpy", "statsmodels", "matplotlib", "scipy", "openpyxl"])

In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import scipy.stats as stats

# Data Preparation

1) Load the cleaning function we created last time
2) Use the function to prepare the SPX and NVDA data

In [3]:
# you can use this cleaning function we created last time
def data_cleaning(benchm, asset):

    benchm["Date"] = pd.to_datetime(benchm["Date"])
    benchm = benchm.sort_values(by='Date', ascending=True) 

    asset["Date"] = pd.to_datetime(asset["Date"])
    asset = asset.sort_values(by='Date', ascending=True) 

    comb = benchm.merge(asset,on="Date", how="left")

    comb['return_benchmark'] = comb.iloc[:, 1] / comb.iloc[:, 1].shift(1)-1
    comb['return_asset'] = comb.iloc[:, 2] / comb.iloc[:, 2].shift(1)-1
    #comb['logreturn_benchmark'] = np.log(comb.iloc[:, 1] / comb.iloc[:, 1].shift(1))
    #comb['logreturn_asset'] = np.log(comb.iloc[:, 2] / comb.iloc[:, 2].shift(1))
    comb = comb.dropna()

    return comb

3) Load the Fama-French 5-factor (FF5) time-series data
4) Format the dates to datetime format
5) Merge the SPX/NVDA data and the FF5 data using the SPX/NVDA dataframe as the leading dataframe
6) Compute excess returns for NVDA by subtracting the risk-free rate (column "RF" in FF5) from simple NVDA returns

In [10]:
FF5 = pd.read_excel("F-F_Research_Data_5_Factors_2x3_daily.xlsx")
FF5["Date"] = pd.to_datetime(FF5["Date"])
print(FF5.head())

df = pd.read_excel("SPX.xlsx")
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values("Date")

dfNV = pd.read_excel("NVDA.xlsx")
dfNV["Date"] = pd.to_datetime(df["Date"])
dfNV = dfNV.sort_values("Date")

print(df.head())
print(dfNV.head())


df = df.merge(dfNV, how="left", on="Date")

df["returnsSPX"] = (df["SPX"] / df["SPX"].shift(1)) -1
df["returnsNV"] = (df["NVDA"] / df["NVDA"].shift(1)) -1

df.dropna(inplace=True)

print(df.head())

df = df.merge(FF5, how= "left", on="Date")

df.to_csv("test.csv")


                           Date  Mkt-RF   SMB   HML   RMW   CMA    RF
0 1970-01-01 00:00:00.019630701   -0.67  0.00 -0.34 -0.01  0.16  0.01
1 1970-01-01 00:00:00.019630702    0.79 -0.26  0.26 -0.07 -0.20  0.01
2 1970-01-01 00:00:00.019630703    0.63 -0.17 -0.09  0.18 -0.34  0.01
3 1970-01-01 00:00:00.019630705    0.40  0.08 -0.27  0.09 -0.34  0.01
4 1970-01-01 00:00:00.019630708   -0.63  0.04 -0.18 -0.29  0.14  0.01
        Date      SPX
0 2023-01-03  3824.14
1 2023-01-04  3852.97
2 2023-01-05  3808.10
3 2023-01-06  3895.08
4 2023-01-09  3892.09
        Date    NVDA
0 2023-01-03  14.614
1 2023-01-04  14.315
2 2023-01-05  14.749
3 2023-01-06  14.265
4 2023-01-09  14.859
        Date      SPX    NVDA  returnsSPX  returnsNV
1 2023-01-04  3852.97  14.315    0.007539  -0.020460
2 2023-01-05  3808.10  14.749   -0.011646   0.030318
3 2023-01-06  3895.08  14.265    0.022841  -0.032816
4 2023-01-09  3892.09  14.859   -0.000768   0.041640
5 2023-01-10  3919.25  15.628    0.006978   0.051753


# Multivariate Regression

1) Regress NVDA excess returns on a constant and all 5 Fama-French factors ("Mkt-RF", "SMB", "HML", "RMW", "CMA")

# FED Probit model

1) Load the dataset containing the difference between the 3-month and 10-year Treasury yield (t10y3m)
2) Ensure the dates are in datetime format and create a new column that contains the first date of the month to use it as a month indicator
3) Group data by that month indicator taking the monthly mean of the yield spread

4) Load the dataset with monthly recession indicators and ensure the date column is in datetime format

5) Merge the two dataframes using on the month indicator the yield spread as the leading dataframe
6) Create a new column that gives the 12 months ahead recession indicator using the .shift() method
7) Drop NaN observations

8) Estimate a probit model with an intercept, the yield spread as an independent variable and 12 month ahead recession indicator as the dependent variable using data until December 2009

9) Based on this model, predict the probability of a recession for each month in the entire dataset and plot that probability over time 