In [1]:
# import packages
import pandas as pd
import math
import numpy as np
import statsmodels.api as sm
from scipy import stats
import statistics
import plotly.graph_objects as go
from scipy.stats import norm
from plotly.subplots import make_subplots
from sklearn.neighbors import KernelDensity

In [2]:
# Load the market prices
S_P100index = pd.read_excel("/files/exercises/Homeworks/HW4/TP4.xls", sheet_name = 'S&P100Index', skiprows = 4)
S_P100index.columns = ['date','S&P100','S&P500']
S_P100index = S_P100index.set_index('date')

# Load the stocks prices
S_P100const = pd.read_excel("/files/exercises/Homeworks/HW4/TP4.xls", sheet_name = 'S&P100Constituents', skiprows = 3)
S_P100const = S_P100const.drop([0])
S_P100const.rename(columns = {'Name': 'date'}, inplace=True)
S_P100const = S_P100const.set_index('date')
S_P100const = S_P100const.astype(float)

# Load the risk free rates
TBill3Months = pd.read_excel("/files/exercises/Homeworks/HW4/TP4.xls", sheet_name = 'TBill3Months', skiprows = 4)
TBill3Months.columns = ['date','US bill 3m']
TBill3Months = TBill3Months.set_index('date')

# Load something
FamaFrenchPortfolios = pd.read_excel("/files/exercises/Homeworks/HW4/TP4.xls", sheet_name = 'FamaFrenchPortfolios', skiprows = 21)
FamaFrenchPortfolios.columns = ['date','Small_Low BE/ME', 'Small_Med BE/ME', 'Small_High BE/ME','Big_Low BE/ME', 'Big_Med BE/ME', 'Big_High BE/ME']
FamaFrenchPortfolios = FamaFrenchPortfolios.set_index('date')

In [3]:
# Compute Arithmetic return for each market
AR_100 = pd.DataFrame((S_P100index.iloc[1:,0:].values - S_P100index.iloc[0:-1,0:].values) / S_P100index.iloc[0:-1,0:].values,  columns = ['S&P100','S&P500'])

# Compute Arithmetic return for each stocks
names_company = S_P100const.columns
AR_100_const = pd.DataFrame((S_P100const.iloc[1:,0:].values - S_P100const.iloc[0:-1,0:].values) / S_P100const.iloc[0:-1,0:].values, columns = names_company)

# Compute risk free rates (since it's given annualy we have to divide it by 100 and 52 to get weekly's rates)
AR_US3M = pd.DataFrame((TBill3Months.iloc[1:,0:].values / (100*52)),  columns = ['US 3 month'])

# Test of the CAPM : Time Series approach

## Exercice 1

In [4]:
# Compute the index of the return of interest
index_start = int(np.where(S_P100index.index == '1992-11-12')[0])
index_end = int(np.where(S_P100index.index == '2001-08-16')[0] - 1)

In [5]:
# Compute the beta of each stock thanks to the formula cov(ri,rm)/var(rm)
beta_stock = np.ones(AR_100_const.shape[1])
for i in range(0, AR_100_const.shape[1]):
    beta_stock[i] = np.cov(AR_100_const.iloc[index_start:index_end, i].values, AR_100.iloc[index_start:index_end, 1].values)[0, 1] / np.cov(AR_100_const.iloc[index_start:index_end, i].values, AR_100.iloc[index_start:index_end, 1].values)[1, 1]

pd.DataFrame(beta_stock, index = names_company, columns = ['Beta'])

Unnamed: 0,Beta
AES,1.012828
ALCOA,0.616923
ALLEGHENY TECHS.,0.702266
AMER.ELEC.PWR.,0.262885
AMER.EXPRESS,1.501120
...,...
WELLS FARGO & CO,1.124522
WEYERHAEUSER,0.713086
WILLIAMS COS.,0.817679
XEROX,1.349616


## Exercice 2

In [6]:
# Compute the alphas and betas of each stock thanks to the linear regression
nb_asset = len(AR_100_const.columns) # Compute the number of colums to deal with
betas = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the beta
alphas = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the alpha
std_alpha = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the std of alpha

for i in range(0, nb_asset):
    zi =  AR_100_const.iloc[index_start:index_end, i].values - AR_US3M.iloc[index_start:index_end, 0].values
    zm = AR_100.iloc[index_start:index_end, 1].values - AR_US3M.iloc[index_start:index_end, 0].values
    X = sm.add_constant(zm)
    y = zi
    
    # Fit regression on the data of interest and save the alpha, beta and std of alpha
    reg = sm.OLS(endog = y, exog = X)
    results = reg.fit()
    betas[i] = results.params[1]
    alphas[i] = results.params[0]
    std_alpha[i] = results.bse[0]

In [7]:
# Compute the t-value of the alpha --> alpha/std_alpha (if absolut value < 1.96 --> can not reject H0 (alpha = 0))
pd.DataFrame(np.transpose(np.array([betas, alphas, std_alpha, np.around(abs(alphas/std_alpha), decimals=2)])), index = names_company, columns = ['beta', 'alpha', 'std_alpha', 't-value'])

Unnamed: 0,beta,alpha,std_alpha,t-value
AES,1.012350,0.004353,0.002591,1.68
ALCOA,0.616899,0.002267,0.002010,1.13
ALLEGHENY TECHS.,0.703382,-0.001898,0.002084,0.91
AMER.ELEC.PWR.,0.262135,-0.000062,0.001372,0.05
AMER.EXPRESS,1.501551,0.001655,0.001557,1.06
...,...,...,...,...
WELLS FARGO & CO,1.124238,0.001355,0.001535,0.88
WEYERHAEUSER,0.713880,-0.000244,0.001858,0.13
WILLIAMS COS.,0.817565,0.002626,0.002032,1.29
XEROX,1.351780,-0.001567,0.002970,0.53


In [8]:
# Compute the number of stocks (over 95 = nb_asset) with alpha = 0 (statistically speaking)
np.sum(abs(alphas/std_alpha) < 1.96)

90

In [9]:
# Fit regression on the remaining data and save the r-squared of each stocks
index_start2 = int(np.where(S_P100index.index == '2001-08-16')[0] - 1)
r2 = np.zeros(nb_asset)

for i in range(0, nb_asset):
    zi =  AR_100_const.iloc[index_start2:, i].values - AR_US3M.iloc[index_start2:, 0].values
    zm = AR_100.iloc[index_start2:, 1].values - AR_US3M.iloc[index_start2:, 0].values
    X = sm.add_constant(zm)
    y = zi
    
    reg = sm.OLS(endog = y, exog = X, missing='drop')
    results = reg.fit()
    r2[i] = results.rsquared

In [10]:
# Compute the r-squared for each stocks
pd.DataFrame(np.around(r2, decimals=2), index = names_company, columns = ['R-squared']) 

Unnamed: 0,R-squared
AES,0.19
ALCOA,0.64
ALLEGHENY TECHS.,0.39
AMER.ELEC.PWR.,0.05
AMER.EXPRESS,0.73
...,...
WELLS FARGO & CO,0.39
WEYERHAEUSER,0.53
WILLIAMS COS.,0.15
XEROX,0.29


In [11]:
# Compute the average r-squared of each stocks
statistics.mean(np.around(r2, decimals=2))

0.32789473684210524

# Test of the CAPM : Cross-Section approach

## Exercice 1

In [12]:
# Compute the index of the return of interest
index_start3 = int(np.where(S_P100index.index == '1992-11-12')[0])
index_end3 = int(np.where(S_P100index.index == '1996-11-14')[0] - 1)

In [13]:
# Compute the alphas and betas of each stock thanks to the linear regression
nb_asset = len(AR_100_const.columns) # Compute the number of colums to deal with
betas1 = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the beta

for i in range(0, nb_asset):
    zi =  AR_100_const.iloc[index_start3:index_end3, i].values - AR_US3M.iloc[index_start3:index_end3, 0].values
    zm = AR_100.iloc[index_start3:index_end3, 1].values - AR_US3M.iloc[index_start3:index_end3, 0].values
    X = sm.add_constant(zm)
    y = zi
    
    # Fit regression on the data of interest and save the alpha, beta and std of alpha
    reg = sm.OLS(endog = y, exog = X)
    results = reg.fit()
    betas1[i] = results.params[1]

## Exercice 2

In [14]:
index = np.argsort(betas1)
 
P1 = index[0:10]
P2 = index[10:20]
P3 = index[20:30]
P4 = index[30:40]
P5 = index[40:50]
P6 = index[50:60]
P7 = index[60:70]
P8 = index[70:80]
P9 = index[80:90]
P10 = index[90:95]

## Exercice 3

In [15]:
# Compute the index of the return of interest
index_start4 = int(np.where(S_P100index.index == '1996-11-14')[0] - 1)
index_end4 = int(np.where(S_P100index.index == '2000-11-16')[0] - 1)

In [16]:
# Compute the alphas and betas of each stock thanks to the linear regression
nb_asset = len(AR_100_const.columns) # Compute the number of colums to deal with
betas2 = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the beta

for i in range(0, nb_asset):
    zi =  AR_100_const.iloc[index_start4:index_end4, i].values - AR_US3M.iloc[index_start4:index_end4, 0].values
    zm = AR_100.iloc[index_start4:index_end4, 1].values - AR_US3M.iloc[index_start4:index_end4, 0].values
    X = sm.add_constant(zm)
    y = zi
    
    # Fit regression on the data of interest and save the alpha, beta and std of alpha
    reg = sm.OLS(endog = y, exog = X)
    results = reg.fit()
    betas2[i] = results.params[1]

In [17]:
betaP1 = statistics.mean(betas2[P1])
betaP2 = statistics.mean(betas2[P2])
betaP3 = statistics.mean(betas2[P3])
betaP4 = statistics.mean(betas2[P4])
betaP5 = statistics.mean(betas2[P5])
betaP6 = statistics.mean(betas2[P6])
betaP7 = statistics.mean(betas2[P7])
betaP8 = statistics.mean(betas2[P8])
betaP9 = statistics.mean(betas2[P9])
betaP10 = statistics.mean(betas2[P10])

## Exercice 4

In [18]:
index_start_test = int(np.where(S_P100index.index == '2000-11-23')[0] - 1)

In [19]:
ARP1 = statistics.mean(AR_100_const.iloc[index_start_test, :].values[P1]) - AR_US3M.iloc[index_start_test, 0]
ARP2 = statistics.mean(AR_100_const.iloc[index_start_test, :].values[P2]) - AR_US3M.iloc[index_start_test, 0]
ARP3 = statistics.mean(AR_100_const.iloc[index_start_test, :].values[P3]) - AR_US3M.iloc[index_start_test, 0]
ARP4 = statistics.mean(AR_100_const.iloc[index_start_test, :].values[P4]) - AR_US3M.iloc[index_start_test, 0]
ARP5 = statistics.mean(AR_100_const.iloc[index_start_test, :].values[P5]) - AR_US3M.iloc[index_start_test, 0]
ARP6 = statistics.mean(AR_100_const.iloc[index_start_test, :].values[P6]) - AR_US3M.iloc[index_start_test, 0]
ARP7 = statistics.mean(AR_100_const.iloc[index_start_test, :].values[P7]) - AR_US3M.iloc[index_start_test, 0]
ARP8 = statistics.mean(AR_100_const.iloc[index_start_test, :].values[P8]) - AR_US3M.iloc[index_start_test, 0]
ARP9 = statistics.mean(AR_100_const.iloc[index_start_test, :].values[P9]) - AR_US3M.iloc[index_start_test, 0]
ARP10 = statistics.mean(AR_100_const.iloc[index_start_test, :].values[P10]) - AR_US3M.iloc[index_start_test, 0]
    
zi = np.array([ARP1, ARP2, ARP3, ARP4, ARP5, ARP6, ARP7, ARP8, ARP9, ARP10])
bi = np.array([betaP1, betaP2, betaP3, betaP4, betaP5, betaP6, betaP7, betaP8, betaP9, betaP10])
X = sm.add_constant(bi)
y = zi
    
# Fit regression on the data of interest and save the alpha, beta and std of alpha
reg = sm.OLS(endog = y, exog = X)
results = reg.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.880
Model:                            OLS   Adj. R-squared:                  0.865
Method:                 Least Squares   F-statistic:                     58.59
Date:                Sun, 18 Oct 2020   Prob (F-statistic):           5.99e-05
Time:                        15:47:08   Log-Likelihood:                 31.501
No. Observations:                  10   AIC:                            -59.00
Df Residuals:                       8   BIC:                            -58.40
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0465      0.011      4.321      0.0



In [20]:
index_start5 = int(np.where(S_P100index.index == '2000-11-23')[0] - 1)
index_end5 = int(np.where(S_P100index.index == '2002-11-14')[0] - 1)

In [21]:
# Compute the alphas and betas of each stock thanks to the linear regression
nb_date = len(range(index_start5, index_end5 + 1)) # Compute the number of line to deal with
phi0 = np.zeros(nb_date) # Initialize an array with nb_asset entries to receive the phi0
phi1 = np.zeros(nb_date) # Initialize an array with nb_asset entries to receive the phi0

for i in range(index_start5, index_end5 + 1):
    ARP1 = statistics.mean(AR_100_const.iloc[i, :].values[P1]) - AR_US3M.iloc[i, 0]
    ARP2 = statistics.mean(AR_100_const.iloc[i, :].values[P2]) - AR_US3M.iloc[i, 0]
    ARP3 = statistics.mean(AR_100_const.iloc[i, :].values[P3]) - AR_US3M.iloc[i, 0]
    ARP4 = statistics.mean(AR_100_const.iloc[i, :].values[P4]) - AR_US3M.iloc[i, 0]
    ARP5 = statistics.mean(AR_100_const.iloc[i, :].values[P5]) - AR_US3M.iloc[i, 0]
    ARP6 = statistics.mean(AR_100_const.iloc[i, :].values[P6]) - AR_US3M.iloc[i, 0]
    ARP7 = statistics.mean(AR_100_const.iloc[i, :].values[P7]) - AR_US3M.iloc[i, 0]
    ARP8 = statistics.mean(AR_100_const.iloc[i, :].values[P8]) - AR_US3M.iloc[i, 0]
    ARP9 = statistics.mean(AR_100_const.iloc[i, :].values[P9]) - AR_US3M.iloc[i, 0]
    ARP10 = statistics.mean(AR_100_const.iloc[i, :].values[P10]) - AR_US3M.iloc[i, 0]
    
    zi = np.array([ARP1, ARP2, ARP3, ARP4, ARP5, ARP6, ARP7, ARP8, ARP9, ARP10])
    bi = np.array([betaP1, betaP2, betaP3, betaP4, betaP5, betaP6, betaP7, betaP8, betaP9, betaP10])
    X = sm.add_constant(bi)
    y = zi
    
    # Fit regression on the data of interest and save the alpha, beta and std of alpha
    reg = sm.OLS(endog = y, exog = X)
    results = reg.fit()
    phi0[i-index_start5] = results.params[0]
    phi1[i-index_start5] = results.params[1]

In [22]:
mean_phi0 = statistics.mean(phi0)
mean_phi1 = statistics.mean(phi1)
T = len(phi0)

In [23]:
var_phi0 = np.sum((phi0-mean_phi0)**2)/(T*(T-1))
sd_phi0 = math.sqrt(var_phi0)
var_phi1 = np.sum((phi1-mean_phi1)**2)/(T*(T-1))
sd_phi1 = math.sqrt(var_phi1)

In [24]:
# 1.98 (2-sided) for t-distribution with dof of 103
abs(mean_phi0/sd_phi0) < 1.98

True

In [25]:
# 1.66 (1-sided) for t-distribution with dof of 103
mean_phi1/sd_phi1 > 1.66

False

# Alternatives to CAPM : Fama-French (1992) Model

In [26]:
# Compute FamaFrenchPortfolios rates
FFP = pd.DataFrame((FamaFrenchPortfolios.iloc[1:,0:].values / 100),  columns = ['Small_Low BE/ME', 'Small_Med BE/ME', 'Small_High BE/ME','Big_Low BE/ME', 'Big_Med BE/ME', 'Big_High BE/ME'])

In [27]:
small_return = np.mean(FFP.iloc[:,0:3].values, axis = 1)
big_return = np.mean(FFP.iloc[:,3:].values, axis = 1)
High_return = np.mean(FFP.iloc[:,[2,5]].values, axis = 1)
low_return = np.mean(FFP.iloc[:,[0,3]].values, axis = 1)
SmB = small_return - big_return
HmL = High_return - low_return

In [28]:
# Compute the index of the return of interest
index_start6 = int(np.where(S_P100index.index == '1992-11-12')[0])
index_end6 = int(np.where(S_P100index.index == '2001-08-16')[0] - 1)

In [29]:
# Compute the alphas and betas of each stock thanks to the linear regression
nb_asset = len(FFP.columns) # Compute the number of colums to deal with
alphas3 = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the beta
betas3 = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the alpha
r2_2 = np.zeros(nb_asset)
r2_adj = np.zeros(nb_asset)

for i in range(0, nb_asset):
    zi =  FFP.iloc[index_start6:index_end6, i].values - AR_US3M.iloc[index_start6:index_end6, 0].values
    zm = AR_100.iloc[index_start6:index_end6, 1].values - AR_US3M.iloc[index_start6:index_end6, 0].values
    X = sm.add_constant(zm)
    y = zi
    
    # Fit regression on the data of interest and save the alpha, beta and std of alpha
    reg = sm.OLS(endog = y, exog = X)
    results = reg.fit()
    alphas3[i] = results.params[0]
    betas3[i] = results.params[1]
    r2_2[i] = results.rsquared
    r2_adj[i] = results.rsquared_adj

In [30]:
# Compute the alphas and betas of each stock thanks to the linear regression
nb_asset = len(FFP.columns) # Compute the number of colums to deal with
alphas4 = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the beta
betas4 = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the alpha
s4 = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the std of alpha
h4 = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the std of alpha
r2_24 = np.zeros(nb_asset)
r2_adj4 = np.zeros(nb_asset)

for i in range(0, nb_asset):
    zi =  FFP.iloc[index_start6:index_end6, i].values - AR_US3M.iloc[index_start6:index_end6, 0].values
    si =  SmB[index_start6:index_end6] - AR_US3M.iloc[index_start6:index_end6, 0].values
    hi =  HmL[index_start6:index_end6] - AR_US3M.iloc[index_start6:index_end6, 0].values
    zm = AR_100.iloc[index_start6:index_end6, 1].values - AR_US3M.iloc[index_start6:index_end6, 0].values
    independent_vars = np.column_stack((zm, si, hi))
    X = sm.add_constant(independent_vars)
    y = zi
    
    # Fit regression on the data of interest and save the alpha, beta and std of alpha
    reg = sm.OLS(endog = y, exog = X)
    results = reg.fit()
    alphas4[i] = results.params[0]
    betas4[i] = results.params[1]
    s4[i] = results.params[2]
    h4[i] = results.params[3]
    r2_24[i] = results.rsquared
    r2_adj4[i] = results.rsquared_adj

In [31]:
# Compute the index of the return of interest
index_start7 = int(np.where(S_P100index.index == '2001-08-16')[0] - 1)

In [32]:
# Compute the alphas and betas of each stock thanks to the linear regression
nb_asset = len(FFP.columns) # Compute the number of colums to deal with
alphas5 = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the beta
betas5 = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the alpha
s5 = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the std of alpha
h5 = np.zeros(nb_asset) # Initialize an array with nb_asset entries to receive the std of alpha
r2_25 = np.zeros(nb_asset)
r2_adj5 = np.zeros(nb_asset)

for i in range(0, nb_asset):
    zi =  FFP.iloc[index_start7:, i].values - AR_US3M.iloc[index_start7:, 0].values
    si =  SmB[index_start7:] - AR_US3M.iloc[index_start7:, 0].values
    hi =  HmL[index_start7:] - AR_US3M.iloc[index_start7:, 0].values
    zm = AR_100.iloc[index_start7:, 1].values - AR_US3M.iloc[index_start7:, 0].values
    independent_vars = np.column_stack((zm, si, hi))
    X = sm.add_constant(independent_vars)
    y = zi
    
    # Fit regression on the data of interest and save the alpha, beta and std of alpha
    reg = sm.OLS(endog = y, exog = X)
    results = reg.fit()
    alphas5[i] = results.params[0]
    betas5[i] = results.params[1]
    s5[i] = results.params[2]
    h5[i] = results.params[3]
    r2_25[i] = results.rsquared
    r2_adj5[i] = results.rsquared_adj