Standard Imports

In [28]:
import pandas as pd
import numpy as np
import statsmodels.api as sm 
from sklearn.preprocessing import scale, normalize
from scipy.stats import zscore #computes changes in terms of standard deviations
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic
import statsmodels.api as sm #sm.GLS(y,x) or sm.OLS(y,x)
import copy
from statsmodels.tsa.arima_model import ARMA
from scipy import stats

Import Data

In [49]:
data = pd.read_csv('r_ES50_d_cleaned_realized.csv', index_col='Date',  parse_dates=True)

In [50]:
data.columns

Index(['ADS.DE', 'AI.PA', 'ALV.DE', 'ASML.AS', 'CS.PA', 'BBVA.MC', 'SAN.MC',
       'BAS.DE', 'BAYN.DE', 'BMW.DE', 'BNP.PA', 'CRG.IR', 'SGO.PA', 'DAI.DE',
       'DTE.DE', 'ENEL.MI', 'ENGI.PA', 'ENI.MI', 'EOAN.DE', 'EL.PA', 'FRE.DE',
       'BN.PA', 'IBE.MC', 'INGA.AS', 'ISP.MI', 'OR.PA', 'MC.PA', 'MUV2.DE',
       'NOKIA.HE', 'ORA.PA', 'PHIA.AS', 'SAF.PA', 'SAN.PA', 'SAP.DE', 'SU.PA',
       'SIE.DE', 'TEF.MC', 'FP.PA', 'URW.AS', 'UNA.AS', 'DG.PA', 'VIV.PA',
       'VOW.DE', '1/N'],
      dtype='object')

In [51]:
#compute 1/N portfolio
temp = data['1/N']
data.drop(columns=['1/N'], inplace=True)
w_eqw = [1/data.shape[1] for i in range(data.shape[1])]
data['1/N'] = data.dot(w_eqw)

In [14]:
#Compute Returns from closing prices
#sap_eod_data['pct_change'] = sap_eod_data['Close'].pct_change() #computes percentage change, which is different to log return

#sap_eod_data['log_return'] = np.log(sap_eod_data['Close']) - np.log(sap_eod_data['Close'].shift(1)) # log return

#sap_eod_data['ln_return'] = np.log(1+sap_eod_data['Close'].pct_change()) # log return

In [55]:
df_mean_std = data.agg(['mean', 'std']).T

sharp_ratio = df_mean_std['mean'] / df_mean_std['std'] #The higher the better the relationship between return and risk

Unnamed: 0,mean,std
ADS.DE,0.000601,0.019164
AI.PA,0.000496,0.015732
ALV.DE,5.2e-05,0.021632
ASML.AS,0.000474,0.028144
CS.PA,0.00026,0.027866
BBVA.MC,-9.4e-05,0.021493
SAN.MC,-5e-05,0.022108
BAS.DE,0.000359,0.018277
BAYN.DE,0.000201,0.020366
BMW.DE,0.000291,0.020759


In [56]:
data.describe()

Unnamed: 0,ADS.DE,AI.PA,ALV.DE,ASML.AS,CS.PA,BBVA.MC,SAN.MC,BAS.DE,BAYN.DE,BMW.DE,...,SU.PA,SIE.DE,TEF.MC,FP.PA,URW.AS,UNA.AS,DG.PA,VIV.PA,VOW.DE,1/N
count,5285.0,5285.0,5285.0,5285.0,5285.0,5285.0,5285.0,5285.0,5285.0,5285.0,...,5285.0,5285.0,5285.0,5285.0,5285.0,5285.0,5285.0,5285.0,5285.0,5285.0
mean,0.000601,0.000496,5.2e-05,0.000474,0.00026,-9.4e-05,-5e-05,0.000359,0.000201,0.000291,...,0.00037,0.000102,-0.000143,0.000323,0.000249,0.00039,0.000684,-4.8e-05,0.000252,0.000217
std,0.019164,0.015732,0.021632,0.028144,0.027866,0.021493,0.022108,0.018277,0.020366,0.020759,...,0.02101,0.02317,0.018501,0.017266,0.018114,0.014929,0.018943,0.021851,0.026014,0.013779
min,-0.166886,-0.118337,-0.166382,-0.233179,-0.2035,-0.17649,-0.221725,-0.146707,-0.194076,-0.141391,...,-0.228129,-0.428491,-0.175791,-0.181622,-0.24353,-0.107246,-0.187227,-0.294679,-0.25695,-0.145754
25%,-0.008512,-0.007434,-0.008681,-0.01221,-0.008052,-0.0105,-0.010468,-0.008615,-0.009396,-0.009818,...,-0.009874,-0.009588,-0.008579,-0.008325,-0.007711,-0.007052,-0.008318,-0.009014,-0.010261,-0.006017
50%,0.0,0.000568,0.000353,0.000444,0.0,0.0,0.0,0.000435,0.0,0.0,...,0.000468,0.0,0.0,0.000578,0.000271,0.000224,0.000135,0.0,0.0,0.000761
75%,0.009572,0.008415,0.009202,0.01308,0.008712,0.010152,0.010327,0.009386,0.010247,0.010228,...,0.010819,0.0097,0.008453,0.009003,0.008584,0.007798,0.009277,0.009075,0.011238,0.006722
max,0.162592,0.112783,0.233053,0.21016,0.192927,0.199073,0.208774,0.176407,0.330062,0.138356,...,0.151643,0.435903,0.164084,0.139762,0.16736,0.116166,0.182523,0.202723,0.902682,0.10498


PCA Analysis

In [15]:
pca = PCA()

pca_comp = pca.fit_transform(zscore(data))

pca.explained_variance_ratio_

pcas = pd.DataFrame(data=pca_comp, index=data.index)

pc1 = pcas.iloc[:,0]

Standardize using zscore

In [16]:
#zscore(data)

#scale(data)

data.apply(zscore)

Unnamed: 0_level_0,ADS.DE,AI.PA,ALV.DE,ASML.AS,CS.PA,BBVA.MC,SAN.MC,BAS.DE,BAYN.DE,BMW.DE,...,SU.PA,SIE.DE,TEF.MC,FP.PA,URW.AS,UNA.AS,DG.PA,VIV.PA,VOW.DE,1/N
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-05,-1.283621,-0.624764,-0.221352,-3.578558,-0.561518,-1.052253,-1.110458,-2.150297,-1.331139,-0.976907,...,-2.059845,-1.490252,-2.578986,-1.494511,-0.065909,1.415768,1.150000,-1.114201,-1.468482,-1.672044
2000-01-06,2.158616,4.380705,1.861432,-2.593574,-0.288654,0.296547,0.002275,0.185816,0.157240,-0.170552,...,2.335955,-0.535541,0.054968,-1.725161,1.618312,2.398010,1.695011,-0.537793,-0.179354,0.426928
2000-01-07,-0.751185,-0.814621,1.265788,2.541890,1.095060,0.294724,1.962266,2.179846,1.995106,-0.101209,...,0.843482,1.698018,0.054926,0.269215,-0.356731,0.936059,1.640068,0.372395,1.083602,1.713298
2000-01-10,0.752760,-2.285201,0.263419,2.454048,-0.009346,-0.750695,-0.399052,0.220590,1.741706,1.868841,...,-0.816643,2.490551,1.613215,0.029180,0.581891,-0.796931,-2.282930,3.939767,0.097947,0.685639
2000-01-11,-0.519168,-0.617456,-0.425752,0.417526,-0.834444,-0.939194,-1.556388,-2.504947,-1.484559,-0.182163,...,-1.882159,0.292244,0.053619,0.410175,0.237769,-0.866780,2.210665,0.852950,0.587570,-0.352062
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-25,0.475416,0.295024,0.703729,0.010745,0.305117,0.599061,1.121855,0.035444,-1.476856,0.751685,...,0.259277,0.360649,1.254581,0.443761,1.185565,-0.122268,0.559394,0.380724,0.720039,0.568956
2020-06-26,-1.139409,-0.408532,-0.713767,0.203180,-0.094485,-1.115047,-1.404249,-1.131934,-0.840936,-1.132575,...,0.345823,-0.122901,-1.084888,-0.125721,-2.928218,0.412141,-0.488637,-0.136876,-0.359301,-0.574869
2020-06-29,-0.390967,0.670328,0.570608,-0.214824,0.172299,1.290154,1.789587,1.569543,0.070944,0.791260,...,0.614191,0.776829,1.351313,0.123982,0.630159,-1.740727,0.877797,0.398491,0.393441,0.900110
2020-06-30,0.462453,0.415593,0.390952,0.497762,-0.024772,-0.268181,-0.668820,-0.293198,-0.831446,0.079359,...,0.117393,0.434579,-0.888508,-0.990776,0.140549,-0.843449,-0.510430,-0.713579,-0.143588,-0.142601


Annualize returns and volatility

In [17]:
returns * 252 #annualize returns

vol * np.sqrt(252) #annualize vol

NameError: name 'returns' is not defined

GLS/OLS Regression

In [19]:
data['rf'] = 0.1

data_es50 = data.drop(columns=['1/N', 'rf'], axis=1)

In [20]:
data['ADS.DE'] - data['rf']

Date
2000-01-05   -0.123996
2000-01-06   -0.058036
2000-01-07   -0.113793
2000-01-10   -0.084975
2000-01-11   -0.109347
                ...   
2020-06-25   -0.090289
2020-06-26   -0.121232
2020-06-29   -0.106891
2020-06-30   -0.090538
2020-07-01   -0.105150
Length: 5285, dtype: float64

In [21]:
x = data['1/N'] - data['rf']
X = sm.add_constant(x)

y = data['ADS.DE'] - data['rf']

result = sm.OLS(y,X).fit()

result.summary() #46% explained by 7 Factors

0,1,2,3
Dep. Variable:,y,R-squared:,0.314
Model:,OLS,Adj. R-squared:,0.314
Method:,Least Squares,F-statistic:,2414.0
Date:,"Sat, 30 Jan 2021",Prob (F-statistic):,0.0
Time:,14:26:11,Log-Likelihood:,14397.0
No. Observations:,5285,AIC:,-28790.0
Df Residuals:,5283,BIC:,-28780.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0217,0.002,-13.577,0.000,-0.025,-0.019
0,0.7789,0.016,49.134,0.000,0.748,0.810

0,1,2,3
Omnibus:,971.396,Durbin-Watson:,1.978
Prob(Omnibus):,0.0,Jarque-Bera (JB):,22372.069
Skew:,0.211,Prob(JB):,0.0
Kurtosis:,13.071,Cond. No.,73.3


In [22]:
result.pvalues.values[1:]

array([0.])

AutoRegression and Vectorautoregression

In [23]:
#always use zscored input to make it stationary and have an impulse shock of 1 standard deviation
model = VAR(data_es50.apply(zscore))



In [24]:
x = model.select_order(maxlags=12)
x.summary() #choose values lag length with star

0,1,2,3,4
,AIC,BIC,FPE,HQIC
0.0,-27.72,-27.67*,9.122e-13,-27.70*
1.0,-28.09,-25.74,6.288e-13,-27.27
2.0,-28.20,-23.54,5.637e-13,-26.58
3.0,-28.21*,-21.25,5.604e-13*,-25.78
4.0,-28.19,-18.93,5.697e-13,-24.95
5.0,-28.19,-16.62,5.712e-13,-24.15
6.0,-28.16,-14.28,5.912e-13,-23.31
7.0,-28.09,-11.91,6.376e-13,-22.43
8.0,-28.04,-9.559,6.667e-13,-21.58


In [25]:
model_fitted = model.fit(1) # VAR(1)
model_fitted.summary()

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Sat, 30, Jan, 2021
Time:                     14:26:29
--------------------------------------------------------------------
No. of Equations:         43.0000    BIC:                   -25.5450
Nobs:                     5284.00    HQIC:                  -27.0758
Log likelihood:          -246800.    FPE:                7.65436e-13
AIC:                     -27.8983    Det(Omega_mle):     5.35856e-13
--------------------------------------------------------------------
Results for equation ADS.DE
                 coefficient       std. error           t-stat            prob
------------------------------------------------------------------------------
const               0.000216         0.013707            0.016           0.987
L1.ADS.DE           0.030131         0.016905            1.782           0.075
L1.AI.PA           -0.015239         0.020784           -0.733     

Impulse Response Function

In [26]:

periods = 5

model_fitted.irf(periods).plot(impulse='0.25 Y', response='1/N', orth=True, stderr_type="asym", signif=0.05) #normalerweise ortho=True, außer bei irf zwischen principal component und ursprünglichen vektor

ValueError: '0.25 Y' is not in list

Granger Causality Check

In [None]:
from statsmodels.tsa.stattools import grangercausalitytests
maxlag=12
test = 'ssr_chi2test'
def grangers_causation_matrix(data, variables, test='ssr_chi2test', verbose=False):    
    """Check Granger Causality of all possible combinations of the Time series.
    The rows are the response variable, columns are predictors. The values in the table 
    are the P-Values. P-Values lesser than the significance level (0.05), implies 
    the Null Hypothesis that the coefficients of the corresponding past values is 
    zero, that is, the X does not cause Y can be rejected.

    data      : pandas dataframe containing the time series variables
    variables : list containing names of the time series variables.
    """
    df = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in df.columns:
        for r in df.index:
            test_result = grangercausalitytests(data[[r, c]], maxlag=maxlag, verbose=False)
            p_values = [round(test_result[i+1][0][test][1],4) for i in range(maxlag)]
            if verbose: print(f'Y = {r}, X = {c}, P Values = {p_values}')
            min_p_value = np.min(p_values)
            df.loc[r, c] = min_p_value
    df.columns = [var + '_x' for var in variables]
    df.index = [var + '_y' for var in variables]
    return df

granger_result = grangers_causation_matrix(df_1n_ecb, variables= df_1n_ecb.columns) #low p-value mean relationship

ARMA(p,q)

In [None]:
#check for predictability
'''returns: return array i.e. returns.values'''
def selectARMAorder(max_p,max_q,returns):
    bic_vals = np.zeros((max_p+1,max_q+1))
    for i in range(max_p+1):
        for j in range(max_p+1):
            arma_model = ARMA(returns,(i,j))
            bic_vals[i,j] = arma_model.fit().bic
    opt = np.argmin(bic_vals)
    opt_model = np.unravel_index(opt, bic_vals.shape)
    return opt_model


In [None]:
opt_arma_order = selectARMAorder(2,2,r_t.values)
print(opt_arma_order) # (0,0) indicates constant mean fits best according to bic
arma_model = ARMA(r_t,opt_arma_order)
arma_result = arma_model.fit()

R_Squared as Variance Measure

Stochastic Volatility Models: Harvey & Garch

In [None]:
#Garch
from arch import arch_model 

##GARCH(1,1) 
equityVol =  arch_model(zscore(data['1/N']), p=1, q=1, mean='zero', vol = 'GARCH', dist = 'normal')  
equityVol_result = equityVol.fit(update_freq=4)


data["data_equity_garch_vol"] =  equityVol_result.conditional_volatility

In [None]:
#Harvey
# https://mc-stan.org/docs/2_21/stan-users-guide/stochastic-volatility-models.html
%run Helper_SVM_Harvey.ipynb

In [None]:
svm = QLSV(data['1/N'] - data['1/N'].mean()) #Either you use zscore for the vol model or demeaned returns; In case of zscore, the volatility is in terms of standard deviations
svm_result = svm.fit(cov_type='robust')

data["data_equity_harvey_cond_vol"] = np.exp(svm_result.smoothed_state[0] / 2) #nur smoothed_state[0] ist die log volatilty

In [None]:
print(svm_result.summary())

Fama/MacBeth Model

In [97]:
%run Helper_FamaMacBeth.ipynb

In [None]:
import copy

class FMacBethAlex():
    def __init__(self, factor_returns, returns, risk_free_rate):
        #input: returns: return panel, pandas
        #input: factor_returns: factor panel, pandas
        #input: rf: risk-free rate, pandas series
        
        self.r = copy.copy(returns)
        self.f = copy.copy(factor_returns)
        self.rf = copy.copy(risk_free_rate)
        
        self.beta_panel = pd.DataFrame()
        
        self.lambda_panel = pd.DataFrame()
        
        self.r_squared_beta_panel = {}
        
        self.beta_pvalues = {}
        
        self.lambda_pvalues = {}
        
        self.r_squared_lambda_panel = {}
        
        self.df_betas_internal = {}
        
        
    def compute_betas(self):
    
        df_betas = pd.DataFrame()
        temp_beta_array = []
        
        adj_r = []
        
        pvalues = []
        
        for stock in self.r.columns:
            
        
            x = self.f
            X = sm.add_constant(x)
            y = self.r[stock] - self.rf
            result = sm.GLS(y,X).fit()
            
            #save beta/coefficient for each stock
            temp_beta_array.append(result.params.values[1:])
            #print(stock,': ', result.rsquared_adj)
            adj_r.append(result.rsquared_adj)
            
            pvalues.append(result.pvalues.values[1:])
            
            #print('Max: ' ,max(adj_r))
            
            #df_betas[factor] = temp_beta_array
        
        
        self.r_squared_beta_panel = adj_r
        df_betas = pd.DataFrame(data = temp_beta_array , columns=self.f.columns)
        df_betas_pvalues = pd.DataFrame(data = pvalues , columns=self.f.columns)
        #set stock names as index
        df_betas.set_index(returns.columns, inplace=True)
    
        self.df_betas_internal = df_betas.copy()
    
        df_betas['r_squared'] = adj_r
        
    
        #return betas
        self.beta_pvalues = df_betas_pvalues.copy()
        self.beta_panel = df_betas.copy()
        
    def cross_regress(self):
    
        #betas are x
        x = self.df_betas_internal
        X = sm.add_constant(x)
        
        temp_coeff_array = []
        i = 0
        
        lambda_adj_r = []
        pvalues = []
        
        for row in self.r.to_numpy():
            y = row - self.rf.iloc[i]
            result = sm.GLS(y, X).fit()
            temp_coeff_array.append(result.params.values[1:])
            i = i + 1
            lambda_adj_r.append(result.rsquared_adj)
            pvalues.append(result.pvalues.values[1:])
        
        self.lambda_pvalues = pd.DataFrame(data = pvalues, columns = self.df_betas_internal.columns, index=self.r.index)
        
        market_premiums = pd.DataFrame(data = temp_coeff_array, columns = self.df_betas_internal.columns, index=self.r.index)
        
        market_premiums['r_squared']= lambda_adj_r
        
        self.lambda_panel =  market_premiums.copy()
        

def compute_betas(factor_returns, returns, risk_free_rate):
    
    df_betas = pd.DataFrame()
    temp_beta_array = []
    
    adj_r = []
    
    for stock in returns.columns:
        
    
        x = factor_returns
        X = sm.add_constant(x)
        y = returns[stock] - risk_free_rate
        result = sm.GLS(y,X).fit()
        
        #save beta/coefficient for each stock
        temp_beta_array.append(result.params.values[1:])
        print(stock,': ',result.rsquared_adj)
        
        adj_r.append(stock +': ' + str(result.rsquared_adj))
        
        #print('Max: ' ,max(adj_r))
        
        #df_betas[factor] = temp_beta_array
    
    df_betas = pd.DataFrame(data = temp_beta_array, columns=factor_returns.columns)
    #set stock names as index
    df_betas.set_index(self.r.columns, inplace=True)
    
    #return betas
    return df_betas 

def cross_regress(df_betas, returns, risk_free_rate):
    
    #betas are x
    x = df_betas
    X = sm.add_constant(x)
    
    temp_coeff_array = []
    i = 0
    
    for row in returns.to_numpy():
        y = row - risk_free_rate.iloc[i]
        result = sm.GLS(y, X).fit()
        temp_coeff_array.append(result.params.values[1:])
        i = i + 1
        
    
    market_premiums = pd.DataFrame(data = temp_coeff_array, columns = df_betas.columns)
    
    #market_premiums = market_premiums.agg(func=['mean'])
    
    return market_premiums

In [None]:
returns = data.iloc[:, :43]

market_factor = data['1/N'] # just the market factor here i.e. CAPM

rf = pd.DataFrame(data=data['rf'], index=data.index) #is zero here

In [None]:
fMacBeth = FMacBeth(returns, market_factor.to_frame(), rf) # (return panel, factor panel, risk-free panel )

fMacBeth.TSRegression_step1()

fMacBeth.CSRegression_step2()

In [None]:
fmacAlex = FMacBethAlex(market_factor.to_frame(), returns, rf['rf'])

fmacAlex.compute_betas()

fmacAlex.cross_regress()

fmacAlex.beta_panel

In [None]:
fmacAlex.beta_pvalues

In [None]:
fmacAlex.lambda_panel

In [None]:
fmacAlex.lambda_pvalues

Options

In [None]:
from black_scholes import IV_BS_Newton

In [None]:
sap_atm_options['bs_impl_vol'] = IV_BS_Newton(sap_atm_options["put_price"].values, sap_atm_options["futures_price"].values,\
                sap_atm_options["exercise_price"].values, sap_atm_options["dtm"].values/360,\
                sap_atm_options["rf"].values,type_='P', underlying='future')

Kalman Filter

Stationarity Test / Adfuller

In [30]:
def adfuller_test(series, signif=0.05, name='', verbose=False):
    """Perform ADFuller to test for Stationarity of given series and print report"""
    r = adfuller(series, autolag='BIC')
    output = {'test_statistic':round(r[0], 4), 'pvalue':round(r[1], 4), 'n_lags':round(r[2], 4), 'n_obs':r[3]}
    p_value = output['pvalue'] 
    def adjust(val, length= 6): return str(val).ljust(length)

    # Print Summary
    print(f'    Augmented Dickey-Fuller Test on "{name}"', "\n   ", '-'*47)
    print(f' Null Hypothesis: Data has unit root. Non-Stationary.')
    print(f' Significance Level    = {signif}')
    print(f' Test Statistic        = {output["test_statistic"]}')
    print(f' No. Lags Chosen       = {output["n_lags"]}')

    for key,val in r[4].items():
        print(f' Critical value {adjust(key)} = {round(val, 3)}')

    if p_value <= signif:
        print(f" => P-Value = {p_value}. Rejecting Null Hypothesis.")
        print(f" => Series is Stationary.")
    else:
        print(f" => P-Value = {p_value}. Weak evidence to reject the Null Hypothesis.")
        print(f" => Series is Non-Stationary.")

In [31]:
for name, column in data.iteritems():
    adfuller_test(column, name=column.name)
    print('\n')

    Augmented Dickey-Fuller Test on "ADS.DE" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -70.3869
 No. Lags Chosen       = 0
 Critical value 1%     = -3.432
 Critical value 5%     = -2.862
 Critical value 10%    = -2.567
 => P-Value = 0.0. Rejecting Null Hypothesis.
 => Series is Stationary.


    Augmented Dickey-Fuller Test on "AI.PA" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -81.5587
 No. Lags Chosen       = 0
 Critical value 1%     = -3.432
 Critical value 5%     = -2.862
 Critical value 10%    = -2.567
 => P-Value = 0.0. Rejecting Null Hypothesis.
 => Series is Stationary.


    Augmented Dickey-Fuller Test on "ALV.DE" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    

    Augmented Dickey-Fuller Test on "FRE.DE" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -76.7203
 No. Lags Chosen       = 0
 Critical value 1%     = -3.432
 Critical value 5%     = -2.862
 Critical value 10%    = -2.567
 => P-Value = 0.0. Rejecting Null Hypothesis.
 => Series is Stationary.


    Augmented Dickey-Fuller Test on "BN.PA" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -46.3076
 No. Lags Chosen       = 2
 Critical value 1%     = -3.432
 Critical value 5%     = -2.862
 Critical value 10%    = -2.567
 => P-Value = 0.0. Rejecting Null Hypothesis.
 => Series is Stationary.


    Augmented Dickey-Fuller Test on "IBE.MC" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    

    Augmented Dickey-Fuller Test on "DG.PA" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -36.5497
 No. Lags Chosen       = 4
 Critical value 1%     = -3.432
 Critical value 5%     = -2.862
 Critical value 10%    = -2.567
 => P-Value = 0.0. Rejecting Null Hypothesis.
 => Series is Stationary.


    Augmented Dickey-Fuller Test on "VIV.PA" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    = 0.05
 Test Statistic        = -37.6228
 No. Lags Chosen       = 4
 Critical value 1%     = -3.432
 Critical value 5%     = -2.862
 Critical value 10%    = -2.567
 => P-Value = 0.0. Rejecting Null Hypothesis.
 => Series is Stationary.


    Augmented Dickey-Fuller Test on "VOW.DE" 
    -----------------------------------------------
 Null Hypothesis: Data has unit root. Non-Stationary.
 Significance Level    

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


In [None]:
#Make Stationary by differentiating:
data_stat = data.diff().dropna()

In [None]:
#Make Stationary by zscore (Changes distribution):
zscore(data)

Normalverteilung/Jarque-Berra Test

In [29]:
stats.jarque_bera(data["ADS.DE"]) #Null Hypothese nimmt Normalverteilung an. pvalue=0.1: 10% Wahrscheinlichkeit Nullhypothese ist korrekt.

Jarque_beraResult(statistic=12784.531958116217, pvalue=0.0)

Systematic Risk / Unsystematic Risk

In [99]:

'''
data_std_mean = data.agg(['mean', 'std']).T
data_std_mean['corrWith1/N'] = data.corr()['1/N'] 
data_std_mean['Total.Risk'] = (data_std_mean['std'] ** 2) * 252

data_std_mean['System.Risk'] = data_std_mean['std']['1/N'] * data_std_mean['std'] * data_std_mean['corrWith1/N']

data_std_mean['System.Risk'] = data_std_mean['System.Risk'] * 252
data_std_mean['System.Risk.Ratio'] =  data_std_mean['System.Risk'] / data_std_mean['Total.Risk']

'''

In [95]:
X = sm.add_constant(data['1/N'].copy())

gls = sm.GLS(data['AI.PA'], X)

result = gls.fit()

result.summary() #r_squared is systematic risk, 1/N coefficient is beta

0,1,2,3
Dep. Variable:,AI.PA,R-squared:,0.529
Model:,GLS,Adj. R-squared:,0.529
Method:,Least Squares,F-statistic:,5933.0
Date:,"Sat, 30 Jan 2021",Prob (F-statistic):,0.0
Time:,17:54:59,Log-Likelihood:,16434.0
No. Observations:,5285,AIC:,-32860.0
Df Residuals:,5283,BIC:,-32850.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0003,0.000,2.130,0.033,2.51e-05,0.001
1/N,0.8304,0.011,77.028,0.000,0.809,0.852

0,1,2,3
Omnibus:,1106.368,Durbin-Watson:,2.215
Prob(Omnibus):,0.0,Jarque-Bera (JB):,18619.991
Skew:,0.537,Prob(JB):,0.0
Kurtosis:,12.133,Cond. No.,72.6


Correlation Analysis

In [94]:
np.corrcoef(data['AI.PA'], data['1/N'])

array([[1.        , 0.72731644],
       [0.72731644, 1.        ]])

In [102]:
data.corr()

Unnamed: 0,ADS.DE,AI.PA,ALV.DE,ASML.AS,CS.PA,BBVA.MC,SAN.MC,BAS.DE,BAYN.DE,BMW.DE,...,SU.PA,SIE.DE,TEF.MC,FP.PA,URW.AS,UNA.AS,DG.PA,VIV.PA,VOW.DE,1/N
ADS.DE,1.0,0.388807,0.450709,0.304632,0.394184,0.387748,0.389301,0.480341,0.3808,0.445458,...,0.427496,0.415266,0.309397,0.386752,0.327038,0.316999,0.391919,0.295164,0.25205,0.56004
AI.PA,0.388807,1.0,0.552326,0.402729,0.53664,0.550599,0.558219,0.620586,0.524982,0.504605,...,0.551683,0.474288,0.471666,0.563838,0.408094,0.427068,0.497231,0.438899,0.310669,0.727316
ALV.DE,0.450709,0.552326,1.0,0.426367,0.666409,0.638084,0.643762,0.6589,0.539662,0.572547,...,0.531487,0.591426,0.525507,0.564897,0.423604,0.376973,0.517459,0.48193,0.290527,0.798634
ASML.AS,0.304632,0.402729,0.426367,1.0,0.406535,0.431757,0.43078,0.407899,0.369855,0.366333,...,0.375544,0.482007,0.430863,0.350195,0.241542,0.211998,0.302714,0.437071,0.272956,0.59414
CS.PA,0.394184,0.53664,0.666409,0.406535,1.0,0.638593,0.636118,0.551941,0.438445,0.518278,...,0.525146,0.495699,0.498211,0.530296,0.405684,0.348002,0.484518,0.475218,0.308257,0.754072
BBVA.MC,0.387748,0.550599,0.638084,0.431757,0.638593,1.0,0.884744,0.586982,0.478315,0.546993,...,0.560173,0.536035,0.693799,0.573287,0.457459,0.359932,0.526697,0.502539,0.344714,0.810478
SAN.MC,0.389301,0.558219,0.643762,0.43078,0.636118,0.884744,1.0,0.599112,0.481706,0.546742,...,0.570099,0.53871,0.683525,0.588674,0.438306,0.362564,0.511141,0.497971,0.347639,0.809901
BAS.DE,0.480341,0.620586,0.6589,0.407899,0.551941,0.586982,0.599112,1.0,0.645951,0.605615,...,0.57457,0.59855,0.480451,0.592251,0.433954,0.401348,0.535577,0.409875,0.328096,0.770802
BAYN.DE,0.3808,0.524982,0.539662,0.369855,0.438445,0.478315,0.481706,0.645951,1.0,0.47702,...,0.433837,0.487512,0.430059,0.493186,0.32598,0.380498,0.415685,0.396117,0.284673,0.66346
BMW.DE,0.445458,0.504605,0.572547,0.366333,0.518278,0.546993,0.546742,0.605615,0.47702,1.0,...,0.524589,0.512169,0.440065,0.487532,0.400592,0.355709,0.468218,0.380845,0.441689,0.709225
