In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
# Load the main panel and discard the three Fama-French factor-loading columns,
# which are not used in the regressions below.
# NOTE(review): the frame displayed further down still carries 'Unnamed: 0*'
# columns, presumably index columns written out by an earlier to_csv — confirm
# against data.csv and consider dropping them here.
data = pd.read_csv('data.csv').drop(
    columns=['beta_market_ff3', 'beta_smb', 'beta_hml']
)

In [3]:
# Load the size/volume controls, drop the columns that are not needed, and
# relabel the four columns that remain.
# NOTE(review): set_axis relabels purely by position, so the new names are only
# correct if size.csv keeps its current column order — confirm against the file.
control_data = (
    pd.read_csv('size.csv')
    .drop(columns=['PERMNO', 'UGVKEY', 'datadate', 'indfmt', 'consol',
                   'popsrc', 'datafmt', 'curcd', 'costat'])
    .set_axis(['date', 'market_cap', 'trading_volume', 'ticker'], axis=1)
)

In [4]:
## Merge the main panel with the size/volume controls on (ticker, date).
# An inner join keeps only the keys present in both frames. The previous
# how='outer' followed by dropna() selected the same rows, but it first built
# every unmatched row and filled it with NaN, which can upcast integer columns
# to float (likely why secid is displayed as 7621.0) and, on newer pandas,
# re-sorts the keys.
# dropna() is still required to remove rows with values missing in the inputs.
data = (
    pd.merge(data, control_data, on=['ticker', 'date'], how='inner')
    .dropna(axis=0)
    .reset_index(drop=True)
)

In [5]:
## Build the two remaining controls, then drop the raw columns they came from:
##   log_marcap  = log10(market_cap)
##   illiquidity = |return| / trading_volume   (computed from the unscaled return)
data = data.assign(
    log_marcap=np.log10(data['market_cap']),
    illiquidity=data['return'].abs() / data['trading_volume'],
).drop(columns=['market_cap', 'trading_volume'])

## Rescale for readability: without this the regression coefficients are too
## small to be easily understood. Both return columns are multiplied by 100 and
## illiquidity by 10000.
data[['future_return', 'return']] = data[['future_return', 'return']] * 100
data['illiquidity'] = data['illiquidity'] * 10000

In [6]:
# Display the merged frame with the engineered controls; pandas abbreviates the
# rendering to the first and last rows.
data

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,secid,date,CVOL,cusip,ticker,PVOL,return,beta_market,capm_ahpha,ff3_alpha,delta_CVOL,delta_PVOL,future_return,IVS,log_marcap,illiquidity
0,13.0,13.0,7621.0,2018-02-28,3.245288,40251W40,GURE,1.303142,1.3333,-0.1471,-0.0184,-0.0169,0.003966,0.007276,-5.9211,1.942146,4.852127,0.000142
1,14.0,14.0,7621.0,2018-03-29,2.686095,40251W40,GURE,2.964433,-5.9211,-0.0227,-0.0166,-0.0249,-0.559193,1.661291,-2.7972,-0.278338,4.825619,0.000343
2,15.0,15.0,7621.0,2018-04-30,2.316908,40251W40,GURE,1.677338,-2.7972,-0.0098,-0.0215,-0.0234,-0.369187,-1.287095,2.8777,0.639570,4.813298,0.000321
3,16.0,16.0,7621.0,2018-05-31,5.144718,40251W40,GURE,1.905440,2.8777,0.0963,-0.0177,-0.0216,2.827810,0.228102,-3.4965,3.239278,4.825619,0.000334
4,17.0,17.0,7621.0,2018-06-29,5.472939,40251W40,GURE,2.652440,-3.4965,0.0669,-0.0119,-0.0180,0.328221,0.747000,-5.7971,2.820499,4.810162,0.000520
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
246397,268418.0,254598.0,217521.0,2022-11-30,1.009729,15961010,CRGE,2.685047,-4.5045,0.3088,-0.1257,-0.1517,-1.601025,-1.426354,-41.5094,-1.675318,5.641561,0.000031
246398,268448.0,254631.0,217580.0,2022-11-30,4.540307,83610010,SOUN,5.644150,-52.7675,1.1133,-0.1303,-0.1912,3.138943,2.170758,38.2813,-1.103843,5.304540,0.000426
246399,268486.0,254678.0,217839.0,2022-09-30,1.786786,00974B10,AKLI,7.317552,-49.4407,1.1890,-0.0686,-0.0835,-0.431829,4.881837,-2.6549,-5.530766,5.245417,0.000858
246400,268487.0,254679.0,217839.0,2022-10-31,5.232100,00974B10,AKLI,4.529993,-2.6549,1.1002,-0.0734,-0.0841,3.445314,-2.787559,-33.6364,0.702107,5.233732,0.000100


## CVOL

In [7]:
# Pooled OLS of future_return on CVOL, controlling for beta_market, log_marcap
# and illiquidity; add_constant supplies the intercept column.
# NOTE(review): fit() is called with its defaults, so the reported standard
# errors are the non-robust ones (see "Covariance Type" in the summary) —
# confirm this is intended given the heavy-tailed residuals.
X = data.loc[:, ['CVOL', 'beta_market', 'log_marcap', 'illiquidity']]
y = data.loc[:, 'future_return']
X_new = sm.add_constant(X, prepend=True)
model = sm.OLS(endog=y, exog=X_new)
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,future_return,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,67.55
Date:,"Sat, 07 Sep 2024",Prob (F-statistic):,3.11e-57
Time:,13:38:13,Log-Likelihood:,-1043000.0
No. Observations:,246402,AIC:,2086000.0
Df Residuals:,246397,BIC:,2086000.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-2.0201,0.286,-7.062,0.000,-2.581,-1.459
CVOL,0.3066,0.041,7.458,0.000,0.226,0.387
beta_market,0.3636,0.027,13.428,0.000,0.311,0.417
log_marcap,0.2873,0.044,6.561,0.000,0.202,0.373
illiquidity,6.8779,5.206,1.321,0.186,-3.326,17.082

0,1,2,3
Omnibus:,371401.972,Durbin-Watson:,1.911
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2504526722.787
Skew:,8.34,Prob(JB):,0.0
Kurtosis:,496.627,Cond. No.,993.0


## PVOL

In [8]:
# Pooled OLS of future_return on PVOL, controlling for beta_market, log_marcap
# and illiquidity; add_constant supplies the intercept column.
X = data.loc[:, ['PVOL', 'beta_market', 'log_marcap', 'illiquidity']]
y = data.loc[:, 'future_return']
X_new = sm.add_constant(X, prepend=True)
model = sm.OLS(endog=y, exog=X_new)
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,future_return,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,54.57
Date:,"Sat, 07 Sep 2024",Prob (F-statistic):,4.57e-46
Time:,13:38:13,Log-Likelihood:,-1043000.0
No. Observations:,246402,AIC:,2086000.0
Df Residuals:,246397,BIC:,2086000.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.6287,0.282,-2.227,0.026,-1.182,-0.075
PVOL,-0.0846,0.044,-1.942,0.052,-0.170,0.001
beta_market,0.3877,0.027,14.324,0.000,0.335,0.441
log_marcap,0.1020,0.043,2.356,0.018,0.017,0.187
illiquidity,3.9715,5.206,0.763,0.446,-6.232,14.175

0,1,2,3
Omnibus:,372367.835,Durbin-Watson:,1.913
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2512094874.162
Skew:,8.389,Prob(JB):,0.0
Kurtosis:,497.369,Cond. No.,992.0


## CVOL and PVOL

In [9]:
# Pooled OLS of future_return on CVOL and PVOL jointly, controlling for
# beta_market, log_marcap and illiquidity; add_constant supplies the intercept.
X = data.loc[:, ['CVOL', 'PVOL', 'beta_market', 'log_marcap', 'illiquidity']]
y = data.loc[:, 'future_return']
X_new = sm.add_constant(X, prepend=True)
model = sm.OLS(endog=y, exog=X_new)
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,future_return,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,75.64
Date:,"Sat, 07 Sep 2024",Prob (F-statistic):,1.71e-79
Time:,13:38:13,Log-Likelihood:,-1042900.0
No. Observations:,246402,AIC:,2086000.0
Df Residuals:,246396,BIC:,2086000.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.4970,0.290,-5.155,0.000,-2.066,-0.928
CVOL,0.7422,0.059,12.640,0.000,0.627,0.857
PVOL,-0.6462,0.062,-10.387,0.000,-0.768,-0.524
beta_market,0.3720,0.027,13.734,0.000,0.319,0.425
log_marcap,0.2199,0.044,4.969,0.000,0.133,0.307
illiquidity,5.7514,5.206,1.105,0.269,-4.453,15.956

0,1,2,3
Omnibus:,371882.581,Durbin-Watson:,1.912
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2511813940.663
Skew:,8.364,Prob(JB):,0.0
Kurtosis:,497.343,Cond. No.,998.0


## IVS

In [10]:
# Pooled OLS of future_return on IVS, controlling for beta_market, log_marcap
# and illiquidity; add_constant supplies the intercept column.
X = data.loc[:, ['IVS', 'beta_market', 'log_marcap', 'illiquidity']]
y = data.loc[:, 'future_return']
X_new = sm.add_constant(X, prepend=True)
model = sm.OLS(endog=y, exog=X_new)
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,future_return,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,93.45
Date:,"Sat, 07 Sep 2024",Prob (F-statistic):,1.4600000000000003e-79
Time:,13:38:14,Log-Likelihood:,-1042900.0
No. Observations:,246402,AIC:,2086000.0
Df Residuals:,246397,BIC:,2086000.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.1693,0.245,-4.779,0.000,-1.649,-0.690
IVS,0.7039,0.056,12.615,0.000,0.595,0.813
beta_market,0.3776,0.027,14.010,0.000,0.325,0.430
log_marcap,0.1764,0.039,4.513,0.000,0.100,0.253
illiquidity,5.0648,5.196,0.975,0.330,-5.119,15.249

0,1,2,3
Omnibus:,372117.214,Durbin-Watson:,1.912
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2513873102.278
Skew:,8.376,Prob(JB):,0.0
Kurtosis:,497.545,Cond. No.,986.0


## delta CVOL

In [11]:
# Pooled OLS of future_return on delta_CVOL, controlling for beta_market,
# log_marcap and illiquidity; add_constant supplies the intercept column.
X = data.loc[:, ['delta_CVOL', 'beta_market', 'log_marcap', 'illiquidity']]
y = data.loc[:, 'future_return']
X_new = sm.add_constant(X, prepend=True)
model = sm.OLS(endog=y, exog=X_new)
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,future_return,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,56.42
Date:,"Sat, 07 Sep 2024",Prob (F-statistic):,1.19e-47
Time:,13:38:14,Log-Likelihood:,-1043000.0
No. Observations:,246402,AIC:,2086000.0
Df Residuals:,246397,BIC:,2086000.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.9292,0.244,-3.809,0.000,-1.407,-0.451
delta_CVOL,0.1869,0.056,3.337,0.001,0.077,0.297
beta_market,0.3831,0.027,14.209,0.000,0.330,0.436
log_marcap,0.1422,0.039,3.645,0.000,0.066,0.219
illiquidity,4.5528,5.197,0.876,0.381,-5.634,14.740

0,1,2,3
Omnibus:,372120.322,Durbin-Watson:,1.911
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2511361995.243
Skew:,8.376,Prob(JB):,0.0
Kurtosis:,497.298,Cond. No.,986.0


## delta PVOL

In [12]:
# Pooled OLS of future_return on delta_PVOL, controlling for beta_market,
# log_marcap and illiquidity; add_constant supplies the intercept column.
X = data.loc[:, ['delta_PVOL', 'beta_market', 'log_marcap', 'illiquidity']]
y = data.loc[:, 'future_return']
X_new = sm.add_constant(X, prepend=True)
model = sm.OLS(endog=y, exog=X_new)
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,future_return,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,56.24
Date:,"Sat, 07 Sep 2024",Prob (F-statistic):,1.6800000000000002e-47
Time:,13:38:14,Log-Likelihood:,-1043000.0
No. Observations:,246402,AIC:,2086000.0
Df Residuals:,246397,BIC:,2086000.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.8800,0.244,-3.607,0.000,-1.358,-0.402
delta_PVOL,-0.1916,0.059,-3.230,0.001,-0.308,-0.075
beta_market,0.3829,0.027,14.203,0.000,0.330,0.436
log_marcap,0.1348,0.039,3.457,0.001,0.058,0.211
illiquidity,4.5723,5.197,0.880,0.379,-5.615,14.759

0,1,2,3
Omnibus:,372180.733,Durbin-Watson:,1.914
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2508711677.086
Skew:,8.379,Prob(JB):,0.0
Kurtosis:,497.037,Cond. No.,986.0


## delta_CVOL and delta_PVOL

In [15]:
# Pooled OLS of future_return on delta_CVOL and delta_PVOL jointly, controlling
# for beta_market, log_marcap and illiquidity; add_constant supplies the
# intercept column.
X = data.loc[:, ['delta_CVOL', 'delta_PVOL', 'beta_market', 'log_marcap', 'illiquidity']]
y = data.loc[:, 'future_return']
X_new = sm.add_constant(X, prepend=True)
model = sm.OLS(endog=y, exog=X_new)
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,future_return,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,48.65
Date:,"Sat, 07 Sep 2024",Prob (F-statistic):,1.64e-50
Time:,13:49:49,Log-Likelihood:,-1043000.0
No. Observations:,246402,AIC:,2086000.0
Df Residuals:,246396,BIC:,2086000.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.9041,0.244,-3.705,0.000,-1.382,-0.426
delta_CVOL,0.2471,0.058,4.274,0.000,0.134,0.360
delta_PVOL,-0.2566,0.061,-4.191,0.000,-0.377,-0.137
beta_market,0.3829,0.027,14.205,0.000,0.330,0.436
log_marcap,0.1385,0.039,3.550,0.000,0.062,0.215
illiquidity,4.5810,5.197,0.881,0.378,-5.605,14.767

0,1,2,3
Omnibus:,372129.466,Durbin-Watson:,1.912
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2509907673.536
Skew:,8.377,Prob(JB):,0.0
Kurtosis:,497.155,Cond. No.,986.0


## delta_CVOL, delta_PVOL and IVS

In [14]:
# Pooled OLS of future_return on delta_CVOL, delta_PVOL and IVS jointly,
# controlling for beta_market, log_marcap and illiquidity; add_constant
# supplies the intercept column.
X = data.loc[:, ['delta_CVOL', 'delta_PVOL', 'IVS', 'beta_market', 'log_marcap', 'illiquidity']]
y = data.loc[:, 'future_return']
X_new = sm.add_constant(X, prepend=True)
model = sm.OLS(endog=y, exog=X_new)
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,future_return,R-squared:,0.002
Model:,OLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,63.93
Date:,"Sat, 07 Sep 2024",Prob (F-statistic):,1.0900000000000002e-79
Time:,13:38:14,Log-Likelihood:,-1042900.0
No. Observations:,246402,AIC:,2086000.0
Df Residuals:,246395,BIC:,2086000.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.2199,0.245,-4.971,0.000,-1.701,-0.739
delta_CVOL,-0.1887,0.069,-2.753,0.006,-0.323,-0.054
delta_PVOL,0.1847,0.072,2.577,0.010,0.044,0.325
IVS,0.8413,0.071,11.840,0.000,0.702,0.981
beta_market,0.3766,0.027,13.973,0.000,0.324,0.429
log_marcap,0.1836,0.039,4.686,0.000,0.107,0.260
illiquidity,5.1437,5.196,0.990,0.322,-5.040,15.328

0,1,2,3
Omnibus:,372135.499,Durbin-Watson:,1.913
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2514677168.306
Skew:,8.376,Prob(JB):,0.0
Kurtosis:,497.625,Cond. No.,986.0
