In [748]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

**O Nam (Oscar) Chim**

#  Reversal Strategy by Size and Market Closure 

**Idea:**

Inspired by Della Corte, Kosowski, and Wang's "Market Closure & Reversal" (2016), I examine the effect of a reversal strategy in ETFs with different market caps using different return metrics: OC (DayR)-OC, CO (NightR)-CO, CC (RET, close to close)-CC, and OO (OPENR, open to open)-OO.

Data were acquired from CRSP.

First, I will clean the data and make sure the dataset contains no missing values. I use four years of data, from 2016 to 2020, to look at the most recent period. On each date, the total number of ETFs might differ because the market changes over time, i.e. funds are being listed and delisted.

Second, divide etfs into 4 groups based on their market cap since liquidity/size is suspected to be a factor driving future daily returns.

Third, use different return metrics to forecast future returns with Fama-MacBeth regressions.

Fourth, find the metric that has the most significant signal and apply it using a long-short strategy if it is applicable.

Last, back test the strategy using data from 2021 to 2022.

# 1. Cleaning Data

I will use daily returns as signals and use a four-year dataset from 2016 to 2020 without looking too far backward.

In [749]:
# Load the raw extract and discard the columns this study never uses.
unused_columns = ["COMNAM","VOL","TICKER","SICCD","ASKHI","BIDLO","BID","ASK","RETX","DIVAMT"]
etf = pd.read_feather("etfdata_new.feather").drop(columns=unused_columns)

# Keep SHRCD == 73 rows dated 2016-01-01 through 2020-01-01 (both bounds inclusive).
is_shrcd_73 = etf["SHRCD"] == 73
in_sample = (etf["DATE"] <= "2020-01-01") & (etf["DATE"] >= "2016-01-01")
etf = etf.loc[is_shrcd_73 & in_sample]
etf.head()

Unnamed: 0,PERMNO,DATE,SHRCD,PRC,OPENPRC,RET,SHROUT
87119,10113.0,2016-01-04,73.0,37.714298,40.439999,-0.021145,500.0
87120,10113.0,2016-01-05,73.0,37.710098,37.709999,-0.000111,500.0
87121,10113.0,2016-01-06,73.0,37.139999,37.48,-0.015118,500.0
87122,10113.0,2016-01-07,73.0,-36.130001,,-0.027194,500.0
87123,10113.0,2016-01-08,73.0,-35.744999,,-0.010656,500.0


In [750]:
# PRC can be negative in the raw rows (see the head() output above); keep its magnitude.
# NOTE(review): presumably a negative PRC flags a bid/ask average rather than a trade — confirm.
etf["PRC"] = etf["PRC"].abs()
# Market capitalisation proxy: shares outstanding times (absolute) closing price.
etf["MKCAP"] = etf["SHROUT"].mul(etf["PRC"])
#etf["LAGPRC"] = etf.set_index("PERMNO")["PRC"].groupby("PERMNO").shift()


In [751]:
# Inspect the frame after PRC was made non-negative and MKCAP was added.
etf


Unnamed: 0,PERMNO,DATE,SHRCD,PRC,OPENPRC,RET,SHROUT,MKCAP
87119,10113.0,2016-01-04,73.0,37.714298,40.439999,-0.021145,500.0,18857.149124
87120,10113.0,2016-01-05,73.0,37.710098,37.709999,-0.000111,500.0,18855.049133
87121,10113.0,2016-01-06,73.0,37.139999,37.480000,-0.015118,500.0,18569.999695
87122,10113.0,2016-01-07,73.0,36.130001,,-0.027194,500.0,18065.000534
87123,10113.0,2016-01-08,73.0,35.744999,,-0.010656,500.0,17872.499466
...,...,...,...,...,...,...,...,...
41309176,93421.0,2019-12-24,73.0,26.879999,26.809999,0.003730,4150.0,111551.996517
41309177,93421.0,2019-12-26,73.0,26.934999,26.959999,0.002046,4150.0,111780.247784
41309178,93421.0,2019-12-27,73.0,27.044901,27.000000,0.004080,4150.0,112236.338711
41309179,93421.0,2019-12-30,73.0,27.065001,27.070000,0.000743,4150.0,112319.752216


In [752]:
#etf["DayR"] = etf["PRC"]/etf["OPENPRC"] - 1
#etf["NightR"] = etf["OPENPRC"]/etf["LAGPRC"] - 1

In [753]:
#(1+etf["NightR"])*(1+etf["DayR"])-1

Build a list of every PERMNO that has at least one missing (NA) open price. These funds are dropped from the dataset entirely.

In [754]:
# Funds with at least one missing open price.
missing_open = etf["OPENPRC"].isna()
NAPERMNO = etf.loc[missing_open, "PERMNO"].unique().tolist()
len(NAPERMNO)

1618

In [755]:
# Remove every row belonging to a fund flagged in NAPERMNO.
is_flagged = etf["PERMNO"].isin(NAPERMNO)
etf = etf.loc[~is_flagged]
etf

Unnamed: 0,PERMNO,DATE,SHRCD,PRC,OPENPRC,RET,SHROUT,MKCAP
723742,11182.0,2016-01-04,73.0,6.420000,6.250000,0.045603,5836.0,37467.120445
723743,11182.0,2016-01-05,73.0,6.150000,6.240000,-0.042056,5836.0,35891.400557
723744,11182.0,2016-01-06,73.0,4.570000,5.460000,-0.256911,5836.0,26670.521002
723745,11182.0,2016-01-07,73.0,4.000000,4.120000,-0.124727,5836.0,23344.000000
723746,11182.0,2016-01-08,73.0,4.310000,4.210000,0.077500,5836.0,25153.159666
...,...,...,...,...,...,...,...,...
41309176,93421.0,2019-12-24,73.0,26.879999,26.809999,0.003730,4150.0,111551.996517
41309177,93421.0,2019-12-26,73.0,26.934999,26.959999,0.002046,4150.0,111780.247784
41309178,93421.0,2019-12-27,73.0,27.044901,27.000000,0.004080,4150.0,112236.338711
41309179,93421.0,2019-12-30,73.0,27.065001,27.070000,0.000743,4150.0,112319.752216


In [756]:
# Sanity check: funds that still have a missing closing price (expected to be none).
missing_close = etf["PRC"].isna()
NAPERMNO = etf.loc[missing_close, "PERMNO"].unique().tolist()
len(NAPERMNO)

0

In [757]:
# Index by (fund, date) and sort so that a per-fund shift yields the previous row's prices.
etf1 = etf.set_index(["PERMNO","DATE"]).sort_index()
lagged_prices = etf1.groupby(level="PERMNO")[["PRC","OPENPRC"]].shift()
etf1["LagPRC"] = lagged_prices["PRC"]
etf1["LagOPR"] = lagged_prices["OPENPRC"]

In [758]:
# Inspect the (PERMNO, DATE)-indexed frame with the lagged close/open prices.
etf1

Unnamed: 0_level_0,Unnamed: 1_level_0,SHRCD,PRC,OPENPRC,RET,SHROUT,MKCAP,LagPRC,LagOPR
PERMNO,DATE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
11182.0,2016-01-04,73.0,6.420000,6.250000,0.045603,5836.0,37467.120445,,
11182.0,2016-01-05,73.0,6.150000,6.240000,-0.042056,5836.0,35891.400557,6.420000,6.250000
11182.0,2016-01-06,73.0,4.570000,5.460000,-0.256911,5836.0,26670.521002,6.150000,6.240000
11182.0,2016-01-07,73.0,4.000000,4.120000,-0.124727,5836.0,23344.000000,4.570000,5.460000
11182.0,2016-01-08,73.0,4.310000,4.210000,0.077500,5836.0,25153.159666,4.000000,4.120000
...,...,...,...,...,...,...,...,...,...
93421.0,2019-12-24,73.0,26.879999,26.809999,0.003730,4150.0,111551.996517,26.780100,26.879999
93421.0,2019-12-26,73.0,26.934999,26.959999,0.002046,4150.0,111780.247784,26.879999,26.809999
93421.0,2019-12-27,73.0,27.044901,27.000000,0.004080,4150.0,112236.338711,26.934999,26.959999
93421.0,2019-12-30,73.0,27.065001,27.070000,0.000743,4150.0,112319.752216,27.044901,27.000000


In [759]:
# Simple returns built from prices:
#   DayR   open  -> close of the same day
#   NightR previous close -> open
#   OPENR  previous open  -> open
# NOTE(review): these are raw price ratios; confirm whether distributions/splits need adjusting.
etf1["DayR"] = etf1["PRC"].div(etf1["OPENPRC"]) - 1
etf1["NightR"] = etf1["OPENPRC"].div(etf1["LagPRC"]) - 1
etf1["OPENR"] = etf1["OPENPRC"].div(etf1["LagOPR"]) - 1

In [760]:
# Previous-row value of each return metric within a fund; these are the regressors/signals.
for source_col, lag_col in [
    ("RET", "LagRET"),
    ("NightR", "LagNightR"),
    ("DayR", "LagDayR"),
    ("OPENR", "LagOPENR"),
]:
    etf1[lag_col] = etf1[source_col].groupby(level="PERMNO").shift()

In [761]:
# Inspect the frame with the return metrics and their one-row lags added.
etf1

Unnamed: 0_level_0,Unnamed: 1_level_0,SHRCD,PRC,OPENPRC,RET,SHROUT,MKCAP,LagPRC,LagOPR,DayR,NightR,OPENR,LagRET,LagNightR,LagDayR,LagOPENR
PERMNO,DATE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
11182.0,2016-01-04,73.0,6.420000,6.250000,0.045603,5836.0,37467.120445,,,0.027200,,,,,,
11182.0,2016-01-05,73.0,6.150000,6.240000,-0.042056,5836.0,35891.400557,6.420000,6.250000,-0.014423,-0.028037,-0.001600,0.045603,,0.027200,
11182.0,2016-01-06,73.0,4.570000,5.460000,-0.256911,5836.0,26670.521002,6.150000,6.240000,-0.163004,-0.112195,-0.125000,-0.042056,-0.028037,-0.014423,-0.001600
11182.0,2016-01-07,73.0,4.000000,4.120000,-0.124727,5836.0,23344.000000,4.570000,5.460000,-0.029126,-0.098468,-0.245421,-0.256911,-0.112195,-0.163004,-0.125000
11182.0,2016-01-08,73.0,4.310000,4.210000,0.077500,5836.0,25153.159666,4.000000,4.120000,0.023753,0.052500,0.021845,-0.124727,-0.098468,-0.029126,-0.245421
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93421.0,2019-12-24,73.0,26.879999,26.809999,0.003730,4150.0,111551.996517,26.780100,26.879999,0.002611,0.001116,-0.002604,-0.002221,0.000335,-0.003716,0.002611
93421.0,2019-12-26,73.0,26.934999,26.959999,0.002046,4150.0,111780.247784,26.879999,26.809999,-0.000927,0.002976,0.005595,0.003730,0.001116,0.002611,-0.002604
93421.0,2019-12-27,73.0,27.044901,27.000000,0.004080,4150.0,112236.338711,26.934999,26.959999,0.001663,0.002413,0.001484,0.002046,0.002976,-0.000927,0.005595
93421.0,2019-12-30,73.0,27.065001,27.070000,0.000743,4150.0,112319.752216,27.044901,27.000000,-0.000185,0.000928,0.002593,0.004080,0.002413,0.001663,0.001484


In [762]:
# Re-key the panel as (DATE, PERMNO) so each date's cross-section is contiguous.
etf2 = etf1.swaplevel("PERMNO", "DATE").sort_index()
etf2

Unnamed: 0_level_0,Unnamed: 1_level_0,SHRCD,PRC,OPENPRC,RET,SHROUT,MKCAP,LagPRC,LagOPR,DayR,NightR,OPENR,LagRET,LagNightR,LagDayR,LagOPENR
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2016-01-04,11182.0,73.0,6.420000,6.250000,0.045603,5836.0,3.746712e+04,,,0.027200,,,,,,
2016-01-04,11264.0,73.0,35.529999,35.759998,-0.056057,1700.0,6.040100e+04,,,-0.006432,,,,,,
2016-01-04,11407.0,73.0,19.870001,19.969999,-0.010951,2025.0,4.023675e+04,,,-0.005007,,,,,,
2016-01-04,11996.0,73.0,16.850000,16.870001,-0.008824,67600.0,1.139060e+06,,,-0.001186,,,,,,
2016-01-04,12035.0,73.0,12.155000,11.900000,0.008714,630412.0,7.662658e+06,,,0.021429,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-31,93377.0,73.0,25.660000,25.559999,0.005486,19500.0,5.003700e+05,25.520000,25.690001,0.003912,0.001567,-0.005060,-0.006231,0.000389,-0.006617,-0.001555
2019-12-31,93378.0,73.0,46.369999,45.959999,0.005857,1500.0,6.955500e+04,46.099998,46.595402,0.008921,-0.003037,-0.013637,-0.006251,0.004428,-0.010632,0.002418
2019-12-31,93379.0,73.0,21.080000,21.110001,0.003332,13650.0,2.877420e+05,21.010000,21.120001,-0.001421,0.004760,-0.000473,-0.003793,0.001423,-0.005208,0.005714
2019-12-31,93385.0,73.0,34.160000,34.150002,0.002936,5600.0,1.912960e+05,34.060001,34.029999,0.000293,0.002642,0.003526,-0.001173,-0.002053,0.000882,0.008595


In [763]:
#etf2=etf2.dropna()

On each date, there is a different number of ETFs listed because some were delisted and some are newly listed. To avoid survivorship bias, I include all of them in my dataset.

In [764]:
# Non-missing observations per column on each date.
etf2.groupby(level="DATE").count()


Unnamed: 0_level_0,SHRCD,PRC,OPENPRC,RET,SHROUT,MKCAP,LagPRC,LagOPR,DayR,NightR,OPENR,LagRET,LagNightR,LagDayR,LagOPENR
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2016-01-04,826,826,826,826,826,826,0,0,826,0,0,0,0,0,0
2016-01-05,826,826,826,826,826,826,826,826,826,826,826,826,0,826,0
2016-01-06,826,826,826,826,826,826,826,826,826,826,826,826,826,826,826
2016-01-07,826,826,826,826,826,826,826,826,826,826,826,826,826,826,826
2016-01-08,826,826,826,826,826,826,826,826,826,826,826,826,826,826,826
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-24,1064,1064,1064,1064,1064,1064,1064,1064,1064,1064,1064,1064,1064,1064,1064
2019-12-26,1065,1065,1065,1065,1065,1065,1065,1065,1065,1065,1065,1065,1065,1065,1065
2019-12-27,1072,1072,1072,1067,1072,1072,1067,1067,1072,1067,1067,1067,1067,1067,1067
2019-12-30,1077,1077,1077,1075,1077,1077,1075,1075,1077,1075,1075,1070,1070,1075,1070


In [765]:
def quintiles(inser, q=4):
    """Assign each value of ``inser`` to one of ``q`` equal-frequency buckets.

    Despite the name, the default is four buckets (quartiles), which is what
    every call in this notebook relies on.

    Parameters
    ----------
    inser : pandas.Series
        Values to rank (e.g. one date's market caps or lagged returns).
    q : int, default 4
        Number of quantile buckets.

    Returns
    -------
    pandas.Series
        Categorical series on the same index as ``inser`` with integer labels
        ``1`` (lowest values) through ``q`` (highest values).

    Raises
    ------
    ValueError
        Propagated from ``pandas.qcut`` when the bucket edges are not unique.
    """
    outser = pd.qcut(inser, q=q, labels=range(1, q + 1))
    return outser

I first divide the etfs into 4 groups based on their market cap.

In [766]:
# Rank funds into market-cap buckets within each date (1 = smallest, 4 = largest).
# group_keys=False keeps the original (DATE, PERMNO) index on the result; without it,
# pandas >= 2.0 prepends the group key and the column assignment no longer aligns.
etf2["SIZERANK"] = etf2["MKCAP"].groupby("DATE", group_keys=False).apply(quintiles)
etf2

Unnamed: 0_level_0,Unnamed: 1_level_0,SHRCD,PRC,OPENPRC,RET,SHROUT,MKCAP,LagPRC,LagOPR,DayR,NightR,OPENR,LagRET,LagNightR,LagDayR,LagOPENR,SIZERANK
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2016-01-04,11182.0,73.0,6.420000,6.250000,0.045603,5836.0,3.746712e+04,,,0.027200,,,,,,,1
2016-01-04,11264.0,73.0,35.529999,35.759998,-0.056057,1700.0,6.040100e+04,,,-0.006432,,,,,,,1
2016-01-04,11407.0,73.0,19.870001,19.969999,-0.010951,2025.0,4.023675e+04,,,-0.005007,,,,,,,1
2016-01-04,11996.0,73.0,16.850000,16.870001,-0.008824,67600.0,1.139060e+06,,,-0.001186,,,,,,,3
2016-01-04,12035.0,73.0,12.155000,11.900000,0.008714,630412.0,7.662658e+06,,,0.021429,,,,,,,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-31,93377.0,73.0,25.660000,25.559999,0.005486,19500.0,5.003700e+05,25.520000,25.690001,0.003912,0.001567,-0.005060,-0.006231,0.000389,-0.006617,-0.001555,3
2019-12-31,93378.0,73.0,46.369999,45.959999,0.005857,1500.0,6.955500e+04,46.099998,46.595402,0.008921,-0.003037,-0.013637,-0.006251,0.004428,-0.010632,0.002418,1
2019-12-31,93379.0,73.0,21.080000,21.110001,0.003332,13650.0,2.877420e+05,21.010000,21.120001,-0.001421,0.004760,-0.000473,-0.003793,0.001423,-0.005208,0.005714,2
2019-12-31,93385.0,73.0,34.160000,34.150002,0.002936,5600.0,1.912960e+05,34.060001,34.029999,0.000293,0.002642,0.003526,-0.001173,-0.002053,0.000882,0.008595,2


In [767]:
# Average of every column per size bucket and date.
etf2.groupby(["SIZERANK","DATE"]).agg("mean")


Unnamed: 0_level_0,Unnamed: 1_level_0,SHRCD,PRC,OPENPRC,RET,SHROUT,MKCAP,LagPRC,LagOPR,DayR,NightR,OPENR,LagRET,LagNightR,LagDayR,LagOPENR
SIZERANK,DATE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,2016-01-04,73.0,32.868102,32.908414,-0.009620,2846.352657,6.696734e+04,,,-0.001368,,,,,,
1,2016-01-05,73.0,32.941570,32.943316,0.000944,2847.801932,6.713133e+04,32.868102,32.908414,-0.000823,0.001556,0.000151,-0.009620,,-0.001368,
1,2016-01-06,73.0,32.971591,32.921780,-0.008816,2855.048309,6.646794e+04,32.922560,32.928486,-0.003081,-0.006125,-0.007130,0.000827,0.001662,-0.001042,0.000106
1,2016-01-07,73.0,32.609375,32.702718,-0.015177,2876.893720,6.544931e+04,32.808355,32.759510,-0.004910,-0.010602,-0.013365,-0.009173,-0.006449,-0.003114,-0.007492
1,2016-01-08,73.0,32.485408,32.681802,-0.006098,2882.106280,6.502313e+04,32.629037,32.725762,-0.010571,0.004898,-0.000462,-0.016282,-0.011440,-0.005171,-0.014065
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,2019-12-24,73.0,91.021321,91.033985,0.000691,173812.759398,1.436653e+07,91.018954,91.125940,0.000196,0.000098,-0.000592,0.000880,0.000009,-0.000691,0.000415
4,2019-12-26,73.0,91.335319,91.161203,0.003465,173821.932331,1.442260e+07,91.021321,91.033985,0.001900,0.001554,0.001752,0.000691,0.000098,0.000196,-0.000592
4,2019-12-27,73.0,91.004429,91.268993,0.000014,172877.757463,1.433362e+07,91.028563,90.855336,-0.002509,0.002531,0.004430,0.003459,0.001556,0.001893,0.001763
4,2019-12-30,73.0,90.529127,90.899703,-0.003510,172365.018587,1.422948e+07,90.906086,91.169963,-0.003737,0.000032,-0.002475,0.000018,0.002531,-0.002505,0.004429


# 2. Fama-Macbeth Regression

 **a. CC-CC**

First, I will look at the power of reversal for CC(Ret) - CC(Ret), which means sell and buy only occur at market close.

Use Fama-Macbeth Regression to compute estimated coefficients.

In [768]:
def regfun(df):
    """Fit the OLS regression ``RET ~ LagRET`` on ``df`` and return its coefficients.

    ``df`` must contain the columns ``RET`` and ``LagRET``. The return value is
    the fitted model's ``params`` (intercept and ``LagRET`` slope). Intended to
    be passed to ``groupby(...).apply`` so that one regression is run per group.
    """
    fitted = smf.ols(formula='RET ~ LagRET', data=df).fit()
    return fitted.params

In [769]:
# One cross-sectional regression per (size bucket, date), on rows with no missing inputs.
cc_sample = etf2[["RET","LagRET","SIZERANK"]].dropna()
params = cc_sample.groupby(["SIZERANK","DATE"]).apply(regfun)
params

Unnamed: 0_level_0,Unnamed: 1_level_0,Intercept,LagRET
SIZERANK,DATE,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2016-01-05,-0.000119,-0.110456
1,2016-01-06,-0.009535,0.869781
1,2016-01-07,-0.008529,0.724701
1,2016-01-08,-0.002985,0.191177
1,2016-01-11,-0.005340,0.194864
...,...,...,...
4,2019-12-24,0.000257,0.492552
4,2019-12-26,0.003268,0.285487
4,2019-12-27,0.000243,-0.066432
4,2019-12-30,-0.003508,-0.125112


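For each group, there is an estimated coefficient/beta1 on every date. As the summary below shows, the average beta1 is negative in all four groups, and the smallest market cap group has the most negative average beta1, i.e. the strongest reversal.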

In [770]:
# Distribution of the daily LagRET slopes within each size bucket.
stats = params.loc[:, ["LagRET"]].groupby(level="SIZERANK").describe()
stats

Unnamed: 0_level_0,LagRET,LagRET,LagRET,LagRET,LagRET,LagRET,LagRET,LagRET
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
SIZERANK,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,1005.0,-0.024746,0.468821,-2.509499,-0.30372,-0.021414,0.260577,1.560075
2,1005.0,-0.007146,0.460299,-2.452259,-0.25983,-0.000703,0.248186,2.079835
3,1005.0,-0.009363,0.49986,-2.439091,-0.27865,-0.025572,0.269349,2.114454
4,1005.0,-0.009049,0.443937,-2.913458,-0.259511,-0.014841,0.257111,1.906189


In [771]:
# t-statistic of the average slope for size group 1: mean / (std / sqrt(count)).
# describe() orders its rows count, mean, std, ... so positions 0-2 are used.
gp1 = stats.loc[1,]
n_obs, beta_mean, beta_std = gp1.iloc[0], gp1.iloc[1], gp1.iloc[2]
gp1.loc["tstats"] = beta_mean / (beta_std / np.sqrt(n_obs))
gp1

LagRET  count    1005.000000
        mean       -0.024746
        std         0.468821
        min        -2.509499
        25%        -0.303720
        50%        -0.021414
        75%         0.260577
        max         1.560075
tstats             -1.673301
Name: 1, dtype: float64

In [772]:
# t-statistic of the average slope for size group 2: mean / (std / sqrt(count)).
gp2 = stats.loc[2,]
n_obs, beta_mean, beta_std = gp2.iloc[0], gp2.iloc[1], gp2.iloc[2]
gp2.loc["tstats"] = beta_mean / (beta_std / np.sqrt(n_obs))
gp2

LagRET  count    1005.000000
        mean       -0.007146
        std         0.460299
        min        -2.452259
        25%        -0.259830
        50%        -0.000703
        75%         0.248186
        max         2.079835
tstats             -0.492138
Name: 2, dtype: float64

In [773]:
# t-statistic of the average slope for size group 3: mean / (std / sqrt(count)).
gp3 = stats.loc[3,]
n_obs, beta_mean, beta_std = gp3.iloc[0], gp3.iloc[1], gp3.iloc[2]
gp3.loc["tstats"] = beta_mean / (beta_std / np.sqrt(n_obs))
gp3

LagRET  count    1005.000000
        mean       -0.009363
        std         0.499860
        min        -2.439091
        25%        -0.278650
        50%        -0.025572
        75%         0.269349
        max         2.114454
tstats             -0.593844
Name: 3, dtype: float64

In [774]:
# t-statistic of the average slope for size group 4: mean / (std / sqrt(count)).
gp4 = stats.loc[4,]
n_obs, beta_mean, beta_std = gp4.iloc[0], gp4.iloc[1], gp4.iloc[2]
gp4.loc["tstats"] = beta_mean / (beta_std / np.sqrt(n_obs))
gp4

LagRET  count    1005.000000
        mean       -0.009049
        std         0.443937
        min        -2.913458
        25%        -0.259511
        50%        -0.014841
        75%         0.257111
        max         1.906189
tstats             -0.646220
Name: 4, dtype: float64

**Long-short Strategy**

Even though the t-stat is not significant, I will build the long-short strategy on the ETFs in size group 1, since it has the smallest beta1 (<0), meaning the reversal signal is the strongest among the groups.

In [775]:
# Close-to-close sample restricted to the smallest size bucket.
# The filtered frame is built once (the original evaluated the same dropna() twice)
# and copied explicitly, because a "signal" column is assigned to it in a later cell.
cc_sample = etf2[["RET","LagRET","SIZERANK"]].dropna()
cc1 = cc_sample.loc[cc_sample["SIZERANK"] == 1].copy()
cc1

Unnamed: 0_level_0,Unnamed: 1_level_0,RET,LagRET,SIZERANK
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-05,11182.0,-0.042056,0.045603,1
2016-01-05,11264.0,0.010132,-0.056057,1
2016-01-05,11407.0,-0.002516,-0.010951,1
2016-01-05,12105.0,-0.012903,-0.007471,1
2016-01-05,12109.0,-0.008409,-0.040698,1
...,...,...,...,...
2019-12-31,93336.0,0.009003,-0.000944,1
2019-12-31,93343.0,-0.008876,0.021920,1
2019-12-31,93360.0,0.013605,0.005307,1
2019-12-31,93378.0,0.005857,-0.006251,1


In [776]:
# Bucket funds by lagged return within each date (1 = lowest LagRET, 4 = highest).
# group_keys=False keeps the (DATE, PERMNO) index so the result aligns with cc1 on
# pandas >= 2.0; a plain column key is used because assigning a Series to a
# one-element list key is rejected by newer pandas versions.
cc1["signal"] = cc1["LagRET"].groupby("DATE", group_keys=False).apply(quintiles)
cc1

Unnamed: 0_level_0,Unnamed: 1_level_0,RET,LagRET,SIZERANK,signal
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-05,11182.0,-0.042056,0.045603,1,4
2016-01-05,11264.0,0.010132,-0.056057,1,1
2016-01-05,11407.0,-0.002516,-0.010951,1,3
2016-01-05,12105.0,-0.012903,-0.007471,1,3
2016-01-05,12109.0,-0.008409,-0.040698,1,1
...,...,...,...,...,...
2019-12-31,93336.0,0.009003,-0.000944,1,3
2019-12-31,93343.0,-0.008876,0.021920,1,4
2019-12-31,93360.0,0.013605,0.005307,1,4
2019-12-31,93378.0,0.005857,-0.006251,1,2


In [777]:
# Equal-weighted average RET of each signal bucket on each date.
ret_by_bucket = cc1.groupby(["signal","DATE"])["RET"]
LSS = ret_by_bucket.mean()
LSS

signal  DATE      
1       2016-01-05    0.005175
        2016-01-06   -0.015072
        2016-01-07   -0.050205
        2016-01-08   -0.011350
        2016-01-11   -0.010626
                        ...   
4       2019-12-24    0.003098
        2019-12-26    0.005319
        2019-12-27   -0.001282
        2019-12-30   -0.003108
        2019-12-31    0.000386
Name: RET, Length: 4020, dtype: float64

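We go long signal group 1 (the lowest lagged returns, i.e. past losers expected to rebound) and short signal group 4 (the highest lagged returns, i.e. past winners expected to revert). Assume the annual risk-free rate is 3%.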

In [778]:
# Daily spread: bucket 1 (lowest lagged return) minus bucket 4 (highest lagged return).
lowminushigh = LSS.loc[1] - LSS.loc[4]
spread_summary = lowminushigh.describe()
# Annualise with 250 trading days and a 3% annual risk-free rate.
# NOTE(review): the spread is a long-short return; confirm that subtracting rf is intended.
annual_excess = spread_summary["mean"] * 250 - 0.03
annual_vol = spread_summary["std"] * np.sqrt(250)
sharpe = annual_excess / annual_vol
sharpe

0.5258315220434624

***Full-sample test***

In [779]:
# Full-sample version: one cross-sectional regression per date across all size buckets.
cc_all = etf2[["RET","LagRET"]].dropna()
params1 = cc_all.groupby(["DATE"]).apply(regfun)
params1

Unnamed: 0_level_0,Intercept,LagRET
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-05,-0.000403,-0.117847
2016-01-06,-0.011097,0.478849
2016-01-07,-0.007350,0.857141
2016-01-08,-0.003786,0.246509
2016-01-11,-0.002116,0.198867
...,...,...
2019-12-24,0.000267,0.459035
2019-12-26,0.002767,0.434349
2019-12-27,0.000191,-0.154980
2019-12-30,-0.002773,-0.050409


In [780]:
# Distribution of the daily full-sample LagRET slopes.
stats1 = params1.loc[:, ["LagRET"]].describe()
stats1

Unnamed: 0,LagRET
count,1005.0
mean,-0.018223
std,0.436966
min,-2.329516
25%,-0.272386
50%,-0.012542
75%,0.233267
max,1.731284


In [781]:
# t-statistic of the average slope: mean / (std / sqrt(count)).
n_days = stats1.loc["count"]
stats1.loc["tstats"] = stats1.loc["mean"] / (stats1.loc["std"] / np.sqrt(n_days))
stats1

Unnamed: 0,LagRET
count,1005.0
mean,-0.018223
std,0.436966
min,-2.329516
25%,-0.272386
50%,-0.012542
75%,0.233267
max,1.731284
tstats,-1.322101


**b. OC-OC**

Second, I will look at the power of reversal for OC(DayR) - OC(DayR), which means sell occurs at market close and buy only occurs at open.

In [782]:
def regfun(df):
    """Fit the OLS regression ``DayR ~ LagDayR`` on ``df`` and return its coefficients.

    ``df`` must contain the columns ``DayR`` and ``LagDayR``. The return value
    is the fitted model's ``params`` (intercept and ``LagDayR`` slope).
    Note: this redefinition replaces any earlier ``regfun`` in the kernel.
    """
    fitted = smf.ols(formula='DayR ~ LagDayR', data=df).fit()
    return fitted.params

In [783]:
# Preview the open-to-close regression sample (rows with any missing value removed).
etf2.loc[:, ["DayR","LagDayR","SIZERANK"]].dropna()

Unnamed: 0_level_0,Unnamed: 1_level_0,DayR,LagDayR,SIZERANK
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-05,11182.0,-0.014423,0.027200,1
2016-01-05,11264.0,-0.003609,-0.006432,1
2016-01-05,11407.0,-0.004520,-0.005007,1
2016-01-05,11996.0,0.002974,-0.001186,3
2016-01-05,12035.0,-0.001653,0.021429,4
...,...,...,...,...
2019-12-31,93377.0,0.003912,-0.006617,3
2019-12-31,93378.0,0.008921,-0.010632,1
2019-12-31,93379.0,-0.001421,-0.005208,2
2019-12-31,93385.0,0.000293,0.000882,2


In [784]:
# One cross-sectional regression per (size bucket, date), on rows with no missing inputs.
oc_sample = etf2[["DayR","LagDayR","SIZERANK"]].dropna()
params = oc_sample.groupby(["SIZERANK","DATE"]).apply(regfun)
params

Unnamed: 0_level_0,Unnamed: 1_level_0,Intercept,LagDayR
SIZERANK,DATE,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2016-01-05,-0.000838,-0.011002
1,2016-01-06,-0.002520,0.538192
1,2016-01-07,-0.004137,0.248250
1,2016-01-08,-0.004985,1.080369
1,2016-01-11,-0.003427,0.512626
...,...,...,...
4,2019-12-24,0.000452,0.371333
4,2019-12-26,0.001899,0.003266
4,2019-12-27,-0.002414,-0.050344
4,2019-12-30,-0.002180,0.621455


In [785]:
# Distribution of the daily LagDayR slopes within each size bucket.
stats = params.loc[:, ["LagDayR"]].groupby(level="SIZERANK").describe()
stats

Unnamed: 0_level_0,LagDayR,LagDayR,LagDayR,LagDayR,LagDayR,LagDayR,LagDayR,LagDayR
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
SIZERANK,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,1005.0,-0.01469,0.442993,-1.956618,-0.273975,-0.015963,0.252293,2.145669
2,1005.0,-0.006375,0.461862,-2.17058,-0.262212,-0.007865,0.259732,2.565579
3,1005.0,-0.013539,0.493275,-2.18206,-0.278776,-0.023077,0.234416,2.180457
4,1005.0,0.001084,0.442572,-1.965085,-0.240264,-0.020395,0.243695,2.174266


In [786]:
# t-statistic of the average slope for size group 1: mean / (std / sqrt(count)).
gp1 = stats.loc[1,]
n_obs, beta_mean, beta_std = gp1.iloc[0], gp1.iloc[1], gp1.iloc[2]
gp1.loc["tstats"] = beta_mean / (beta_std / np.sqrt(n_obs))
gp1

LagDayR  count    1005.000000
         mean       -0.014690
         std         0.442993
         min        -1.956618
         25%        -0.273975
         50%        -0.015963
         75%         0.252293
         max         2.145669
tstats              -1.051239
Name: 1, dtype: float64

In [787]:
# t-statistic of the average slope for size group 2: mean / (std / sqrt(count)).
gp2 = stats.loc[2,]
n_obs, beta_mean, beta_std = gp2.iloc[0], gp2.iloc[1], gp2.iloc[2]
gp2.loc["tstats"] = beta_mean / (beta_std / np.sqrt(n_obs))
gp2

LagDayR  count    1005.000000
         mean       -0.006375
         std         0.461862
         min        -2.170580
         25%        -0.262212
         50%        -0.007865
         75%         0.259732
         max         2.565579
tstats              -0.437571
Name: 2, dtype: float64

In [788]:
# t-statistic of the average slope for size group 3: mean / (std / sqrt(count)).
gp3 = stats.loc[3,]
n_obs, beta_mean, beta_std = gp3.iloc[0], gp3.iloc[1], gp3.iloc[2]
gp3.loc["tstats"] = beta_mean / (beta_std / np.sqrt(n_obs))
gp3

LagDayR  count    1005.000000
         mean       -0.013539
         std         0.493275
         min        -2.182060
         25%        -0.278776
         50%        -0.023077
         75%         0.234416
         max         2.180457
tstats              -0.870116
Name: 3, dtype: float64

In [789]:
# t-statistic of the average slope for size group 4: mean / (std / sqrt(count)).
gp4 = stats.loc[4,]
n_obs, beta_mean, beta_std = gp4.iloc[0], gp4.iloc[1], gp4.iloc[2]
gp4.loc["tstats"] = beta_mean / (beta_std / np.sqrt(n_obs))
gp4

LagDayR  count    1005.000000
         mean        0.001084
         std         0.442572
         min        -1.965085
         25%        -0.240264
         50%        -0.020395
         75%         0.243695
         max         2.174266
tstats               0.077667
Name: 4, dtype: float64

**Long-Short Strategy**

Similarly, even though the t-stat is not significant, I will build the long-short strategy on the ETFs in size group 1, since it has the smallest beta1 (<0), meaning the reversal signal is the strongest among the groups.

In [790]:
# Open-to-close sample restricted to the smallest size bucket.
# The filtered frame is built once (the original evaluated the same dropna() twice)
# and copied explicitly, because a "signal" column is assigned to it in a later cell.
oc_sample = etf2[["DayR","LagDayR","SIZERANK"]].dropna()
oc1 = oc_sample.loc[oc_sample["SIZERANK"] == 1].copy()
oc1

Unnamed: 0_level_0,Unnamed: 1_level_0,DayR,LagDayR,SIZERANK
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-05,11182.0,-0.014423,0.027200,1
2016-01-05,11264.0,-0.003609,-0.006432,1
2016-01-05,11407.0,-0.004520,-0.005007,1
2016-01-05,12105.0,-0.002445,-0.003215,1
2016-01-05,12109.0,-0.017342,-0.022222,1
...,...,...,...,...
2019-12-31,93336.0,0.012201,0.000000,1
2019-12-31,93343.0,-0.008142,0.021920,1
2019-12-31,93360.0,0.013605,0.004098,1
2019-12-31,93378.0,0.008921,-0.010632,1


In [793]:
# Bucket funds by lagged intraday return within each date (1 = lowest, 4 = highest).
# group_keys=False keeps the (DATE, PERMNO) index so the result aligns with oc1 on
# pandas >= 2.0; a plain column key is used because assigning a Series to a
# one-element list key is rejected by newer pandas versions.
oc1["signal"] = oc1["LagDayR"].groupby("DATE", group_keys=False).apply(quintiles)
oc1

Unnamed: 0_level_0,Unnamed: 1_level_0,DayR,LagDayR,SIZERANK,signal
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-05,11182.0,-0.014423,0.027200,1,4
2016-01-05,11264.0,-0.003609,-0.006432,1,2
2016-01-05,11407.0,-0.004520,-0.005007,1,2
2016-01-05,12105.0,-0.002445,-0.003215,1,2
2016-01-05,12109.0,-0.017342,-0.022222,1,1
...,...,...,...,...,...
2019-12-31,93336.0,0.012201,0.000000,1,3
2019-12-31,93343.0,-0.008142,0.021920,1,4
2019-12-31,93360.0,0.013605,0.004098,1,4
2019-12-31,93378.0,0.008921,-0.010632,1,1


In [794]:
# Equal-weighted average DayR of each signal bucket on each date.
dayr_by_bucket = oc1.groupby(["signal","DATE"])["DayR"]
LSS = dayr_by_bucket.mean()
LSS

signal  DATE      
1       2016-01-05    0.000797
        2016-01-06   -0.008431
        2016-01-07   -0.012418
        2016-01-08   -0.029240
        2016-01-11   -0.021166
                        ...   
4       2019-12-24    0.000798
        2019-12-26    0.001244
        2019-12-27   -0.000226
        2019-12-30    0.002085
        2019-12-31   -0.004109
Name: DayR, Length: 4020, dtype: float64

In [795]:
# Daily spread: bucket 1 (lowest lagged return) minus bucket 4 (highest lagged return).
lowminushigh = LSS.loc[1] - LSS.loc[4]
spread_summary = lowminushigh.describe()
# Annualise with 250 trading days and a 3% annual risk-free rate.
# NOTE(review): the spread is a long-short return; confirm that subtracting rf is intended.
annual_excess = spread_summary["mean"] * 250 - 0.03
annual_vol = spread_summary["std"] * np.sqrt(250)
sharpe = annual_excess / annual_vol
sharpe

0.14610810270813857

***Full-sample test***

In [796]:
# Full-sample version: one cross-sectional regression per date across all size buckets.
oc_all = etf2[["DayR","LagDayR"]].dropna()
params2 = oc_all.groupby(["DATE"]).apply(regfun)
params2

Unnamed: 0_level_0,Intercept,LagDayR
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-05,-0.000196,0.031357
2016-01-06,-0.001059,0.429906
2016-01-07,-0.004278,0.177166
2016-01-08,-0.006880,1.159788
2016-01-11,-0.002550,0.378466
...,...,...
2019-12-24,0.000271,0.297780
2019-12-26,0.001190,0.126783
2019-12-27,-0.002706,0.207693
2019-12-30,-0.003129,0.130001


In [797]:
# Distribution of the daily full-sample LagDayR slopes.
stats2 = params2.loc[:, ["LagDayR"]].describe()
stats2

Unnamed: 0,LagDayR
count,1005.0
mean,-0.012748
std,0.428088
min,-1.877023
25%,-0.249458
50%,-0.021166
75%,0.234359
max,2.2097


In [798]:
# t-statistic of the average slope: mean / (std / sqrt(count)).
n_days = stats2.loc["count"]
stats2.loc["tstats"] = stats2.loc["mean"] / (stats2.loc["std"] / np.sqrt(n_days))
stats2

Unnamed: 0,LagDayR
count,1005.0
mean,-0.012748
std,0.428088
min,-1.877023
25%,-0.249458
50%,-0.021166
75%,0.234359
max,2.2097
tstats,-0.944038


**c. CO-CO**

Third, I will look at the power of reversal for CO(NightR) - CO(NightR), which means sell occurs at open and buy only occurs at market close, which is mainly adopted by institutional investors.

In [799]:
def regfun(df):
    """Fit the OLS regression ``NightR ~ LagNightR`` on ``df`` and return its coefficients.

    ``df`` must contain the columns ``NightR`` and ``LagNightR``. The return
    value is the fitted model's ``params`` (intercept and ``LagNightR`` slope).
    Note: this redefinition replaces any earlier ``regfun`` in the kernel.
    """
    fitted = smf.ols(formula='NightR ~ LagNightR', data=df).fit()
    return fitted.params

In [800]:
# Preview the overnight-return columns (missing values are still present here).
etf2.loc[:, ["NightR","LagNightR","SIZERANK"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,NightR,LagNightR,SIZERANK
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-04,11182.0,,,1
2016-01-04,11264.0,,,1
2016-01-04,11407.0,,,1
2016-01-04,11996.0,,,3
2016-01-04,12035.0,,,4
...,...,...,...,...
2019-12-31,93377.0,0.001567,0.000389,3
2019-12-31,93378.0,-0.003037,0.004428,1
2019-12-31,93379.0,0.004760,0.001423,2
2019-12-31,93385.0,0.002642,-0.002053,2


In [801]:
# One cross-sectional regression per (size bucket, date), on rows with no missing inputs.
co_sample = etf2[["NightR","LagNightR","SIZERANK"]].dropna()
params = co_sample.groupby(["SIZERANK","DATE"]).apply(regfun)
params

Unnamed: 0_level_0,Unnamed: 1_level_0,Intercept,LagNightR
SIZERANK,DATE,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2016-01-06,-0.006406,0.168905
1,2016-01-07,-0.003805,1.054038
1,2016-01-08,0.000378,-0.395042
1,2016-01-11,0.000984,0.295062
1,2016-01-12,0.004044,0.533892
...,...,...,...
4,2019-12-24,0.000097,0.030471
4,2019-12-26,0.001531,0.239909
4,2019-12-27,0.002277,0.163452
4,2019-12-30,-0.000315,0.136882


In [802]:
# Distribution of the daily LagNightR slopes within each size bucket.
stats = params.loc[:, ["LagNightR"]].groupby(level="SIZERANK").describe()
stats

Unnamed: 0_level_0,LagNightR,LagNightR,LagNightR,LagNightR,LagNightR,LagNightR,LagNightR,LagNightR
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
SIZERANK,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,1004.0,0.033157,1.485409,-17.274927,-0.262676,0.006953,0.259871,29.56405
2,1004.0,0.061637,2.550803,-42.766666,-0.260072,0.002892,0.256658,42.411009
3,1004.0,0.005358,1.174504,-15.231322,-0.293757,0.006282,0.290407,20.976403
4,1004.0,-0.012414,0.754881,-15.309265,-0.267719,0.021713,0.27069,2.907173


In [803]:
# t-statistic of the average slope for size group 1: mean / (std / sqrt(count)).
gp1 = stats.loc[1,]
n_obs, beta_mean, beta_std = gp1.iloc[0], gp1.iloc[1], gp1.iloc[2]
gp1.loc["tstats"] = beta_mean / (beta_std / np.sqrt(n_obs))
gp1

LagNightR  count    1004.000000
           mean        0.033157
           std         1.485409
           min       -17.274927
           25%        -0.262676
           50%         0.006953
           75%         0.259871
           max        29.564050
tstats                 0.707296
Name: 1, dtype: float64

In [804]:
# t-statistic of the average slope for size group 2: mean / (std / sqrt(count)).
gp2 = stats.loc[2,]
n_obs, beta_mean, beta_std = gp2.iloc[0], gp2.iloc[1], gp2.iloc[2]
gp2.loc["tstats"] = beta_mean / (beta_std / np.sqrt(n_obs))
gp2

LagNightR  count    1004.000000
           mean        0.061637
           std         2.550803
           min       -42.766666
           25%        -0.260072
           50%         0.002892
           75%         0.256658
           max        42.411009
tstats                 0.765648
Name: 2, dtype: float64

In [805]:
# t-statistic of the average slope for size group 3: mean / (std / sqrt(count)).
gp3 = stats.loc[3,]
n_obs, beta_mean, beta_std = gp3.iloc[0], gp3.iloc[1], gp3.iloc[2]
gp3.loc["tstats"] = beta_mean / (beta_std / np.sqrt(n_obs))
gp3

LagNightR  count    1004.000000
           mean        0.005358
           std         1.174504
           min       -15.231322
           25%        -0.293757
           50%         0.006282
           75%         0.290407
           max        20.976403
tstats                 0.144552
Name: 3, dtype: float64

In [806]:
# t-statistic of the average slope for size group 4: mean / (std / sqrt(count)).
gp4 = stats.loc[4,]
n_obs, beta_mean, beta_std = gp4.iloc[0], gp4.iloc[1], gp4.iloc[2]
gp4.loc["tstats"] = beta_mean / (beta_std / np.sqrt(n_obs))
gp4

LagNightR  count    1004.000000
           mean       -0.012414
           std         0.754881
           min       -15.309265
           25%        -0.267719
           50%         0.021713
           75%         0.270690
           max         2.907173
tstats                -0.521071
Name: 4, dtype: float64

**long-short strategy**

Since group 4 has the strongest reversal effect, I will focus on the largest market cap etfs for the night returns.

In [807]:
# Overnight-return sample restricted to the largest size bucket.
# The filtered frame is built once (the original evaluated the same dropna() twice)
# and copied explicitly, because a "signal" column is assigned to it in a later cell.
co_sample = etf2[["NightR","LagNightR","SIZERANK"]].dropna()
co1 = co_sample.loc[co_sample["SIZERANK"] == 4].copy()
co1

Unnamed: 0_level_0,Unnamed: 1_level_0,NightR,LagNightR,SIZERANK
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-06,12035.0,-0.023179,-0.004525,4
2016-01-06,12305.0,-0.014677,0.001411,4
2016-01-06,12412.0,-0.012046,0.001780,4
2016-01-06,12508.0,-0.018757,0.000226,4
2016-01-06,12535.0,-0.013629,0.002530,4
...,...,...,...,...
2019-12-31,93183.0,-0.001834,0.000000,4
2019-12-31,93186.0,-0.001421,-0.001266,4
2019-12-31,93219.0,0.000000,0.002908,4
2019-12-31,93221.0,0.002609,0.000000,4


In [808]:
# Bucket funds by lagged overnight return within each date (1 = lowest, 4 = highest).
# group_keys=False keeps the (DATE, PERMNO) index so the result aligns with co1 on
# pandas >= 2.0; a plain column key is used because assigning a Series to a
# one-element list key is rejected by newer pandas versions.
co1["signal"] = co1["LagNightR"].groupby("DATE", group_keys=False).apply(quintiles)
co1

Unnamed: 0_level_0,Unnamed: 1_level_0,NightR,LagNightR,SIZERANK,signal
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-06,12035.0,-0.023179,-0.004525,4,1
2016-01-06,12305.0,-0.014677,0.001411,4,2
2016-01-06,12412.0,-0.012046,0.001780,4,3
2016-01-06,12508.0,-0.018757,0.000226,4,2
2016-01-06,12535.0,-0.013629,0.002530,4,3
...,...,...,...,...,...
2019-12-31,93183.0,-0.001834,0.000000,4,2
2019-12-31,93186.0,-0.001421,-0.001266,4,1
2019-12-31,93219.0,0.000000,0.002908,4,4
2019-12-31,93221.0,0.002609,0.000000,4,2


In [809]:
# Equal-weighted average NightR of each signal bucket on each date.
nightr_by_bucket = co1.groupby(["signal","DATE"])["NightR"]
LSS = nightr_by_bucket.mean()
LSS

signal  DATE      
1       2016-01-06   -0.012690
        2016-01-07   -0.020947
        2016-01-08    0.008916
        2016-01-11   -0.000630
        2016-01-12    0.000025
                        ...   
4       2019-12-24    0.000417
        2019-12-26    0.002521
        2019-12-27    0.003695
        2019-12-30    0.000497
        2019-12-31   -0.000236
Name: NightR, Length: 4016, dtype: float64

In [810]:
# Daily spread: bucket 1 (lowest lagged return) minus bucket 4 (highest lagged return).
lowminushigh = LSS.loc[1] - LSS.loc[4]
spread_summary = lowminushigh.describe()
# Annualise with 250 trading days and a 3% annual risk-free rate.
# NOTE(review): the spread is a long-short return; confirm that subtracting rf is intended.
annual_excess = spread_summary["mean"] * 250 - 0.03
annual_vol = spread_summary["std"] * np.sqrt(250)
sharpe = annual_excess / annual_vol
sharpe

0.00019344171107907624

***Full-sample test***

In [811]:
# Full-sample version: one cross-sectional regression per date across all size buckets.
co_all = etf2[["NightR","LagNightR"]].dropna()
params3 = co_all.groupby(["DATE"]).apply(regfun)
params3

Unnamed: 0_level_0,Intercept,LagNightR
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-06,-0.009372,-0.211042
2016-01-07,-0.002079,1.055337
2016-01-08,-0.000577,-0.412700
2016-01-11,0.002202,0.296084
2016-01-12,0.003769,0.586200
...,...,...
2019-12-24,0.000207,0.052072
2019-12-26,0.001726,0.629330
2019-12-27,0.001797,0.101753
2019-12-30,-0.000106,0.085170


In [812]:
# Distribution of the daily full-sample LagNightR slopes.
stats3 = params3.loc[:, ["LagNightR"]].describe()
stats3

Unnamed: 0,LagNightR
count,1004.0
mean,0.028349
std,1.185103
min,-12.760177
25%,-0.25171
50%,0.002008
75%,0.250784
max,24.140285


In [813]:
# t-statistic of the average slope: mean / (std / sqrt(count)).
n_days = stats3.loc["count"]
stats3.loc["tstats"] = stats3.loc["mean"] / (stats3.loc["std"] / np.sqrt(n_days))
stats3

Unnamed: 0,LagNightR
count,1004.0
mean,0.028349
std,1.185103
min,-12.760177
25%,-0.25171
50%,0.002008
75%,0.250784
max,24.140285
tstats,0.757954


**d. OO-OO**

Fourth, I will look at the power of reversal for OO(Open R) - OO, which means sell and buy only occur at market open, which is adopted by lots of retail investors.

In [814]:
def regfun(df):
    """Fit the OLS model ``OPENR ~ LagOPENR`` on ``df`` and return the fitted parameters.

    Note: this re-binds the name ``regfun``; cells that run after this one use
    the open-to-open specification.
    """
    fitted = smf.ols(formula='OPENR ~ LagOPENR', data=df).fit()
    return fitted.params

In [815]:
# Preview the rows with complete open-to-open return, its lag, and the size rank.
etf2.loc[:, ["OPENR", "LagOPENR", "SIZERANK"]].dropna()

Unnamed: 0_level_0,Unnamed: 1_level_0,OPENR,LagOPENR,SIZERANK
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-06,11182.0,-0.125000,-0.001600,1
2016-01-06,11264.0,-0.042199,0.007271,1
2016-01-06,11407.0,-0.019086,-0.003004,1
2016-01-06,11996.0,-0.005354,-0.003557,3
2016-01-06,12035.0,-0.024793,0.016807,4
...,...,...,...,...
2019-12-31,93377.0,-0.005060,-0.001555,3
2019-12-31,93378.0,-0.013637,0.002418,1
2019-12-31,93379.0,-0.000473,0.005714,2
2019-12-31,93385.0,0.003526,0.008595,2


In [816]:
# One regfun fit per (size group, date) cell on the rows with complete data.
params = (
    etf2[["OPENR", "LagOPENR", "SIZERANK"]]
    .dropna()
    .groupby(["SIZERANK", "DATE"])
    .apply(regfun)
)
params

Unnamed: 0_level_0,Unnamed: 1_level_0,Intercept,LagOPENR
SIZERANK,DATE,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2016-01-06,-0.007145,0.147870
1,2016-01-07,-0.002973,1.387084
1,2016-01-08,-0.001159,-0.049564
1,2016-01-11,-0.008348,-0.436062
1,2016-01-12,-0.003216,0.080732
...,...,...,...
4,2019-12-24,-0.000509,-0.199243
4,2019-12-26,0.002050,0.503749
4,2019-12-27,0.004486,-0.031582
4,2019-12-30,-0.002305,-0.038264


In [817]:
# Per-size-group summary of the daily LagOPENR coefficients.
stats = (
    params.loc[:, ["LagOPENR"]]
    .groupby("SIZERANK")
    .describe()
)
stats

Unnamed: 0_level_0,LagOPENR,LagOPENR,LagOPENR,LagOPENR,LagOPENR,LagOPENR,LagOPENR,LagOPENR
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
SIZERANK,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,1004.0,0.002346,0.74263,-5.013527,-0.284075,-0.006037,0.238767,13.22473
2,1004.0,0.032646,1.175956,-17.991151,-0.260921,0.007587,0.274513,20.38518
3,1004.0,-0.034434,1.395971,-29.432046,-0.285744,-0.0017,0.279843,18.880713
4,1004.0,-0.017579,0.518894,-4.729211,-0.270912,0.000133,0.260969,2.776164


In [818]:
# Size group 1: summary row plus the t-statistic of the mean LagOPENR coefficient,
# computed as mean / (std / sqrt(count)).
gp1 = stats.loc[1]
gp1.loc["tstats"] = gp1[("LagOPENR", "mean")] / (
    gp1[("LagOPENR", "std")] / np.sqrt(gp1[("LagOPENR", "count")])
)
gp1

LagOPENR  count    1004.000000
          mean        0.002346
          std         0.742630
          min        -5.013527
          25%        -0.284075
          50%        -0.006037
          75%         0.238767
          max        13.224730
tstats                0.100080
Name: 1, dtype: float64

In [819]:
# Size group 2: summary row plus the t-statistic of the mean LagOPENR coefficient,
# computed as mean / (std / sqrt(count)).
gp2 = stats.loc[2]
gp2.loc["tstats"] = gp2[("LagOPENR", "mean")] / (
    gp2[("LagOPENR", "std")] / np.sqrt(gp2[("LagOPENR", "count")])
)
gp2

LagOPENR  count    1004.000000
          mean        0.032646
          std         1.175956
          min       -17.991151
          25%        -0.260921
          50%         0.007587
          75%         0.274513
          max        20.385180
tstats                0.879639
Name: 2, dtype: float64

In [820]:
# Size group 3: summary row plus the t-statistic of the mean LagOPENR coefficient,
# computed as mean / (std / sqrt(count)).
gp3 = stats.loc[3]
gp3.loc["tstats"] = gp3[("LagOPENR", "mean")] / (
    gp3[("LagOPENR", "std")] / np.sqrt(gp3[("LagOPENR", "count")])
)
gp3

LagOPENR  count    1004.000000
          mean       -0.034434
          std         1.395971
          min       -29.432046
          25%        -0.285744
          50%        -0.001700
          75%         0.279843
          max        18.880713
tstats               -0.781581
Name: 3, dtype: float64

In [821]:
# Size group 4: summary row plus the t-statistic of the mean LagOPENR coefficient,
# computed as mean / (std / sqrt(count)).
gp4 = stats.loc[4]
gp4.loc["tstats"] = gp4[("LagOPENR", "mean")] / (
    gp4[("LagOPENR", "std")] / np.sqrt(gp4[("LagOPENR", "count")])
)
gp4

LagOPENR  count    1004.000000
          mean       -0.017579
          std         0.518894
          min        -4.729211
          25%        -0.270912
          50%         0.000133
          75%         0.260969
          max         2.776164
tstats               -1.073448
Name: 4, dtype: float64

***Long-Short Strategy***

In [822]:
# Rows with complete open-to-open data that belong to size group 3.
# The frame is filtered in a single chain (the original ran dropna() twice) and
# copied, because a later cell adds a "signal" column to it; without the copy that
# assignment targets a slice and can trigger SettingWithCopyWarning.
oo1 = (
    etf2[["OPENR", "LagOPENR", "SIZERANK"]]
    .dropna()
    .loc[lambda frame: frame["SIZERANK"] == 3]
    .copy()
)
oo1

Unnamed: 0_level_0,Unnamed: 1_level_0,OPENR,LagOPENR,SIZERANK
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-06,11996.0,-0.005354,-0.003557,3
2016-01-06,12059.0,0.003354,0.000000,3
2016-01-06,12064.0,0.001190,-0.000396,3
2016-01-06,12065.0,0.001691,-0.001314,3
2016-01-06,12075.0,0.003528,0.001472,3
...,...,...,...,...
2019-12-31,93254.0,-0.002580,0.000861,3
2019-12-31,93268.0,0.027577,0.015145,3
2019-12-31,93284.0,-0.032126,-0.025276,3
2019-12-31,93318.0,0.025398,-0.015663,3


In [823]:
# Bucket each ETF by its lagged open-to-open return within each date
# (bucket 1 = lowest lagged return, bucket 4 = highest).
# group_keys=False keeps the original (DATE, PERMNO) index on the result; on
# pandas >= 2.0 the default prepends the group key as an extra level, so the
# result would no longer align with oo1.
oo1["signal"] = (
    oo1["LagOPENR"]
    .groupby("DATE", group_keys=False)
    .apply(quintiles)
)
oo1

Unnamed: 0_level_0,Unnamed: 1_level_0,OPENR,LagOPENR,SIZERANK,signal
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-06,11996.0,-0.005354,-0.003557,3,1
2016-01-06,12059.0,0.003354,0.000000,3,2
2016-01-06,12064.0,0.001190,-0.000396,3,2
2016-01-06,12065.0,0.001691,-0.001314,3,2
2016-01-06,12075.0,0.003528,0.001472,3,3
...,...,...,...,...,...
2019-12-31,93254.0,-0.002580,0.000861,3,4
2019-12-31,93268.0,0.027577,0.015145,3,4
2019-12-31,93284.0,-0.032126,-0.025276,3,1
2019-12-31,93318.0,0.025398,-0.015663,3,1


In [824]:
# Equal-weighted mean open-to-open return (OPENR) of every signal bucket on every date.
LSS = (
    oo1
    .groupby(["signal", "DATE"])["OPENR"]
    .mean()
)
LSS

signal  DATE      
1       2016-01-06   -0.009929
        2016-01-07   -0.026735
        2016-01-08   -0.000558
        2016-01-11   -0.012072
        2016-01-12    0.000418
                        ...   
4       2019-12-24   -0.001981
        2019-12-26    0.006649
        2019-12-27    0.003374
        2019-12-30   -0.002653
        2019-12-31   -0.001121
Name: OPENR, Length: 4016, dtype: float64

In [825]:
# Daily long-short spread: signal bucket 1 minus signal bucket 4.
lowminushigh = LSS.loc[1] - LSS.loc[4]
# Annualised with 250 trading days; 0.03 is subtracted from the annualised mean.
# NOTE(review): 0.03 is presumably an annual risk-free rate; a long-short spread is
# often treated as already self-financing — confirm the hurdle is intended.
sharpe = (lowminushigh.mean() * 250 - 0.03) / (lowminushigh.std() * np.sqrt(250))
sharpe

0.670426169881513

***Full-sample Regression***

In [826]:
# One regfun fit per date on the pooled cross-section (no size split).
params4 = (
    etf2[["OPENR", "LagOPENR"]]
    .dropna()
    .groupby(["DATE"])
    .apply(regfun)
)
params4

Unnamed: 0_level_0,Intercept,LagOPENR
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-06,-0.009799,-0.148223
2016-01-07,-0.001850,1.164745
2016-01-08,-0.000725,-0.044431
2016-01-11,-0.008749,-0.307440
2016-01-12,-0.002159,-0.079562
...,...,...
2019-12-24,-0.000148,-0.292055
2019-12-26,0.002039,0.499264
2019-12-27,0.003108,0.051291
2019-12-30,-0.002105,-0.099051


In [827]:
# Time-series summary of the daily LagOPENR coefficients.
stats4 = params4.loc[:, ["LagOPENR"]].describe()
stats4

Unnamed: 0,LagOPENR
count,1004.0
mean,0.00061
std,0.634223
min,-3.760994
25%,-0.257462
50%,0.001488
75%,0.251182
max,9.669735


In [828]:
# t-statistic of the average coefficient: mean / (std / sqrt(count)).
stats4.loc["tstats"] = stats4.loc["mean"] / (
    stats4.loc["std"] / np.sqrt(stats4.loc["count"])
)
stats4

Unnamed: 0,LagOPENR
count,1004.0
mean,0.00061
std,0.634223
min,-3.760994
25%,-0.257462
50%,0.001488
75%,0.251182
max,9.669735
tstats,0.030455


# BACK TEST

OO-OO appears to generate the highest Sharpe ratio. The strategy is now back-tested on out-of-sample data from 2020-01-01 to 2022-06-01 (the date filter used in the cell below).

**Clean Data**

In [829]:
# Rebuild the feature set for the back-test window (2020-01-01 .. 2022-06-01, inclusive).
# This re-binds etf / etf1 / etf2, so every later cell operates on back-test data.
etf = pd.read_feather("etfdata_new.feather")
etf = etf.drop(columns=["COMNAM","VOL","TICKER","SICCD","ASKHI","BIDLO","BID","ASK","RETX","DIVAMT"])

# .copy() makes the filtered frame independent, so the column assignments below do
# not write into a slice of the freshly loaded frame (SettingWithCopyWarning).
in_window = (etf["SHRCD"] == 73) & (etf["DATE"] <= "2022-06-01") & (etf["DATE"] >= "2020-01-01")
etf = etf.loc[in_window].copy()

# The raw PRC column contains negative entries; use the magnitude.
etf["PRC"] = etf["PRC"].abs()
etf["MKCAP"] = etf["SHROUT"] * etf["PRC"]

# Drop every ETF that has at least one missing opening price inside the window.
# NOTE(review): this screen uses information from the whole back-test period, so the
# traded universe is not strictly known in real time — confirm this is acceptable.
NAPERMNO = etf.loc[etf["OPENPRC"].isna(), "PERMNO"].unique().tolist()
etf = etf.loc[~etf["PERMNO"].isin(NAPERMNO)]

# Index by (PERMNO, DATE) and sort so that shift() within a PERMNO yields the
# previous available row for that ETF.
etf1 = etf.set_index(["PERMNO","DATE"]).sort_index()
etf1["LagPRC"] = etf1.groupby("PERMNO")["PRC"].shift()
etf1["LagOPR"] = etf1.groupby("PERMNO")["OPENPRC"].shift()

# Return definitions: open->close, previous close->open, previous open->open.
etf1["DayR"] = etf1["PRC"] / etf1["OPENPRC"] - 1
etf1["NightR"] = etf1["OPENPRC"] / etf1["LagPRC"] - 1
etf1["OPENR"] = etf1["OPENPRC"] / etf1["LagOPR"] - 1

# One-row lags of each return, used as the forecasting signals.
etf1["LagRET"] = etf1.groupby("PERMNO")["RET"].shift()
etf1["LagNightR"] = etf1.groupby("PERMNO")["NightR"].shift()
etf1["LagDayR"] = etf1.groupby("PERMNO")["DayR"].shift()
etf1["LagOPENR"] = etf1.groupby("PERMNO")["OPENR"].shift()

# Re-key by (DATE, PERMNO) for the per-date cross-sectional steps that follow.
etf2 = etf1.reorder_levels(["DATE","PERMNO"]).sort_index()
etf2

Unnamed: 0_level_0,Unnamed: 1_level_0,SHRCD,PRC,OPENPRC,RET,SHROUT,MKCAP,LagPRC,LagOPR,DayR,NightR,OPENR,LagRET,LagNightR,LagDayR,LagOPENR
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2020-01-02,11264.0,73.0,27.530001,28.820000,-0.027208,700.0,1.927100e+04,,,-0.044761,,,,,,
2020-01-02,11407.0,73.0,27.700001,27.700001,0.013168,18075.0,5.006775e+05,,,0.000000,,,,,,
2020-01-02,11996.0,73.0,34.070000,34.029999,0.001764,141323.0,4.814875e+06,,,0.001175,,,,,,
2020-01-02,12035.0,73.0,8.610000,8.580000,0.012941,938262.0,8.078435e+06,,,0.003496,,,,,,
2020-01-02,12054.0,73.0,33.730000,33.439999,0.028040,3725.0,1.256442e+05,,,0.008672,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-06-01,93377.0,73.0,24.860001,25.040001,-0.004804,18850.0,4.686110e+05,24.980000,24.790001,-0.007189,0.002402,0.010085,0.016687,0.008954,0.007664,0.017652
2022-06-01,93378.0,73.0,46.336102,46.419998,-0.024708,1200.0,5.560332e+04,47.509998,47.180000,-0.001807,-0.022943,-0.016109,-0.005026,-0.011937,0.006994,0.001061
2022-06-01,93379.0,73.0,16.209999,16.600000,-0.018171,11600.0,1.880360e+05,16.510000,16.600000,-0.023494,0.005451,0.000000,0.004258,0.009732,-0.005422,0.015291
2022-06-01,93385.0,73.0,29.420000,29.730000,-0.009485,3800.0,1.117960e+05,29.709999,29.680000,-0.010427,0.000673,0.001685,-0.007682,-0.008684,0.001011,-0.008684


In [830]:
# Non-null observations per column on each date (shows which early dates lack lagged values).
etf2.groupby(by="DATE").count()

Unnamed: 0_level_0,SHRCD,PRC,OPENPRC,RET,SHROUT,MKCAP,LagPRC,LagOPR,DayR,NightR,OPENR,LagRET,LagNightR,LagDayR,LagOPENR
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-01-02,1527,1527,1527,1524,1527,1527,0,0,1527,0,0,0,0,0,0
2020-01-03,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1524,0,1527,0
2020-01-06,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527
2020-01-07,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527
2020-01-08,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527,1527
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-25,1906,1906,1906,1906,1906,1906,1906,1906,1906,1906,1906,1906,1906,1906,1906
2022-05-26,1908,1908,1908,1906,1908,1908,1906,1906,1908,1906,1906,1906,1906,1906,1906
2022-05-27,1908,1908,1908,1908,1908,1908,1908,1908,1908,1908,1908,1906,1906,1908,1906
2022-05-31,1908,1908,1908,1908,1908,1908,1908,1908,1908,1908,1908,1908,1908,1908,1908


In [831]:
def quintiles(inser, q=4):
    """Assign each value of ``inser`` to one of ``q`` equal-frequency buckets.

    Despite the name, the default produces quartiles (``q=4``): label 1 holds the
    lowest values and label ``q`` the highest.

    Parameters
    ----------
    inser : pandas.Series
        Numeric values to bucket.
    q : int, default 4
        Number of quantile buckets; the labels are ``1..q``.

    Returns
    -------
    pandas.Series
        Categorical series on the same index as ``inser`` with labels ``1..q``.

    Raises
    ------
    ValueError
        Propagated from ``pd.qcut`` when the quantile edges are not unique
        (for example, many tied values).
    """
    outser = pd.qcut(inser, q=q, labels=range(1, q + 1))
    return outser

In [832]:
# Rank ETFs into market-cap buckets within each date (1 = smallest, 4 = largest).
# group_keys=False keeps the original (DATE, PERMNO) index on the result; on
# pandas >= 2.0 the default prepends the group key as an extra level, so the
# result would no longer align with etf2.
etf2["SIZERANK"] = (
    etf2["MKCAP"]
    .groupby("DATE", group_keys=False)
    .apply(quintiles)
)
etf2


Unnamed: 0_level_0,Unnamed: 1_level_0,SHRCD,PRC,OPENPRC,RET,SHROUT,MKCAP,LagPRC,LagOPR,DayR,NightR,OPENR,LagRET,LagNightR,LagDayR,LagOPENR,SIZERANK
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2020-01-02,11264.0,73.0,27.530001,28.820000,-0.027208,700.0,1.927100e+04,,,-0.044761,,,,,,,1
2020-01-02,11407.0,73.0,27.700001,27.700001,0.013168,18075.0,5.006775e+05,,,0.000000,,,,,,,3
2020-01-02,11996.0,73.0,34.070000,34.029999,0.001764,141323.0,4.814875e+06,,,0.001175,,,,,,,4
2020-01-02,12035.0,73.0,8.610000,8.580000,0.012941,938262.0,8.078435e+06,,,0.003496,,,,,,,4
2020-01-02,12054.0,73.0,33.730000,33.439999,0.028040,3725.0,1.256442e+05,,,0.008672,,,,,,,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-06-01,93377.0,73.0,24.860001,25.040001,-0.004804,18850.0,4.686110e+05,24.980000,24.790001,-0.007189,0.002402,0.010085,0.016687,0.008954,0.007664,0.017652,3
2022-06-01,93378.0,73.0,46.336102,46.419998,-0.024708,1200.0,5.560332e+04,47.509998,47.180000,-0.001807,-0.022943,-0.016109,-0.005026,-0.011937,0.006994,0.001061,1
2022-06-01,93379.0,73.0,16.209999,16.600000,-0.018171,11600.0,1.880360e+05,16.510000,16.600000,-0.023494,0.005451,0.000000,0.004258,0.009732,-0.005422,0.015291,2
2022-06-01,93385.0,73.0,29.420000,29.730000,-0.009485,3800.0,1.117960e+05,29.709999,29.680000,-0.010427,0.000673,0.001685,-0.007682,-0.008684,0.001011,-0.008684,2


In [833]:
# Back-test universe: rows with complete open-to-open data in size group 3.
# The frame is filtered in a single chain (the original ran dropna() twice) and
# copied, because the next cell adds a "signal" column to it; without the copy that
# assignment targets a slice and can trigger SettingWithCopyWarning.
oo1 = (
    etf2[["OPENR", "LagOPENR", "SIZERANK"]]
    .dropna()
    .loc[lambda frame: frame["SIZERANK"] == 3]
    .copy()
)
oo1


Unnamed: 0_level_0,Unnamed: 1_level_0,OPENR,LagOPENR,SIZERANK
DATE,PERMNO,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-06,11407.0,-0.008778,-0.012996,3
2020-01-06,12098.0,-0.016954,-0.001509,3
2020-01-06,12287.0,-0.016044,0.007050,3
2020-01-06,12289.0,0.002629,0.003365,3
2020-01-06,12292.0,-0.001518,-0.007749,3
...,...,...,...,...
2022-06-01,93318.0,-0.012223,-0.012075,3
2022-06-01,93363.0,-0.000157,0.013395,3
2022-06-01,93366.0,-0.004977,0.008172,3
2022-06-01,93376.0,0.000128,0.003456,3


In [834]:
# Bucket each ETF by its lagged open-to-open return within each date
# (bucket 1 = lowest lagged return, bucket 4 = highest).
# group_keys=False keeps the original (DATE, PERMNO) index on the result; on
# pandas >= 2.0 the default prepends the group key as an extra level, so the
# result would no longer align with oo1.
oo1["signal"] = (
    oo1["LagOPENR"]
    .groupby("DATE", group_keys=False)
    .apply(quintiles)
)

In [835]:
# Mean open-to-open return of each signal bucket on each back-test date.
LSS = (
    oo1
    .groupby(["signal", "DATE"])["OPENR"]
    .mean()
)
# Daily long-short spread: bucket 1 (lowest lagged return) minus bucket 4 (highest).
lowminushigh = LSS.loc[1] - LSS.loc[4]
# Annualised with 250 trading days; 0.03 is subtracted from the annualised mean.
# NOTE(review): 0.03 is presumably an annual risk-free rate — confirm.
sharpe = (lowminushigh.mean() * 250 - 0.03) / (lowminushigh.std() * np.sqrt(250))
sharpe

0.8286922188754217

The strategy is consistent out of sample: the Sharpe ratio rose from 0.67 in-sample to 0.83 in the back test.