# Demo Portfolio Construction and Backtest

This notebook contains the code for backtesting a trading strategy using technical signals. I will use the two signals developed in *Demo Quantitative Stock Selection Model.ipynb* in this demo. The signals are stored in the files "classdata/Signal1.csv" and "classdata/Signal2.csv".

In [1]:
import pandas as pd
import numpy as np
# Load Signal 1, parse 'datadate' as a datetime, and order the rows by
# stock identifier and then by date with a fresh 0..n-1 index.
df1 = (
    pd.read_csv('classdata/Signal1.csv')
    .assign(datadate=lambda frame: pd.to_datetime(frame['datadate'], format="%Y-%m-%d"))
    .sort_values(by=['LPERMNO', 'datadate'])
    .reset_index(drop=True)
)
df1.head()

Unnamed: 0,LPERMNO,datadate,Date,Signal
0,10001,2001-06-30,200112,1.211817
1,10001,2001-06-30,200203,1.211817
2,10001,2001-06-30,200206,1.211817
3,10001,2001-06-30,200209,1.211817
4,10001,2002-06-30,200212,0.928078


In [2]:
# Load Signal 2, parse 'STATPERS' as a datetime, and order the rows by
# stock identifier and then by date with a fresh 0..n-1 index.
df2 = (
    pd.read_csv('classdata/Signal2.csv')
    .assign(STATPERS=lambda frame: pd.to_datetime(frame['STATPERS'], format="%Y-%m-%d"))
    .sort_values(by=['PERMNO', 'STATPERS'])
    .reset_index(drop=True)
)
df2.head()

Unnamed: 0,PERMNO,STATPERS,Date,Signal
0,10002,2006-06-15,200606,0.003751
1,10002,2006-09-14,200609,0.001487
2,10002,2006-12-14,200612,0.000802
3,10002,2007-03-15,200703,0.002963
4,10002,2007-06-14,200706,0.00398


In [3]:
# Load the return data and order it by stock identifier and period (yyyymm).
df3 = (
    pd.read_csv('classdata/return.csv')
    .sort_values(by=['permno', 'yyyymm'])
    .reset_index(drop=True)
)
df3.head()

Unnamed: 0,permno,yyyymm,prc,fret1
0,10000,198603,4.4375,-0.302817
1,10000,198606,3.09375,-0.666667
2,10000,198609,1.03125,-0.5
3,10000,198612,0.51563,-0.515152
4,10000,198703,0.25,-0.125


## Generate Keys and Merge Data Frames for Signal 1

We generate a key column that concatenates PERMNO and Date; for example, PERMNO 10002 and Date 200606 give the key 10002200606. Since PERMNO and Date are integers in all data frames and Date is a six-digit yyyymm value, we can simply let `key = PERMNO * 1000000 + Date`.

In [4]:
# Build an integer merge key per row: the stock identifier shifted left by
# six decimal digits, plus the period column (the periods shown are six-digit
# yyyymm values, so the two parts do not overlap).
df1["key"] = df1["LPERMNO"].mul(1000000).add(df1["Date"])
df2["key"] = df2["PERMNO"].mul(1000000).add(df2["Date"])
df3["key"] = df3["permno"].mul(1000000).add(df3["yyyymm"])

Then we merge the data frames on the key, drop rows that contain NaNs, and sort the resulting data frame. We backtest Signal 1 first.

In [5]:
# Join Signal 1 with the return data on the shared key (inner join), discard
# rows with any missing value, and order by stock and period.
df = (
    df1[["key", "LPERMNO", "Date", "Signal"]]
    .merge(df3[["key", "prc", "fret1"]], on="key")
    .dropna(how="any")
    .sort_values(by=['LPERMNO', 'Date'])
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,key,LPERMNO,Date,Signal,prc,fret1
0,10001200112,10001,200112,1.211817,11.45,-0.080349
1,10001200203,10001,200203,1.211817,10.4,-0.052404
2,10001200206,10001,200206,1.211817,9.72,-0.085905
3,10001200209,10001,200209,1.211817,8.75,-0.144457
4,10001200212,10001,200212,0.928078,7.351,0.05904


## Backtesting for Signal 1

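We then implement the trading strategy and generate the returns of the equally weighted long-short portfolio. In each quarter with at least 200 stocks, the stocks are sorted by the signal in ascending order; the portfolio is long the first 100 stocks (lowest signal) and short the last 100 stocks (highest signal).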

In [6]:
import numpy as np
dfbenchmark = pd.read_csv('classdata/benchmark.csv')

# Parallel lists: one entry per quarter that passes the size filter.
benchmark = []
quarterlist = []
quarterlyreturn = []
longreturn = []
shortreturn = []
for quarter in np.sort(df.Date.unique()):
    # Cross-section of all stocks observed in this quarter.
    dftemp = df[df.Date == quarter].copy()
    if len(dftemp) < 200:
        # Skip quarters with fewer than 200 stocks.
        continue

    quarterlist.append(quarter)
    # Benchmark value of the first benchmark row whose yyyymm equals this quarter.
    benchmark.append(dfbenchmark.loc[dfbenchmark.yyyymm == quarter, "benchmark"].iloc[0])

    # Rank stocks by Signal 1, lowest first.
    dftemp = dftemp.sort_values(by='Signal', ascending=True)
    fret = dftemp["fret1"]
    long_mean = fret.iloc[:100].mean()     # equal-weighted mean of the 100 lowest-signal stocks
    short_mean = fret.iloc[-100:].mean()   # equal-weighted mean of the 100 highest-signal stocks

    longreturn.append(long_mean)
    shortreturn.append(-short_mean)        # return of the short leg is the negated mean
    quarterlyreturn.append(long_mean - short_mean)

# Collect the per-quarter results into a single data frame.
dfresult = pd.DataFrame({
    "Date": quarterlist,
    "LongReturn": longreturn,
    "ShortReturn": shortreturn,
    "QuarterlyReturn": quarterlyreturn,
    "Benchmark": benchmark,
})
dfresult.head()

Unnamed: 0,Date,LongReturn,ShortReturn,QuarterlyReturn,Benchmark
0,200109,0.326346,-0.420888,-0.094542,-0.160711
1,200112,-0.025642,0.103923,0.078281,0.128628
2,200203,-0.203986,0.285222,0.081236,0.005711
3,200206,-0.266991,0.259555,-0.007436,-0.125661
4,200209,0.384111,-0.100511,0.283601,-0.166367


Finally, we generate the performance metrics.

In [7]:
# The long-short portfolio return is taken as the excess return as-is;
# the Benchmark column is not subtracted here.
dfresult["ExcessReturn"] = dfresult["QuarterlyReturn"]

In [8]:
import math
excess = dfresult["ExcessReturn"]
avg_excess = excess.mean()
# Average excess return per quarter
print(avg_excess)
# t-statistic of the mean: mean / sample std * sqrt(number of quarters)
print(avg_excess / excess.std() * math.sqrt(len(dfresult)))

0.02711915418358943
2.306673866828156


In [9]:
# Information ratio: mean excess return divided by its sample standard deviation
excess = dfresult["ExcessReturn"]
print(excess.mean() / excess.std())

0.26997575558090187


## Generate Keys and Merge Data Frames for Signal 2

In [10]:
# Join Signal 2 with the return data on the shared key (inner join), discard
# rows with any missing value, and order by stock and period.
df = (
    df2[["key", "PERMNO", "Date", "Signal"]]
    .merge(df3[["key", "prc", "fret1"]], on="key")
    .dropna(how="any")
    .sort_values(by=['PERMNO', 'Date'])
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,key,PERMNO,Date,Signal,prc,fret1
0,10002200606,10002,200606,0.003751,23.47,0.193865
1,10002200609,10002,200609,0.001487,27.89,-0.080315
2,10002200612,10002,200612,0.000802,25.52,-0.165752
3,10002200703,10002,200703,0.002963,21.16,-0.001418
4,10002200706,10002,200706,0.00398,21.0,-0.234286


## Backtesting for Signal 2

The procedure is the same as the one used for Signal 1.

In [11]:
# Parallel lists: one entry per quarter that passes the size filter.
benchmark = []
quarterlist = []
quarterlyreturn = []
longreturn = []
shortreturn = []
for quarter in np.sort(df.Date.unique()):
    # Cross-section of all stocks observed in this quarter.
    dftemp = df[df.Date == quarter].copy()
    if len(dftemp) < 200:
        # Skip quarters with fewer than 200 stocks.
        continue

    quarterlist.append(quarter)
    # Benchmark value of the first benchmark row whose yyyymm equals this quarter.
    benchmark.append(dfbenchmark.loc[dfbenchmark.yyyymm == quarter, "benchmark"].iloc[0])

    # Rank stocks by Signal 2, lowest first.
    dftemp = dftemp.sort_values(by='Signal', ascending=True)
    fret = dftemp["fret1"]
    long_mean = fret.iloc[:100].mean()     # equal-weighted mean of the 100 lowest-signal stocks
    short_mean = fret.iloc[-100:].mean()   # equal-weighted mean of the 100 highest-signal stocks

    longreturn.append(long_mean)
    shortreturn.append(-short_mean)        # return of the short leg is the negated mean
    quarterlyreturn.append(long_mean - short_mean)

# Collect the per-quarter results into a single data frame.
dfresult = pd.DataFrame({
    "Date": quarterlist,
    "LongReturn": longreturn,
    "ShortReturn": shortreturn,
    "QuarterlyReturn": quarterlyreturn,
    "Benchmark": benchmark,
})
dfresult.head()

Unnamed: 0,Date,LongReturn,ShortReturn,QuarterlyReturn,Benchmark
0,200003,-0.038074,0.242704,0.20463,0.04389
1,200006,0.066039,0.091381,0.15742,-0.049445
2,200009,-0.185585,0.43836,0.252775,0.001961
3,200012,-0.078376,0.005539,-0.072837,-0.106433
4,200103,0.119622,-0.106226,0.013396,-0.129373


In [12]:
# The long-short portfolio return is taken as the excess return as-is;
# the Benchmark column is not subtracted here.
dfresult["ExcessReturn"] = dfresult["QuarterlyReturn"]

In [13]:
import math
excess = dfresult["ExcessReturn"]
avg_excess = excess.mean()
# Average excess return per quarter
print(avg_excess)
# t-statistic of the mean: mean / sample std * sqrt(number of quarters)
print(avg_excess / excess.std() * math.sqrt(len(dfresult)))

0.023765278658459954
1.0862978136828392


In [14]:
# Information ratio: mean excess return divided by its sample standard deviation
excess = dfresult["ExcessReturn"]
print(excess.mean() / excess.std())

0.12221805269769546
