### Part 5: Create your own strategy *[15 points]*

Imagine today is **31 December 2020** (you can use any data available before 2021). You are a group of fund managers managing a quantitative fund with an **investment universe** consists of **index constituents from Part 1**. Your group wants to **design a new investment strategy and implement it starting from 2021**. The work plan is as follows:
1. Form **benchmark portfolios** based on **Indexation (VW, EW, PW)** and **Modern Portfolio Theory (GMVP and MSRP)** *[5 points]*.
2. Create and backtest a **proprietary trading strategy** that aims to **outperform the market index** *[5 points]*.
3. Keep track of the **out-of-sample performance** of **your group's strategy** as well as **benchmark portfolios** and carry out **performance evaluations** using data available in 2021 so far *[5 points]*.

Your team will **build the prototype of the strategy with Python** and the **source code** below will be inspected by the **model validation quant, Douglas Chung**. 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import yfinance as yf

In [3]:
page = pd.read_html('https://en.wikipedia.org/wiki/DAX')
constit = page[3]['Ticker symbol']
constit.head(3)

0    ADS.DE
1    ALV.DE
2    BAS.DE
Name: Ticker symbol, dtype: object

In [4]:
start_date = '2020-01-01'
end_date = '2020-12-31'

In [5]:
for i in constit:
    print(i)

    prc = yf.download(i, interval="1d", start=start_date, end=end_date)
    prc = pd.DataFrame(prc['Adj Close'])
    prc.columns = [i] # rename the column with the ticker of the stock

    try:
        df_prc = pd.concat([df_prc, prc], axis=1) 
    except:
        df_prc = prc # create the dataframe for the first ticker

    stk = yf.Ticker(i)
    try:
        stk.info['floatShares']
    except:
        stk.info['floatShares'] = None
    try:
        stk.info['sharesOutstanding']
    except:
        stk.info['sharesOutstanding'] = None

    if stk.info['floatShares']:
        mcap = prc * stk.info['floatShares']
    elif stk.info['sharesOutstanding']:
        mcap = prc * stk.info['sharesOutstanding']
    else:
        mcap = prc * (stk.info['marketCap']/stk.info['previousClose'])


    try:
        df_mcap = pd.concat([df_mcap, mcap], axis=1)
    except:
        df_mcap = mcap

ADS.DE
[*********************100%***********************]  1 of 1 completed
ALV.DE
[*********************100%***********************]  1 of 1 completed
BAS.DE
[*********************100%***********************]  1 of 1 completed
BAYN.DE
[*********************100%***********************]  1 of 1 completed
BMW.DE
[*********************100%***********************]  1 of 1 completed
CON.DE
[*********************100%***********************]  1 of 1 completed
1COV.DE
[*********************100%***********************]  1 of 1 completed
DAI.DE
[*********************100%***********************]  1 of 1 completed
DHER.DE
[*********************100%***********************]  1 of 1 completed
DBK.DE
[*********************100%***********************]  1 of 1 completed
DB1.DE
[*********************100%***********************]  1 of 1 completed
DPW.DE
[*********************100%***********************]  1 of 1 completed
DTE.DE
[*********************100%***********************]  1 of 1 completed
DWNI.DE
[

In [6]:
PRC = df_prc.copy()
MCAP = df_mcap.copy()

In [44]:
PRC.head()

Unnamed: 0_level_0,ADS.DE,ALV.DE,BAS.DE,BAYN.DE,BMW.DE,CON.DE,1COV.DE,DAI.DE,DHER.DE,DBK.DE,...,LIN.DE,MRK.DE,MTX.DE,MUV2.DE,RWE.DE,SAP.DE,SIE.DE,ENR.DE,VOW3.DE,VNA.DE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,282.80896,199.297836,56.796951,67.664917,67.103523,106.563087,36.73278,46.210567,70.800003,7.347,...,183.156754,102.388107,249.554581,244.41507,24.180037,116.524338,111.622742,,170.062805,44.188805
2020-01-03,283.196991,197.093414,55.948612,66.799782,66.28981,104.347603,36.488937,45.373741,70.980003,7.25,...,178.579071,103.595512,249.264465,241.381744,24.072412,115.053452,109.4776,,166.42601,44.353077
2020-01-06,280.965942,195.878738,55.116905,66.02668,66.045715,103.157455,34.738518,45.128704,67.5,7.185,...,176.94072,104.706329,249.45787,241.473648,23.722628,114.193848,108.969536,,166.407166,44.216187
2020-01-07,285.913025,196.328613,55.141865,66.38562,67.103523,105.391258,34.947525,45.489326,66.0,7.434,...,178.579071,105.768852,253.615494,243.128204,24.260757,114.70961,109.665764,,165.879547,44.453465
2020-01-08,293.770203,196.418594,54.667793,68.106697,67.275307,107.423637,35.557121,45.743614,66.0,7.685,...,179.109085,107.507515,254.292328,243.587799,24.368383,115.397293,110.117378,,166.595612,44.690746


In [47]:
PRC.drop(['ENR.DE'], axis=1, inplace=True)

In [48]:
RET = PRC.pct_change()

In [49]:
RET.index

DatetimeIndex(['2020-01-02', '2020-01-03', '2020-01-06', '2020-01-07',
               '2020-01-08', '2020-01-09', '2020-01-10', '2020-01-13',
               '2020-01-14', '2020-01-15',
               ...
               '2020-12-15', '2020-12-16', '2020-12-17', '2020-12-18',
               '2020-12-21', '2020-12-22', '2020-12-23', '2020-12-28',
               '2020-12-29', '2020-12-30'],
              dtype='datetime64[ns]', name='Date', length=254, freq=None)

In [50]:
# delete first date
RET.head()

Unnamed: 0_level_0,ADS.DE,ALV.DE,BAS.DE,BAYN.DE,BMW.DE,CON.DE,1COV.DE,DAI.DE,DHER.DE,DBK.DE,...,IFX.DE,LIN.DE,MRK.DE,MTX.DE,MUV2.DE,RWE.DE,SAP.DE,SIE.DE,VOW3.DE,VNA.DE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,,,,,,,,,,,...,,,,,,,,,,
2020-01-03,0.001372,-0.011061,-0.014936,-0.012786,-0.012126,-0.02079,-0.006638,-0.018109,0.002542,-0.013203,...,-0.014262,-0.024993,0.011792,-0.001163,-0.012411,-0.004451,-0.012623,-0.019218,-0.021385,0.003718
2020-01-06,-0.007878,-0.006163,-0.014866,-0.011573,-0.003682,-0.011406,-0.047971,-0.0054,-0.049028,-0.008966,...,-0.01692,-0.009174,0.010723,0.000776,0.000381,-0.014531,-0.007471,-0.004641,-0.000113,-0.003086
2020-01-07,0.017607,0.002297,0.000453,0.005436,0.016016,0.021654,0.006017,0.007991,-0.022222,0.034656,...,0.049638,0.009259,0.010148,0.016667,0.006852,0.022684,0.004517,0.006389,-0.003171,0.005366
2020-01-08,0.027481,0.000458,-0.008597,0.025925,0.00256,0.019284,0.017443,0.00559,0.0,0.033764,...,0.012833,0.002968,0.016438,0.002669,0.00189,0.004436,0.005995,0.004118,0.004317,0.005338


In [96]:
tmp_RET = (RET + 1)

In [97]:
tmp_RET.drop(['2020-01-02'], axis=0, inplace=True)

In [98]:
tmp_RET.head()

Unnamed: 0_level_0,ADS.DE,ALV.DE,BAS.DE,BAYN.DE,BMW.DE,CON.DE,1COV.DE,DAI.DE,DHER.DE,DBK.DE,...,IFX.DE,LIN.DE,MRK.DE,MTX.DE,MUV2.DE,RWE.DE,SAP.DE,SIE.DE,VOW3.DE,VNA.DE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-03,1.001372,0.988939,0.985064,0.987214,0.987874,0.97921,0.993362,0.981891,1.002542,0.986797,...,0.985738,0.975007,1.011792,0.998837,0.987589,0.995549,0.987377,0.980782,0.978615,1.003718
2020-01-06,0.992122,0.993837,0.985134,0.988427,0.996318,0.988594,0.952029,0.9946,0.950972,0.991034,...,0.98308,0.990826,1.010723,1.000776,1.000381,0.985469,0.992529,0.995359,0.999887,0.996914
2020-01-07,1.017607,1.002297,1.000453,1.005436,1.016016,1.021654,1.006017,1.007991,0.977778,1.034656,...,1.049638,1.009259,1.010148,1.016667,1.006852,1.022684,1.004517,1.006389,0.996829,1.005366
2020-01-08,1.027481,1.000458,0.991403,1.025925,1.00256,1.019284,1.017443,1.00559,1.0,1.033764,...,1.012833,1.002968,1.016438,1.002669,1.00189,1.004436,1.005995,1.004118,1.004317,1.005338
2020-01-09,1.023609,1.010994,1.008824,1.00973,0.999328,1.023692,1.008327,1.004245,0.995454,1.004034,...,1.015016,1.01399,1.011231,1.027757,1.013962,1.012146,1.019368,1.01162,1.027712,1.002655


In [99]:
MOM = tmp_RET.rolling(11).apply(np.prod).shift(2)

In [100]:
tmp_RET.index

DatetimeIndex(['2020-01-03', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10', '2020-01-13', '2020-01-14',
               '2020-01-15', '2020-01-16',
               ...
               '2020-12-15', '2020-12-16', '2020-12-17', '2020-12-18',
               '2020-12-21', '2020-12-22', '2020-12-23', '2020-12-28',
               '2020-12-29', '2020-12-30'],
              dtype='datetime64[ns]', name='Date', length=253, freq=None)

In [133]:
MOM.index.day

Int64Index([ 3,  6,  7,  8,  9, 10, 13, 14, 15, 16,
            ...
            15, 16, 17, 18, 21, 22, 23, 28, 29, 30],
           dtype='int64', name='Date', length=253)

In [167]:
MOM = MOM.apply(pd.to_numeric)
L_cutoff = pd.to_numeric(MOM.quantile(.3,axis=1,numeric_only=False))
H_cutoff = pd.to_numeric(MOM.quantile(.7,axis=1,numeric_only=False))

In [168]:
WL_port = pd.DataFrame(index=RET.index, columns=RET.columns)

In [169]:
WL_port[MOM.gt(H_cutoff, axis=0)] = 'Winner'
WL_port[(MOM.le(H_cutoff, axis=0)) & (MOM.ge(L_cutoff, axis=0))] = 'Neutral'
WL_port[MOM.lt(L_cutoff, axis=0)] = 'Loser'
WL_port.tail()

Unnamed: 0_level_0,ADS.DE,ALV.DE,BAS.DE,BAYN.DE,BMW.DE,CON.DE,1COV.DE,DAI.DE,DHER.DE,DBK.DE,...,IFX.DE,LIN.DE,MRK.DE,MTX.DE,MUV2.DE,RWE.DE,SAP.DE,SIE.DE,VOW3.DE,VNA.DE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-12-22,Winner,Loser,Winner,Neutral,Loser,Neutral,Winner,Neutral,Winner,Loser,...,Winner,Neutral,Loser,Loser,Neutral,Loser,Neutral,Winner,Winner,Winner
2020-12-23,Winner,Loser,Winner,Neutral,Loser,Neutral,Winner,Loser,Winner,Loser,...,Neutral,Winner,Neutral,Loser,Loser,Loser,Neutral,Neutral,Winner,Winner
2020-12-28,Winner,Loser,Winner,Neutral,Loser,Winner,Winner,Loser,Winner,Loser,...,Winner,Neutral,Neutral,Neutral,Loser,Loser,Neutral,Neutral,Winner,Winner
2020-12-29,Winner,Neutral,Winner,Neutral,Loser,Winner,Winner,Neutral,Winner,Loser,...,Winner,Neutral,Loser,Winner,Neutral,Loser,Neutral,Neutral,Winner,Winner
2020-12-30,Winner,Neutral,Neutral,Neutral,Loser,Winner,Loser,Neutral,Winner,Loser,...,Winner,Winner,Loser,Neutral,Loser,Winner,Neutral,Winner,Neutral,Winner


In [170]:
WL_port.shape

(254, 29)

In [171]:
WL_port.index = pd.to_datetime(WL_port.index, format='%Y%m%d', errors='ignore')+ pd.offsets.MonthEnd(0)

In [176]:
PRC.index

DatetimeIndex(['2020-01-02', '2020-01-03', '2020-01-06', '2020-01-07',
               '2020-01-08', '2020-01-09', '2020-01-10', '2020-01-13',
               '2020-01-14', '2020-01-15',
               ...
               '2020-12-15', '2020-12-16', '2020-12-17', '2020-12-18',
               '2020-12-21', '2020-12-22', '2020-12-23', '2020-12-28',
               '2020-12-29', '2020-12-30'],
              dtype='datetime64[ns]', name='Date', length=254, freq=None)

In [187]:
other_list = []
feb_list = []

feb = pd.DataFrame()
other = pd.DataFrame()

for i in PRC.index:
    if i.month == 2:
        feb_list.append(i)
    else:
        other_list.append(i)


In [202]:
feb = WL_port[WL_port.index.month == 2]
other = WL_port[WL_port.index.month != 2]

In [203]:
feb.index = feb_list
other.index = other_list

In [211]:
egg = other.merge(feb, how='outer')

In [212]:
egg

Unnamed: 0,ADS.DE,ALV.DE,BAS.DE,BAYN.DE,BMW.DE,CON.DE,1COV.DE,DAI.DE,DHER.DE,DBK.DE,...,IFX.DE,LIN.DE,MRK.DE,MTX.DE,MUV2.DE,RWE.DE,SAP.DE,SIE.DE,VOW3.DE,VNA.DE
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249,Loser,Neutral,Loser,Loser,Loser,Winner,Winner,Neutral,Winner,Winner,...,Loser,Neutral,Neutral,Loser,Neutral,Winner,Neutral,Loser,Neutral,Neutral
250,Neutral,Neutral,Loser,Loser,Neutral,Winner,Winner,Loser,Neutral,Neutral,...,Loser,Winner,Neutral,Loser,Neutral,Winner,Neutral,Loser,Neutral,Winner
251,Loser,Neutral,Neutral,Loser,Neutral,Winner,Winner,Loser,Neutral,Loser,...,Loser,Neutral,Neutral,Loser,Neutral,Winner,Neutral,Loser,Neutral,Winner
252,Loser,Neutral,Loser,Loser,Neutral,Winner,Winner,Loser,Neutral,Neutral,...,Neutral,Neutral,Neutral,Loser,Loser,Winner,Neutral,Loser,Neutral,Winner


In [213]:
all_date = feb_list + other_list

In [217]:
all_date.sort()
egg.index = all_date

In [219]:
egg.head(32)

Unnamed: 0,ADS.DE,ALV.DE,BAS.DE,BAYN.DE,BMW.DE,CON.DE,1COV.DE,DAI.DE,DHER.DE,DBK.DE,...,IFX.DE,LIN.DE,MRK.DE,MTX.DE,MUV2.DE,RWE.DE,SAP.DE,SIE.DE,VOW3.DE,VNA.DE
2020-01-02,,,,,,,,,,,...,,,,,,,,,,
2020-01-03,,,,,,,,,,,...,,,,,,,,,,
2020-01-06,,,,,,,,,,,...,,,,,,,,,,
2020-01-07,,,,,,,,,,,...,,,,,,,,,,
2020-01-08,,,,,,,,,,,...,,,,,,,,,,
2020-01-09,,,,,,,,,,,...,,,,,,,,,,
2020-01-10,,,,,,,,,,,...,,,,,,,,,,
2020-01-13,,,,,,,,,,,...,,,,,,,,,,
2020-01-14,,,,,,,,,,,...,,,,,,,,,,
2020-01-15,,,,,,,,,,,...,,,,,,,,,,
