In [72]:
import numpy as np
import pandas as pd

import time
from datetime import datetime as dt

import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go




# 1. Data Preparation

3 datasets:

S&P 500 daily adjusted price

Mean-variance weights table

Min-variance weights table

In [7]:
data_adj = pd.read_csv("1-sp500_adj_price.csv")
weight_meanv = pd.read_excel('mean_weighted_user8.xlsx')
weight_minv = pd.read_excel('minimum_weighted_user8.xlsx')
weight_equal=pd.read_excel('equally_weighted_user8.xlsx')

In [10]:
weight_meanv.head()

Unnamed: 0,tic,predicted_return,weights,trade_date
0,EOG,0.033723,0.000186,19950601
1,EQT,0.037745,0.0,19950601
2,HES,0.05145,0.005125,19950601
3,NFX,0.030283,0.05,19950601
4,OKE,0.04102,0.0,19950601


In [11]:
weight_minv.head()

Unnamed: 0,tic,predicted_return,weights,trade_date
0,EOG,0.033723,0.009718,19950601
1,EQT,0.037745,0.0,19950601
2,HES,0.05145,0.007371,19950601
3,NFX,0.030283,0.05,19950601
4,OKE,0.04102,0.0,19950601


In [12]:
weight_equal.head()

Unnamed: 0,tic,predicted_return,weights,trade_date
0,EOG,0.033723,0.006993,19950601
1,EQT,0.037745,0.006993,19950601
2,HES,0.05145,0.006993,19950601
3,NFX,0.030283,0.006993,19950601
4,OKE,0.04102,0.006993,19950601


In [18]:
#the length of the 3 tables should be the same
print(len(weight_meanv),
len(weight_minv),
len(weight_equal))


12932 12932 12932


## 1.1 Get stock name, trade date, daily date

Add 20170901 as tradedate_full


In [36]:
#get unique stock name from table
#stock names are the same between mean variance and minimum variance
#only the weights are different
stocks_name = list(set(weight_meanv['tic']))
stocks_name.sort()

In [37]:
len(stocks_name)

982

In [33]:
#get all daily date
#convert to list from pandas.core.series.Series
#use set to get unique values
daily = list(set(data_adj['datadate']))
#sort it
daily.sort()
print(type(data_adj['datadate']),type(daily))

<class 'pandas.core.series.Series'> <class 'list'>


In [31]:
len(daily)


7155

In [38]:
#get unique trade date from table
tradedate = list(set(weight_meanv['trade_date']))
tradedate.sort()


In [40]:
len(tradedate)

89

In [41]:
#add the last trade date 20170901
tradedate_full = tradedate.copy()
tradedate_full.append(20170901)
len(tradedate_full)

90

## 1.2 Get the daily price matrix based on stock_name

In [None]:
#30mins running time
##################match current all stocks to daily adjusted price table#######################
# backtest daily price matrix:
# covert original table to matrix
# rows = all stocks: 971, columns = all dates:7155 days
#
start_time = time.time()

balance_daily = pd.DataFrame(index = stocks_name, columns = daily)
balance_daily = balance_daily.fillna(np.nan)

for i in stocks_name:
    ind = np.where(data_adj['tic'] == i)[0]
    tmp = data_adj.loc[ind, :]
    tmp.index = range(tmp.shape[0])
    for j in daily:
        ind2 = np.where(tmp['datadate'] == j)[0]
        if(len(ind2) != 0):
            balance_daily.loc[i,j] = tmp.loc[ind2[0], 'adj_price']

elapsed_time = time.time() - start_time
print(elapsed_time)
#writer2 = pd.ExcelWriter('balance_daily.xlsx')
#writer2 = pd.ExcelWriter('balance_daily_user8.xlsx')

#balance_daily.to_excel(writer2,'Sheet1')
#writer2.save()
###############################################

In [None]:
balance_daily = pd.read_excel('balance_daily_user8.xlsx')


# 2. Create the rebalance matrix:

In [111]:
# set initial capital to 1 million and transaction percet to 0.1%

capital = 1000000
transaction_percent = 1/1000

In [214]:
#construct an empty matrix
#rows are stocks: 971 unique stocks
#columns are trade dates
#name the index as stock names

def cal_portfolio(stocks_name,tradedate, weight_table, capital, transaction_percent):


    balance_weight = pd.DataFrame(index = stocks_name, columns = tradedate)
    balance_weight = balance_weight.fillna(0)
    
    #loop through the weights table to fill the matrix
    #need weight tabele
    #0. weights table
    for i in range(weight_table.shape[0]):
        tics = weight_table.loc[i, 'tic']
        date = weight_table.loc[i, 'trade_date']
        w = weight_table.loc[i, 'weights']
        balance_weight.loc[tics, date] = w
        
    #1. shares table
    balance_share = pd.DataFrame(index = stocks_name, columns = tradedate)
    balance_share['initial'] = 0
    balance_share = balance_share.fillna(0)
    #2. costs table
    balance_cost = pd.DataFrame(index = stocks_name, columns = tradedate)
    balance_cost = balance_cost.fillna(0)
    #3. cash table
    balance_cash = pd.DataFrame(index = stocks_name, columns = tradedate)
    balance_cash = balance_cash.fillna(0)

    #4. portfolio without transaction costs table 
    portfolio = pd.DataFrame(index = range(1), columns = tradedate_full)
    portfolio.loc[0,tradedate[0]] = capital
    
    
    # calculate portfolio value without cost
    balance_price_full = []
    balance_price_full = balance_daily[tradedate_full]
    balance_price = balance_daily[tradedate]
    for j in range(len(tradedate)):
        i = tradedate[j]
        #current capital
        cap = portfolio.loc[0, i]
        #
        balance_cash[i] = cap * balance_weight[i]
        balance_share[i] = list(np.array(balance_cash[i])
                                      / np.array(balance_price[i]))
        balance_share[i] = balance_share[i].fillna(0)
        balance_share[i] = balance_share[i].astype(int)
        #next quarter price * current share to calculate next quarter total account value
        a = balance_price_full[tradedate_full[j+1]].fillna(0)
        portfolio.loc[0, tradedate_full[j+1]] = np.dot(balance_share[i], a)

    balance_cost = abs(np.diff(balance_share, axis = 1))
    balance_cost = balance_cost * balance_price
    balance_cost = np.sum(balance_cost, axis = 0) * transaction_percent
    balance_cost['20170901'] = 0
    
    #5. portfolio with transaction cost table
    #calcualte portolio value with transaction cost
    balance_cost.index = tradedate_full
    portfolio_cost = []
    portfolio_cost = portfolio.loc[0,] - balance_cost
    portfolio_cost.index = tradedate_full
    portfolio_cost_value = portfolio_cost.copy()
    
    #6. portfolio return P&L table
    portfolio_cumsum = portfolio_cost_value.copy()
    portfolio_cumsum.index = range(len(portfolio_cumsum))
    tmp = portfolio_cumsum.shift(1)
    tmp[0] = 0
    tmp.index = tradedate_full

    portfolio_cumsum = portfolio_cost_value- tmp
    portfolio_cumsum.index = range(len(portfolio_cumsum))
    tmp.index = range(len(tmp))
    for i in range(len(portfolio_cumsum)):
        if(tmp[i] != 0):
            portfolio_cumsum[i] = portfolio_cumsum[i]/tmp[i]

    portfolio_cumsum.index = range(len(portfolio_cumsum))
    portfolio_cumsum[0] = 0
    portfolio_return=portfolio_cumsum.copy()



    # 7. portfolio accumulative return table
    initial = 0
    for i in range(portfolio_cumsum.shape[0]):
        portfolio_cumsum[i] = initial + portfolio_cumsum[i]
        initial = portfolio_cumsum[i]
    
    #1. shares table
    #2. costs table
    #3. cash table
    #4. portfolio without transaction costs table 
    #5. portfolio with transaction cost table
    #6. portfolio return P&L table
    #7. portfolio accumulative return table








    return(balance_share,balance_cost,balance_cash,portfolio,portfolio_cost,portfolio_return,portfolio_cumsum)
    
    
    
    
    

In [215]:
#calculate mean variance allocation portolio
(balance_share_meanv,
 balance_cost_meanv,
 balance_cash_meanv,
 portfolio_meanv,
 portfolio_cost_meanv,
 portfolio_return_meanv,
 portfolio_cumsum_meanv)=cal_portfolio(stocks_name,tradedate, weight_meanv, capital, transaction_percent)


In [217]:
sum(portfolio_return_meanv)

3.7725457672472325

In [218]:
#calculate minimum variance allocation portolio
(balance_share_minv,
 balance_cost_minv,
 balance_cash_minv,
 portfolio_minv,
 portfolio_cost_minv,
 portfolio_return_minv,
 portfolio_cumsum_minv)=cal_portfolio(stocks_name,tradedate, weight_minv, capital, transaction_percent)

In [222]:
portfolio_cost_minv

19950601         998480
19950901    1.10261e+06
19951201    1.18345e+06
19960301     1.2846e+06
19960603     1.4131e+06
19960903    1.38101e+06
19961202     1.4824e+06
19970303    1.59612e+06
19970602    1.73839e+06
19970902    1.94777e+06
19971201     2.1836e+06
19980302    2.35056e+06
19980601    2.42195e+06
19980901    2.04893e+06
19981201    2.37001e+06
19990301    2.25513e+06
19990601    2.44132e+06
19990901    2.41451e+06
19991201     2.2037e+06
20000301    2.43569e+06
20000601    2.63105e+06
20000901    2.92137e+06
20001201    2.82773e+06
20010301    3.17083e+06
20010601     3.3818e+06
20010904    3.53567e+06
20011203    3.69185e+06
20020301    3.89302e+06
20020603    4.10686e+06
20020903     3.7193e+06
               ...     
20100601    6.36532e+06
20100901    6.74614e+06
20101201    7.24427e+06
20110301    7.89389e+06
20110601    8.58272e+06
20110901    7.92188e+06
20111201    7.85158e+06
20120301     1.0089e+07
20120601    9.74777e+06
20120904    1.02996e+07
20121203    1.03

In [66]:
time_ind = pd.Series(tradedate_full)
for i in range(len(time_ind)):
    b = dt.strptime(str(time_ind[i]), '%Y%m%d')
    time_ind[i] = b.strftime('%Y-%m-%d')

In [171]:

trace_value = go.Scatter(x = time_ind, y = portfolio_cost_value, mode = 'lines', name = 'Mean-Variance')


data = [trace_value]
layout = dict(title = "Portfolio Value Performance Quarterly Without Transaction cost", 
              xaxis = dict(title = 'Quarterly Trade Date'), 
              yaxis = dict(title = 'Portfolio Value'))
fig = dict(data = data, layout = layout)
plotly.offline.iplot(fig)


In [181]:
#P&L return: weighted_meanv
portfolio_2 = portfolio_cost_value.copy()
portfolio_2.index = range(len(portfolio_2))
tmp = portfolio_2.shift(1)
tmp[0] = 0
tmp.index = tradedate_full

portfolio_2 = portfolio_cost_value- tmp
portfolio_2.index = range(len(portfolio_2))
tmp.index = range(len(tmp))
for i in range(len(portfolio_2)):
    if(tmp[i] != 0):
        portfolio_2[i] = portfolio_2[i]/tmp[i]

portfolio_2.index = range(len(portfolio_2))
portfolio_2[0] = 0
meanv_return=portfolio_2.copy()



# accumulative
initial = 0
for i in range(portfolio_2.shape[0]):
    portfolio_2[i] = initial + portfolio_2[i]
    initial = portfolio_2[i]

#plotly.offline.init_notebook_mode(connected=True)
#cord = [go.Scatter(x = portfolio.columns, y = portfolio_2)]
#plotly.offline.iplot(cord)

In [182]:
sum(meanv_return)

3.7725457672472325

In [183]:
trace_return = go.Scatter(x = time_ind, y = meanv_return, mode = 'lines', name = 'Mean-Variance')

data = [trace_return]
layout = dict(title = "Portfolio Return for Mean Variance", 
              xaxis = dict(title = 'Quarterly Trade Date'), 
              yaxis = dict(title = 'Portfolio Value'))
fig = dict(data = data, layout = layout)
plotly.offline.iplot(fig)

In [184]:
trace_cumulativeR = go.Scatter(x = time_ind, y = portfolio_2, mode = 'lines', name = 'Mean-Variance')

data = [trace_cumulativeR]
layout = dict(title = "Portfolio Cumulative Return for Mean Variance", 
              xaxis = dict(title = 'Quarterly Trade Date'), 
              yaxis = dict(title = 'Portfolio Value'))
fig = dict(data = data, layout = layout)
plotly.offline.iplot(fig)