In [1]:
import sys
sys.path.insert(1, 'C:/Users/raena/Documents/Imperial College London/msc_thesis/code/functions_and_classes')

from dataRead import *
from glob import glob
import pandas as pd
import numpy as np

## Index Period 4

Backtest:
* Jan 2017 - Dec 2022
* Major event: Covid-19 pandemic

In [2]:
# Load the quarterly DJI index weights and the per-stock price / volume history.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling; only
# point these globs at files from a trusted source.


def _ticker_from_path(path):
    """Return the first space-separated token of the file name in `path`.

    Both '/' and '\\' are treated as directory separators, so the result does
    not depend on which separator glob produced on the current platform.
    Only the file name is inspected, so a '.' inside the ticker is preserved.
    """
    file_name = path.replace('\\', '/').rsplit('/', 1)[-1]
    return file_name.split(' ')[0]


def _load_frames_by_ticker(paths):
    """Read every pickle in `paths` into a dict of DataFrames keyed by ticker.

    Each frame is re-indexed on its 'date' column, converted to datetimes.
    """
    frames = {}
    for path in paths:
        df = pd.read_pickle(path).set_index('date')
        df.index = pd.to_datetime(df.index)
        frames[_ticker_from_path(path)] = df
    return frames


# Index weights: one pickle per period, keyed by the text after the last '_'
# in the file stem (e.g. 'DJI_<date>.pickle' -> '<date>').
DJI_weights_paths = glob('../../data/dataBBG/DJI_index_weights/DJI_*.pickle')
DJI_weights_per_quarter = {}
for path in DJI_weights_paths:
    date = path.split('.')[-2].split('_')[-1]
    DJI_weights_per_quarter[date] = pd.read_pickle(path)

# Price and volume data for each stock in the DJI Index
DJI_PX_LAST_paths = glob('../../data/dataBBG/DJI_stock_PX_LAST/* Equity.pickle')
DJI_PX_VOLUME_paths = glob('../../data/dataBBG/DJI_stock_PX_VOLUME/* Equity.pickle')

DJI_price_data = _load_frames_by_ticker(DJI_PX_LAST_paths)
DJI_volume_data = _load_frames_by_ticker(DJI_PX_VOLUME_paths)

# The AAPL price index is used as the trading calendar for the whole notebook.
trading_days = pd.to_datetime(list(DJI_price_data['AAPL'].index))

In [3]:
# Full backtest period (both bounds inclusive) and the matching positions in
# `trading_days`: first day on/after the start, last day on/before the end.
start_day = '2017-01-01'
end_day = '2022-12-31'

on_or_after_start = trading_days >= start_day
on_or_before_end = trading_days <= end_day
start_day_count = np.flatnonzero(on_or_after_start)[0]
end_day_count = np.flatnonzero(on_or_before_end)[-1]

In [4]:
# Candidate thresholds (as fractions) and lookback windows for the validation run.
returnThresholdOptions = np.arange(10, 40, step=5) / 100
volumeThresholdOptions = np.arange(-40, 50, step=10) / 100
windowOptions = [20]

# Every (returns, volume) combination, with the returns threshold varying slowest.
threshold_list = []
for cR in returnThresholdOptions:
    for cV in volumeThresholdOptions:
        threshold_list.append({'returns': cR, 'volume': cV})

# Both methods are given the very same list object.
thresholdvalues = dict.fromkeys(['GS1', 'GS2'], threshold_list)

# Validation period and the matching positions in `trading_days`.
validation_start_day = '2022-01-01'
validation_end_day = end_day
validation_start_day_count = np.flatnonzero(trading_days >= validation_start_day)[0]
validation_end_day_count = np.flatnonzero(trading_days <= validation_end_day)[-1]

# Rows of the metric series that are kept for every strategy variant.
performance_metrics = [
    'Annual. Returns',
    'Annual. Stand. Dev',
    'Annual. Skew',
    'Annual. Kurtosis',
    'Total Returns',
    'Arith. Returns',
    'Geom. Returns',
    'Sharpe Ratio',
    'Max. Drawdown',
    'Annual. Turnover',
    'VaR',
]

In [5]:
# Run the validation backtest for every lookback window and collect the
# performance metrics of each strategy variant in `performanceResults`.
# NOTE(review): star imports hide where names come from; consider importing
# PriceVolumeIndexMinVarStrategy and PerformanceAnalysis explicitly.
from priceVolumeIndexStrategyMinVariance import *
from performanceAnalysis import *

# Fields of the per-step backtest records that feed the performance analysis.
backtest_fields = ['Portfolio Returns', 'Portfolio Value', 'Volume Bought', 'Volume Sold']

validationResultsDict = {}  # not populated in this cell
performanceResults = {}

for windowSize in windowOptions:
    # Prepend windowSize + 1 days of history ahead of the validation start.
    start = validation_start_day_count - (windowSize + 1)
    if start < 0:
        # A negative start would silently wrap around to the end of trading_days.
        raise ValueError(
            f'Not enough history before {validation_start_day} '
            f'for a lookback window of {windowSize} days')
    # NOTE(review): `end` is the position of the last day <= validation_end_day and
    # the slice end is exclusive, so that day is not part of trading_days_subset.
    # Confirm this is intended.
    end = validation_end_day_count
    trading_days_subset = trading_days[start:end]

    sInst = PriceVolumeIndexMinVarStrategy(
                indexDict = DJI_weights_per_quarter,
                trading_days = trading_days_subset,
                priceDict = DJI_price_data,
                volumeDict = DJI_volume_data,
                lookbackWindow = windowSize,
                numberOfMembers = 30,
                factor = 252.0)
    sInst = sInst.getStrategyWeights(
        methods = ['GS1','GS2'],
        thresholdvalues = thresholdvalues)

    b = sInst.backtestStrategy()

    # The method names are read from the backtest entry keyed by windowSize + 1.
    methodList = list(b[windowSize+1].keys())
    methodDict = {field: {} for field in backtest_fields}

    # NOTE(review): performanceResults is keyed by method only, so with more than
    # one entry in windowOptions a later window overwrites an earlier one.
    for method in methodList:
        # Build the per-method frame once (one row per backtest entry) and label
        # its rows with the last len(frame) days of the subset.
        methodFrame = pd.DataFrame([b[k][method] for k in b.keys()])
        methodFrame.index = trading_days_subset[-methodFrame.shape[0]:]
        for field in backtest_fields:
            methodDict[field][method] = methodFrame[field]

        perfInst = PerformanceAnalysis(portfolioReturns = methodDict['Portfolio Returns'][method],
                                    portfolioValue = methodDict['Portfolio Value'][method],
                                    volBought = methodDict['Volume Bought'][method],
                                    volSold = methodDict['Volume Sold'][method],
                                    factor = 252.0)
        perfRes = perfInst.metricSeries(rf = 0)
        performanceResults[method] = {metric: perfRes.loc[metric] for metric in performance_metrics}

    print('\n')

  from .autonotebook import tqdm as notebook_tqdm
  corG = H / np.dot(h, h.T) # correlation matrix
  corG = H/(T - nNN) # correlation matrix
100%|██████████| 249/249 [1:02:28<00:00, 15.06s/it]
100%|██████████| 249/249 [03:25<00:00,  1.21it/s]






In [6]:
# Persist the metrics table; the file name carries the first lookback window.
# `fp` already ends with '/', so the resulting path contains a double slash.
results_to_save = pd.DataFrame(performanceResults)
fp = '../../code/min_variance/'
fname = "period_4_" + str(windowOptions[0])
output_path = f'{fp}/{fname}.pickle'
results_to_save.to_pickle(output_path)

In [7]:
# Rank every strategy variant by 'Annual. Stand. Dev', lowest first.
results_table = pd.DataFrame(performanceResults)
results_table.loc['Annual. Stand. Dev'].sort_values()

GS2_0.2_-0.2     0.161014
GS2_0.15_-0.2    0.161135
GS2_0.25_-0.2    0.161280
GS2_0.1_-0.2     0.161662
GS2_0.3_-0.2     0.162097
                   ...   
GS1_0.1_0.4      0.175703
GS1_0.35_0.4     0.175807
GS1_0.2_0.4      0.176182
GS1_0.15_0.4     0.176728
GS1_0.3_0.4      0.176793
Name: Annual. Stand. Dev, Length: 108, dtype: float64