In [25]:
import pandas as pd
import numpy as np 
import random
from sklearn.decomposition import PCA
from tqdm import tqdm
import plotly.express as px 
import plotly.io as pio 
pio.renderers.default = "browser"
import ast

# Rolling Subset - new portfolios each time equal weights

In [30]:
################################################################################# PDI Function #################################################################################
def calculate_pdi_ini(num_assets, tickers, weekly_returns, iter_num): 
    
    def meanRetAn(data):             
        Result = 1
        
        for i in data:
            Result *= (1+i)
            
        Result = Result**(1/float(len(data)/52))-1
        
        return(Result)

    pca = PCA()
    PDI_dict = {}
    samples = []
    for i in range(1,iter_num):
        #samples.extend([list(x) for x in combinations(selected_tickers, number_of_assets)])
        samples.append(random.sample(list(tickers),num_assets))
    seen = set()
    samples_mini = [x for x in samples if frozenset(x) not in seen and not seen.add(frozenset(x))]



    
    for i,y in tqdm(zip(samples_mini,range(1,len(samples_mini)+1))):
        #prog = int(y/len(samples_mini)*100)
        #progress_bar.progress(prog)
        #status_text.text("{}% Complete".format(prog))
        n_assets = len(i)
        portfolio_weights_ew = np.repeat(1/n_assets, n_assets)
        port_weekly_return = weekly_returns[i].mul(portfolio_weights_ew,axis=1).sum(axis=1)
        ann_ret = meanRetAn(list(port_weekly_return))
        an_cov = weekly_returns[i].cov()
        port_std = np.sqrt(np.dot(portfolio_weights_ew.T, np.dot(an_cov, portfolio_weights_ew)))*np.sqrt(52)
        corr_matrix = np.array(weekly_returns[i].corr())
        principalComponents = pca.fit(corr_matrix)
        PDI = 2*sum(principalComponents.explained_variance_ratio_*range(1,len(principalComponents.explained_variance_ratio_)+1,1))-1
        PDI_dict[y] = {}
        PDI_dict[y]["PDI_INDEX"] = PDI
        PDI_dict[y]["# of Assets"] = len(i)
        PDI_dict[y]["Assets"] = i
        PDI_dict[y]["Sharpe Ratio"] = ann_ret/port_std
        PDI_dict[y]["Annual Return"] = ann_ret
        PDI_dict[y]["Annual STD"] = port_std
    

        


    PDI_DF = pd.DataFrame(PDI_dict).T
    PDI_DF["Assets"] = PDI_DF["Assets"].astype(str)
    PDI_DF["# of Assets"] = PDI_DF["# of Assets"].astype(str)
    PDI_DF["Sharpe Ratio"] = PDI_DF["Sharpe Ratio"].astype(float)
    PDI_DF["Annual STD"] = PDI_DF["Annual STD"].astype(float)
    PDI_DF["PDI_INDEX"] = PDI_DF["PDI_INDEX"].astype(float)
    PDI_DF["Annual Return"] = PDI_DF["Annual Return"].astype(float)
    return PDI_DF
#######################################################################################################################################################################

def calculate_pdi_roll(portfolio_pdi, weekly_returns, mean_range): 
    
    def meanRetAn(data):             
        Result = 1
        
        for i in data:
            Result *= (1+i)
            
        Result = Result**(1/float(len(data)/mean_range))-1
        
        return(Result)

    pca = PCA()
    PDI_dict = {}
    
    for i,y in tqdm(zip(portfolio_pdi,range(1,len(portfolio_pdi)+1))):
        #prog = int(y/len(samples_mini)*100)
        #progress_bar.progress(prog)
        #status_text.text("{}% Complete".format(prog))
        n_assets = len(i)
        portfolio_weights_ew = np.repeat(1/n_assets, n_assets)
        port_weekly_return = weekly_returns[i].mul(portfolio_weights_ew,axis=1).sum(axis=1)
        ann_ret = meanRetAn(list(port_weekly_return))
        an_cov = weekly_returns[i].corr()
        port_std = np.sqrt(np.dot(portfolio_weights_ew.T, np.dot(an_cov, portfolio_weights_ew)))*np.sqrt(mean_range)
        corr_matrix = np.array(weekly_returns[i].corr())
        principalComponents = pca.fit(corr_matrix)
        PDI = 2*sum(principalComponents.explained_variance_ratio_*range(1,len(principalComponents.explained_variance_ratio_)+1,1))-1
        PDI_dict[y] = {}
        PDI_dict[y]["PDI_INDEX"] = PDI
        PDI_dict[y]["# of Assets"] = len(i)
        PDI_dict[y]["Assets"] = i
        PDI_dict[y]["Sharpe Ratio"] = ann_ret/port_std
        PDI_dict[y]["Annual Return"] = ann_ret
        PDI_dict[y]["Annual STD"] = port_std
    

        


    PDI_DF = pd.DataFrame(PDI_dict).T
    PDI_DF["Assets"] = PDI_DF["Assets"].astype(str)
    PDI_DF["# of Assets"] = PDI_DF["# of Assets"].astype(str)
    PDI_DF["Sharpe Ratio"] = PDI_DF["Sharpe Ratio"].astype(float)
    PDI_DF["Annual STD"] = PDI_DF["Annual STD"].astype(float)
    PDI_DF["PDI_INDEX"] = PDI_DF["PDI_INDEX"].astype(float)
    PDI_DF["Annual Return"] = PDI_DF["Annual Return"].astype(float)

    return PDI_DF

############################################################## Trading Strategy #################################################################################
# Trading algorithm that finds new portfolios each quarter
def pca_per_rolling(return_data, portfolio, interval, ret_range_mean, iter_num, pdi_train):
        data = return_data.copy() # data containing weekly returns
        tickers = list(data.columns)
        data.index = pd.to_datetime(data.index) # Conveting the index which is date to datetime
        weeks_list = data[data.index.year > 2015].index # grabbing all index dates
        data.index = data.index.to_period(interval) # converting the index to quarterly sets
        periods = data.index.unique() # taking the unique quarters to loop

        
        list_range = [] # saving rolling periods
        list_period = periods[4:] # periods of return
        for i in range(1,21): 
            list_range.append(periods[i:4+i])

        #print(periods)
        first_period = list_period[0] # the first period of the time frame
        remaining_periods = list_period[1:] # the remianing periods for returns calculations
        pdi_rolling_periods = list_range[:-1] # all periods minus the last

        ########################################  Function for pdi ########## ########## ########## ########## ########### #########  
        def pdi_period(returns, period, port):
            pca = PCA()
            n_assets = len(port)
            weights = np.repeat(1/n_assets, n_assets)
            corr_matrix = np.array(returns.loc[period][port].mul(weights).cov())
            principalComponents = pca.fit(corr_matrix)
            return 2*sum(principalComponents.explained_variance_ratio_*range(1,len(principalComponents.explained_variance_ratio_)+1,1))-1
        ########## ########## ########## ##########  Mean Annual Return Function ########## ########## ########## ########## ########## 
        def meanRetAn(data):             
            Result = 1
            
            for i in data:
                Result *= (1+i)
                
            Result = Result**(1/float(len(data)/ret_range_mean))-1
            
            return(Result)

        ########## ########## ########## ##########  Portfolio Return ########## ########## ########## ########## ########## ########## 
        
        def port_ret(returns,port, period): # function for calculating returns
            n_assets = len(port)
            portfolio_weights_ew = np.repeat(1/n_assets, n_assets)
            port_return = returns.loc[period][port].mul(portfolio_weights_ew,axis=1).sum(axis=1)
            return  port_return



        pdi_performance = [] #saving performance of max pdi
        sharpe_performance= [] #saving performance of optimal portfolio of max portfolio
        sharpe_2_performance = []
        #equal_performance = [] #saving equal weight portflio performance 


        assets_pdi_performance = [] #weights for max pdi allocation over time
        assets_sharpe_performance= [] #weights for max sharpe ratio allocation over time
        assets_sharpe_2_performance= []  # weights for equal portfolio over time "same all periods"
        periods_weights = [] # saving periods for the weights allocation

        pdi_pdi_performance= [] #pdi for max pdi allocation over time
        pdi_sharpe_performance= []
        pdi_sharpe_2_performance = [] #pdi for max sharpe ratio allocation over time
        #pdi_weights_equal_performance = [] #pdi for equal portfolio over time "same all periods"



        
  
        ############################################################ Calculate first period ####################################################################################
        n_assets = len(portfolio)
        weights = np.repeat(1/n_assets, n_assets)

        period_p = []
        period_p.append(first_period)

        assets_pdi_performance.append(portfolio)
        pdi_performance.extend(port_ret(returns = data,period = first_period,port = portfolio)) # returns of first period
        pdi_pdi_performance.append(pdi_train) # calculating pdi for first period

        assets_sharpe_2_performance.append(portfolio)
        sharpe_2_performance.extend(port_ret(returns = data,period = first_period,port = portfolio))    # returns of first period
        pdi_sharpe_2_performance.append(pdi_train)# calculating pdi for first period


        assets_sharpe_performance.append(portfolio)
        sharpe_performance.extend(port_ret(returns = data,period = first_period,port = portfolio))    # returns of first period
        pdi_sharpe_performance.append(pdi_train)# calculating pdi for first period

        #equal_performance.extend(port_ret(returns = data,period = first_period,port = portfolio))   # returns of first period
        #pdi_weights_equal_performance.append(pdi_period(returns = data,period = first_period,port = portfolio))# calculating pdi for first period

        periods_weights.append(first_period) # saving first period

        tickers_weekly = list(data.columns)
        #number_of_assets = [len(initial_port)]







        ######################################################## Calculation of portfolio perfomnce #############################################################################

        for init_time, next_time in zip(pdi_rolling_periods, remaining_periods):

    
        ############ Portfolio Creation #############################
            samples = []
            for i in range(1,iter_num):
                #samples.extend([list(x) for x in combinations(selected_tickers, number_of_assets)])
                samples.append(random.sample(tickers,len(portfolio)))
            
            seen = set()
            samples_mini = [x for x in samples if frozenset(x) not in seen and not seen.add(frozenset(x))]






            print("Rolling range for calculatio: {} - period of return: {}".format(init_time, next_time))
            PDI_DF = calculate_pdi_roll(portfolio_pdi = samples_mini ,weekly_returns = data.loc[init_time].dropna(axis=1), mean_range = ret_range_mean)
            period_p.append(next_time)
            ################################################## Taking the higest PDI ##########################################################################

            id_1 = PDI_DF["PDI_INDEX"].idxmax()
            port_max_pdi_assets = ast.literal_eval(PDI_DF["Assets"][id_1]) # getting weights for period
            port_max_pdi_assets_pdi = PDI_DF["PDI_INDEX"][id_1]

            assets_pdi_performance.append(port_max_pdi_assets) # saving weights for period
            port_max_ret_period = port_ret(returns = data, port= port_max_pdi_assets,period = next_time)  # calculating return for periods 
            pdi_performance.extend(port_max_ret_period) # saving return 
            pdi_pdi_performance.append(port_max_pdi_assets_pdi) # calculating pdi for first period

            ################################################## Taking the higest Sharpe Ration - PDI ##########################################################################
            id_sharpe = PDI_DF["Sharpe Ratio"].idxmax()
            port_max_sharpe_assets = ast.literal_eval(PDI_DF["Assets"][id_sharpe]) # getting weights for period
            port_max_sharpe_assets_pdi = PDI_DF["PDI_INDEX"][id_sharpe]

            assets_sharpe_performance.append(port_max_sharpe_assets)   # saving weights for periods
            port_max_ret_period_sharpe = port_ret(returns = data, port= port_max_sharpe_assets,period = next_time)  # calculating return for periods 
            sharpe_performance.extend(port_max_ret_period_sharpe) # saving return 
            pdi_sharpe_performance.append(port_max_sharpe_assets_pdi) # calculating pdi for first period


            ################################################## Taking the higest Sharpe Ration - PDI above 2 ##########################################################################
            id_sharpe_2 = PDI_DF["Sharpe Ratio"].idxmax()
            port_max_sharpe_assets_2 = ast.literal_eval(PDI_DF["Assets"][id_sharpe_2]) # getting weights for period
            port_max_sharpe_2_assets_pdi = PDI_DF["PDI_INDEX"][id_sharpe_2]

            assets_sharpe_2_performance.append(port_max_sharpe_assets_2)   # saving weights for periods
            port_max_ret_period_sharpe_2 = port_ret(returns = data, port= port_max_sharpe_assets_2,period = next_time)  # calculating return for periods 
            sharpe_2_performance.extend(port_max_ret_period_sharpe_2) # saving return 
            pdi_sharpe_2_performance.append(port_max_sharpe_2_assets_pdi) # calculating pdi for first period




        performance_frame = pd.DataFrame()
        performance_frame["Time"] = weeks_list
        performance_frame["Max PDI"] = pdi_performance
        performance_frame["Max Sharpe Ratio"] = sharpe_performance
        performance_frame["Max Sharpe Ratio - PDI 2"] = sharpe_2_performance
        performance_frame["Max PDI Cummulative"] = performance_frame["Max PDI"].cumsum(axis=0) # cummulative returns max pdi
        performance_frame["Max Sharpe Ratio Cummulative"] = performance_frame["Max Sharpe Ratio"].cumsum(axis=0) #cummulative return sharpe ratio
        performance_frame["Max Sharpe Ratio Cummulative - PDI 2"] = performance_frame["Max Sharpe Ratio - PDI 2"].cumsum(axis=0) #cummulative return sharpe ratio

        weights_frame = pd.DataFrame()
        weights_frame["Period"] = period_p
        weights_frame["Max PDI - Assets"] = assets_pdi_performance
        weights_frame["Max Sharpe - Assets"] = assets_sharpe_performance
        weights_frame["Max Sharpe - Assets PDI 2"] = assets_sharpe_2_performance
        weights_frame["Max PDI - PDI Rolling 52 weeks"] = pdi_pdi_performance
        weights_frame["Max sharpe - PDI Rolling 52 weeks"] = pdi_sharpe_performance
        weights_frame["Max sharpe - PDI 2 Rolling 52 weeks"] = pdi_sharpe_2_performance








        return performance_frame, weights_frame

In [31]:
returns_r = pd.read_csv("weeklyReturns.csv", index_col="Date") # loading returns dataframe
returns_r.index = pd.to_datetime(returns_r.index) # converting returns dataframe to datetime
returns_r_train = returns_r[returns_r.index.year <= 2015] # training on data from 2015
mst = pd.read_csv("MST_Universe.csv",index_col="Ticker")
mst = mst.loc[mst.index != "TUSA"]
ticks = list(mst.index)
return_train = returns_r_train[ticks]
       

In [32]:
#Getting Data

returns_weekly = pd.read_csv("weeklyReturns.csv", index_col="Date") # loading returns dataframe
returns_weekly = returns_weekly[['IWO', 'IHI', 'VIOG', 'PSJ', 'XMHQ', 'BBP', 'ARKW', 'DWAS', 'FYC', 'FEMS', 'XTN', 'KRE', 'IYH', 'IAK', 'SIZE', 'IAT']]
returns_weekly.index = pd.to_datetime(returns_weekly.index) # converting returns dataframe to datetime

In [33]:
np.random.seed(42)
random.seed(42)
#Defining training data
train_return = returns_weekly[returns_weekly.index.year <= 2015] # training on data from 2015
dict_per = {}

for ii in ["PDI_INDEX","Sharpe Ratio"]:
    dict_per[ii] = {}
    for i in [5,6]:
        dict_per[ii][i] = {}
        print("---------------------- Calculating diversification and performnce for training period -------------------------------")
        pdi_train = calculate_pdi_ini(num_assets = i, tickers = list(train_return.columns) , weekly_returns = train_return, iter_num= 50000) # training on data from 2015 - getting max PDI portflio

        id_index = pdi_train[ii].idxmax() # getting index id for max pdi
        id_index_pdi = pdi_train["PDI_INDEX"][id_index]
        assets_port = ast.literal_eval(pdi_train.loc[id_index]["Assets"]) # max pdi portfolio

        print("---------------------- Selcted Portfolio -------------------------------")
        print(assets_port)
        print("------------------------------------------------------------------------")
        ini_porti = assets_port # starting portfolio 
        print("-----------------------------------  Calculating Performance of selected portfolio -----------------------------------")
        performance, weights = pca_per_rolling(return_data = returns_weekly, portfolio = ini_porti, interval = "Q", ret_range_mean = 52, iter_num = 50000, pdi_train=id_index_pdi) # running strategy for 2016 and   forward

        print("----------------------------------- Done -----------------------------------")
        print("----------------------------------- Access dataframes - performance and weights  -----------------------------------")
        dict_per[ii][i]["performance"] = performance
        dict_per[ii][i]["weights"] = weights

---------------------- Calculating diversification and performnce for training period -------------------------------
---------------------- Selcted Portfolio -------------------------------
['BBP', 'FEMS', 'ARKW', 'KRE', 'XTN']
------------------------------------------------------------------------
-----------------------------------  Calculating Performance of selected portfolio -----------------------------------
Rolling range for calculatio: PeriodIndex(['2015Q2', '2015Q3', '2015Q4', '2016Q1'], dtype='period[Q-DEC]', name='Date', freq='Q-DEC') - period of return: 2016Q2
Rolling range for calculatio: PeriodIndex(['2015Q3', '2015Q4', '2016Q1', '2016Q2'], dtype='period[Q-DEC]', name='Date', freq='Q-DEC') - period of return: 2016Q3
Rolling range for calculatio: PeriodIndex(['2015Q4', '2016Q1', '2016Q2', '2016Q3'], dtype='period[Q-DEC]', name='Date', freq='Q-DEC') - period of return: 2016Q4
Rolling range for calculatio: PeriodIndex(['2016Q1', '2016Q2', '2016Q3', '2016Q4'], dtype='perio

4368it [00:10, 419.27it/s]
4368it [00:10, 408.62it/s]
4368it [00:10, 430.07it/s]
4368it [00:10, 431.03it/s]
4368it [00:10, 429.09it/s]
4368it [00:10, 433.90it/s]
4368it [00:10, 425.82it/s]
4368it [00:10, 429.42it/s]
4368it [00:10, 431.45it/s]
3513it [00:08, 412.27it/s]


KeyboardInterrupt: 

In [24]:
px.scatter(pdi_train, x="PDI_INDEX", y="Sharpe Ratio", color="Annual STD")

In [12]:
for i in [4,5,6]:
    fig = px.line(dict_per[i]["performance"], x="Time", y = ["Max PDI Cummulative", "Max Sharpe Ratio Cummulative"])
    fig.show()      
    

In [13]:
for i in [4,5,6]:
    print(dict_per[i]["performance"][["Max PDI","Max Sharpe Ratio"]].std(axis=0)*np.sqrt(52))


Max PDI             0.222408
Max Sharpe Ratio    0.205929
dtype: float64
Max PDI             0.217640
Max Sharpe Ratio    0.199287
dtype: float64
Max PDI             0.216081
Max Sharpe Ratio    0.198077
dtype: float64


In [19]:
def meanRetAn(data):             
    Result = 1
    
    for i in range(len(data.index)):
        Result *= (1+data.iloc[i,:])
        
    Result = Result**(1/float(len(data.index)/52))-1
     
    return(Result)

In [20]:
anret = {}
for i in [4,5,6]:
    anret[i] = meanRetAn(dict_per[i]["performance"][["Max PDI","Max Sharpe Ratio"]])

In [21]:
for i in [4,5,6]:
    print("---------------------------------")
    print("Number of assets",i)
    for z in ["Max PDI","Max Sharpe Ratio"]:
        print(z,anret[i][z]/(dict_per[i]["performance"][z].std(axis=0)*np.sqrt(52)))

---------------------------------
Number of assets 4
Max PDI 0.6931978183767277
Max Sharpe Ratio 0.6722212765157859
---------------------------------
Number of assets 5
Max PDI 0.8635633985100297
Max Sharpe Ratio 0.8027144330295413
---------------------------------
Number of assets 6
Max PDI 0.9430989575126766
Max Sharpe Ratio 0.8019865229324924


In [15]:
for i in [4,5,6]:
    print(dict_per[i]["weights"])

           Max PDI - Assets      Max Sharpe - Assets  \
0     [XTN, ARKW, BBP, IAT]    [XTN, ARKW, BBP, IAT]   
1    [FEMS, XTN, ARKW, BBP]    [PSJ, IHI, IAK, SIZE]   
2    [XMHQ, ARKW, IAK, IHI]    [IHI, PSJ, SIZE, IAK]   
3     [ARKW, IAT, XTN, IYH]   [FEMS, SIZE, IHI, PSJ]   
4     [KRE, IWO, XTN, XMHQ]     [KRE, XTN, IAT, IAK]   
5   [ARKW, SIZE, IHI, FEMS]   [FEMS, IAT, ARKW, BBP]   
6    [XTN, IAK, DWAS, SIZE]   [FEMS, ARKW, BBP, IAK]   
7     [IHI, BBP, XTN, ARKW]  [IAK, ARKW, SIZE, FEMS]   
8   [FEMS, ARKW, BBP, VIOG]   [IAK, IHI, FEMS, ARKW]   
9   [VIOG, FEMS, BBP, ARKW]   [FEMS, PSJ, IAK, ARKW]   
10   [PSJ, BBP, XMHQ, FEMS]   [ARKW, PSJ, KRE, XMHQ]   
11    [FEMS, XTN, PSJ, BBP]   [XMHQ, VIOG, PSJ, IHI]   
12   [FEMS, XMHQ, XTN, BBP]    [ARKW, IYH, PSJ, IHI]   
13   [XMHQ, IYH, ARKW, BBP]    [IYH, PSJ, IHI, SIZE]   
14    [SIZE, PSJ, IYH, BBP]    [PSJ, IAK, IHI, SIZE]   
15   [XMHQ, PSJ, FEMS, BBP]    [IHI, SIZE, PSJ, IAK]   
16    [IAK, BBP, FEMS, PSJ]    [PSJ, FEMS, IHI, 