In [351]:
import pandas as pd
import numpy as np 
import random
from sklearn.decomposition import PCA
from tqdm import tqdm
import plotly.express as px 
import plotly.io as pio 
pio.renderers.default = "browser"
import ast

# Weights Strategy

In [420]:
# Finding the initial portfolio from the desired investment universe

def calculate_pdi(num_assets, tickers, weekly_returns, n_draws=49999, seed=None):
    """Score randomly drawn equal-weight portfolios by diversification and performance.

    ``n_draws`` random combinations of ``num_assets`` tickers are sampled,
    duplicates (same set of tickers, any order) are discarded, and every
    remaining combination is evaluated as an equally weighted portfolio.

    Parameters
    ----------
    num_assets : int
        Number of tickers in every candidate portfolio.
    tickers : iterable
        Ticker labels to sample from; each must be a column of ``weekly_returns``.
    weekly_returns : pandas.DataFrame
        Returns with one column per ticker. The annualisation below uses a
        factor of 52, i.e. rows are treated as weekly observations.
    n_draws : int, default 49999
        Number of random draws before de-duplication (the value that used to be
        hard-coded).
    seed : int or None, default None
        When given, sampling uses a private ``random.Random(seed)`` so the run
        is reproducible. When ``None`` the module-level ``random`` state is
        used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        One row per unique combination, indexed 1..N, with columns
        ``PDI_INDEX`` (float), ``# of Assets`` (str), ``Assets`` (the ticker
        list rendered as a string), ``Sharpe Ratio``, ``Annual Return`` and
        ``Annual STD`` (floats).

    Raises
    ------
    ValueError
        If ``n_draws`` is smaller than 1, or (from ``random.sample``) if
        ``num_assets`` exceeds the number of tickers.
    """
    if n_draws < 1:
        raise ValueError("n_draws must be at least 1")

    ticker_pool = list(tickers)  # built once instead of once per draw
    rng = random if seed is None else random.Random(seed)

    def meanRetAn(data):
        """Compound the returns in ``data`` and rescale the result to 52 observations."""
        growth = float(np.prod(1 + np.asarray(data, dtype=float)))
        return growth ** (1 / float(len(data) / 52)) - 1

    # Draw the candidates, keeping only the first occurrence of each ticker set.
    seen = set()
    candidates = []
    for _ in range(n_draws):
        combo = rng.sample(ticker_pool, num_assets)
        key = frozenset(combo)
        if key not in seen:
            seen.add(key)
            candidates.append(combo)

    # Every candidate has the same size, so the equal weights are loop-invariant.
    equal_weights = np.repeat(1 / num_assets, num_assets)
    pca = PCA()

    rows = []
    for combo in tqdm(candidates, total=len(candidates)):
        asset_returns = weekly_returns[combo]

        port_weekly_return = asset_returns.mul(equal_weights, axis=1).sum(axis=1)
        ann_ret = meanRetAn(port_weekly_return)
        port_std = np.sqrt(
            np.dot(equal_weights, np.dot(asset_returns.cov(), equal_weights))
        ) * np.sqrt(52)

        # PCA is fitted on the correlation matrix itself (its rows act as the
        # observations); the PDI is 2 * sum(k * ratio_k) - 1 over the ranked
        # explained-variance ratios.
        # NOTE(review): this differs from an eigen-decomposition of the
        # correlation matrix - confirm this is the intended PDI definition.
        ratios = pca.fit(asset_returns.corr().to_numpy()).explained_variance_ratio_
        pdi = 2 * float(np.dot(ratios, np.arange(1, len(ratios) + 1))) - 1

        rows.append(
            {
                "PDI_INDEX": pdi,
                "# of Assets": len(combo),
                "Assets": combo,
                "Sharpe Ratio": ann_ret / port_std,
                "Annual Return": ann_ret,
                "Annual STD": port_std,
            }
        )

    PDI_DF = pd.DataFrame(
        rows,
        columns=["PDI_INDEX", "# of Assets", "Assets", "Sharpe Ratio", "Annual Return", "Annual STD"],
        index=pd.RangeIndex(1, len(rows) + 1),  # rows stay labelled 1..N
    )
    # "Assets" is stored as text; callers recover the list with ast.literal_eval.
    PDI_DF["Assets"] = PDI_DF["Assets"].astype(str)
    PDI_DF["# of Assets"] = PDI_DF["# of Assets"].astype(str)
    for column in ("Sharpe Ratio", "Annual STD", "PDI_INDEX", "Annual Return"):
        PDI_DF[column] = PDI_DF[column].astype(float)

    return PDI_DF

############################################################## Trading Strategy #################################################################################
# Trading algorithm that takes the chosen portfolio and allocates weights accordingly
def calculate_pdi_weights(returns, return_mean_range, n_candidates=20000, seed=None):
    """Score candidate weight vectors for a fixed set of assets.

    The first candidate is always the equal-weight allocation; the remaining
    ``n_candidates - 1`` are random allocations on a 0.01 grid. Duplicates are
    removed (first occurrence wins, so row 0 of the result is always the
    equal-weight portfolio) and every remaining vector is scored.

    Parameters
    ----------
    returns : pandas.DataFrame
        Returns with one column per asset.
    return_mean_range : number
        Scaling constant for the return and volatility figures: the compounded
        return is raised to ``1 / (len(returns) / return_mean_range)`` and the
        standard deviation is multiplied by ``sqrt(return_mean_range)``.
    n_candidates : int, default 20000
        Number of weight vectors generated before de-duplication, including the
        equal-weight one (the value that used to be hard-coded).
    seed : int or None, default None
        When given, weights are drawn from a private ``random.Random(seed)``;
        when ``None`` the module-level ``random`` state is used, as before.

    Returns
    -------
    pandas.DataFrame
        One row per unique weight vector, indexed from 0, with columns
        ``PDI_INDEX``, ``# of Assets``, ``Sharpe Ratio``, ``Annual Return``,
        ``weights`` (a list of floats) and ``Annual STD``.

    Raises
    ------
    ValueError
        If ``returns`` has no columns or ``n_candidates`` is smaller than 1.
    """
    n = len(returns.columns)
    if n == 0:
        raise ValueError("returns has no columns to allocate weights to")
    if n_candidates < 1:
        raise ValueError("n_candidates must be at least 1")

    rng = random if seed is None else random.Random(seed)

    def draw_weights():
        """Draw one random allocation on a 0.01 grid that sums to exactly 1.

        Rounding each normalised weight independently can leave a total of
        0.98-1.02, so the weights are floored to whole hundredths and the
        missing hundredths are handed to the largest fractional remainders.
        """
        raw = np.array([rng.random() for _ in range(n)])
        scaled = raw / raw.sum() * 100
        units = np.floor(scaled).astype(int)
        shortfall = 100 - int(units.sum())
        if shortfall > 0:
            top_up = np.argsort(scaled - units)[::-1][:shortfall]
            units[top_up] += 1
        return tuple((units / 100).tolist())

    # Ordered de-duplication through a set: linear instead of a list scan per candidate.
    candidates = [tuple([1 / n] * n)]
    seen = set(candidates)
    for _ in range(n_candidates - 1):
        drawn = draw_weights()
        if drawn not in seen:
            seen.add(drawn)
            candidates.append(drawn)

    def meanRetAn(data):
        """Compound the returns in ``data`` and rescale by ``return_mean_range``."""
        growth = float(np.prod(1 + np.asarray(data, dtype=float)))
        return growth ** (1 / float(len(data) / return_mean_range)) - 1

    pca = PCA()
    an_cov = returns.cov()  # does not depend on the weights, so computed once
    std_scale = np.sqrt(return_mean_range)

    rows = []
    for candidate in tqdm(candidates):
        weights = list(candidate)

        port_ret = returns.mul(weights, axis=1).sum(axis=1)
        ann_ret = meanRetAn(port_ret)
        port_std = np.sqrt(np.dot(np.array(weights), np.dot(an_cov, weights))) * std_scale

        # PCA is fitted on the covariance matrix of the weight-scaled returns;
        # the PDI is 2 * sum(k * ratio_k) - 1 over the ranked ratios.
        weighted_cov = returns.mul(weights).cov().to_numpy()
        ratios = pca.fit(weighted_cov).explained_variance_ratio_
        pdi = 2 * float(np.dot(ratios, np.arange(1, len(ratios) + 1))) - 1

        rows.append(
            {
                "PDI_INDEX": pdi,
                "# of Assets": len(weights),
                "Sharpe Ratio": ann_ret / port_std,
                "Annual Return": ann_ret,
                "weights": weights,
                "Annual STD": port_std,
            }
        )

    df = pd.DataFrame(
        rows,
        columns=["PDI_INDEX", "# of Assets", "Sharpe Ratio", "Annual Return", "weights", "Annual STD"],
    )
    for column in ("PDI_INDEX", "Sharpe Ratio", "Annual Return", "Annual STD"):
        df[column] = df[column].astype(float)

    return df
############################################################## Trading Strategy #################################################################################
def pca_per_weights(return_data, portfolio, interval, ret_range_mean):
    """Walk forward through the data period by period, re-weighting a fixed portfolio.

    The rows of ``return_data`` are grouped into periods of frequency
    ``interval``. The first period is held with equal weights. For every later
    period, ``calculate_pdi_weights`` is run on the *previous* period and three
    allocations are applied to the current one: the weights with the highest
    PDI, the weights with the highest Sharpe ratio, and row 0 of the candidate
    table (the equal-weight allocation).

    Parameters
    ----------
    return_data : pandas.DataFrame
        Returns with a date-like index and one column per asset. The frame is
        copied; the caller's object is not modified.
    portfolio : list
        The asset labels. Its length sets the initial equal weights and it is
        recorded unchanged for every period.
        Presumably matches ``return_data.columns`` - verify against the caller.
    interval : str
        Period frequency passed to ``DatetimeIndex.to_period`` (e.g. ``"Q"``).
    ret_range_mean : number
        Forwarded to ``calculate_pdi_weights`` as ``return_mean_range``.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``performance_frame``: one row per observation with ``Time``, the
        per-observation return of each strategy and its running sum.
        ``weights_frame``: one row per period with the period, the weights used
        by each strategy, the PDI realised with those weights, and ``Assets``.

    Raises
    ------
    ValueError
        If ``return_data`` has no rows.
    """
    data = return_data.copy()
    data.index = pd.to_datetime(data.index)
    weeks_list = data.index  # original timestamps, kept for the "Time" column
    data.index = data.index.to_period(interval)
    periods = data.index.unique()
    if len(periods) == 0:
        raise ValueError("return_data contains no rows")

    pca = PCA()  # one estimator, refitted for every PDI evaluation

    def rows_in(period):
        # A boolean mask always yields a DataFrame; label lookup with .loc can
        # collapse to a Series when the period holds a single row.
        return data.loc[data.index == period]

    def pdi_period(period, weights):
        """PDI of the weight-scaled returns observed during ``period``."""
        weighted_cov = rows_in(period).mul(weights).cov().to_numpy()
        ratios = pca.fit(weighted_cov).explained_variance_ratio_
        return 2 * float(np.dot(ratios, np.arange(1, len(ratios) + 1))) - 1

    def port_ret(period, weights):
        """Per-observation portfolio return during ``period``."""
        return rows_in(period).mul(weights, axis=1).sum(axis=1)

    # ---------------------------------------------------------- first period
    first_period = periods[0]
    print(first_period)

    ini_weights = np.repeat(1 / len(portfolio), len(portfolio))
    # All three strategies start identically, so evaluate the first period once.
    first_returns = port_ret(first_period, ini_weights)
    first_pdi = pdi_period(first_period, ini_weights)

    strategies = ("max_pdi", "max_sharpe", "equal")
    realised_returns = {name: list(first_returns) for name in strategies}
    used_weights = {name: [ini_weights] for name in strategies}
    realised_pdi = {name: [first_pdi] for name in strategies}

    periods_weights = [first_period]
    assets = [portfolio]

    # ------------------------------------------------------- walk forward
    for init_time, next_time in zip(periods[:-1], periods[1:]):
        print("first time: {} - last time: {}".format(init_time, next_time))

        # Candidate weights are scored on the previous period only.
        # NOTE(review): dropna(axis=1) can remove columns, in which case the
        # weight vectors are shorter than the column count of the next period
        # and the multiplication below fails - confirm inputs are NaN-free.
        PDI_DF = calculate_pdi_weights(
            returns=rows_in(init_time).dropna(axis=1),
            return_mean_range=ret_range_mean,
        )
        periods_weights.append(next_time)
        assets.append(portfolio)

        chosen = {
            "max_pdi": PDI_DF.loc[PDI_DF["PDI_INDEX"].idxmax(), "weights"],
            "max_sharpe": PDI_DF.loc[PDI_DF["Sharpe Ratio"].idxmax(), "weights"],
            # Row 0 of the candidate table is the equal-weight allocation.
            "equal": PDI_DF.iloc[0]["weights"],
        }
        for name in strategies:
            period_weights = chosen[name]
            used_weights[name].append(period_weights)
            # Returns and PDI are those realised in the period being traded.
            realised_returns[name].extend(port_ret(next_time, period_weights))
            realised_pdi[name].append(pdi_period(next_time, period_weights))

    # ------------------------------------------------------------ results
    performance_frame = pd.DataFrame()
    performance_frame["Time"] = weeks_list
    performance_frame["Equal Weights"] = realised_returns["equal"]
    performance_frame["Max PDI Weights"] = realised_returns["max_pdi"]
    performance_frame["Max Sharpe Ratio Weights"] = realised_returns["max_sharpe"]
    # The cumulative columns are running sums of the per-observation returns
    # (not compounded growth).
    performance_frame["Equal Weights Cummulative"] = performance_frame["Equal Weights"].cumsum(axis=0)
    performance_frame["Max PDI Weights Cummulative"] = performance_frame["Max PDI Weights"].cumsum(axis=0)
    performance_frame["Max Sharpe Ratio Weights Cummulative"] = performance_frame["Max Sharpe Ratio Weights"].cumsum(axis=0)

    weights_frame = pd.DataFrame()
    weights_frame["Period"] = periods_weights
    weights_frame["Weights max PDI"] = used_weights["max_pdi"]
    weights_frame["Weights max sharpe"] = used_weights["max_sharpe"]
    weights_frame["Weights equal"] = used_weights["equal"]
    weights_frame["Weights max PDI - PDI"] = realised_pdi["max_pdi"]
    weights_frame["Weights max sharpe - PDI"] = realised_pdi["max_sharpe"]
    weights_frame["Weights equal - PDI"] = realised_pdi["equal"]
    weights_frame["Assets"] = assets

    return performance_frame, weights_frame

In [421]:
# Load the inputs: the investment universe (indexed by ticker) and the
# weekly return history (indexed by date).
our_uni = pd.read_csv("our_uni.csv", index_col="Ticker")
returns_weekly = pd.read_csv("weeklyReturns.csv", index_col="Date")

# Restrict the returns to the tickers of the universe, then turn the date
# labels into real timestamps.
returns_weekly = returns_weekly.loc[:, our_uni.index.tolist()]
returns_weekly.index = pd.to_datetime(returns_weekly.index)

In [354]:

# Defining training data: every observation dated 2016 or earlier.
train_return = returns_weekly[returns_weekly.index.year <= 2016] # training set: all rows with year <= 2016


print("---------------------- Calculating diversification and performnce for training period -------------------------------")
# Score random 5-asset, equal-weight portfolios on the training rows.
pdi_train = calculate_pdi(num_assets = 5, tickers = list(train_return.columns) , weekly_returns = train_return) # one row per unique candidate portfolio

id_index = pdi_train["PDI_INDEX"].idxmax() # row label of the candidate with the highest PDI
assets_port = ast.literal_eval(pdi_train.loc[id_index]["Assets"]) # "Assets" is stored as text; parse it back into a list of tickers
print("---------------------- Selcted Portfolio -------------------------------")
print(assets_port)
print("------------------------------------------------------------------------")
ini_porti = assets_port # starting portfolio (same list object as assets_port)
test_retuns = returns_weekly[returns_weekly.index.year >= 2017] # test set: all rows with year >= 2017
test_week = test_retuns[assets_port] # test-set returns of the selected portfolio only
print("-----------------------------------  Calculating Performance of selected portfolio -----------------------------------")
# Walk forward through the test set in quarterly ("Q") periods.
# NOTE(review): ret_range_mean = 12 is forwarded to calculate_pdi_weights as its
# scaling constant, whereas calculate_pdi scales by 52 - confirm 12 is intended.
performance, weights = pca_per_weights(return_data = test_week, portfolio = ini_porti, interval = "Q", ret_range_mean = 12) # running the strategy on 2017 and later

print("----------------------------------- Done -----------------------------------")
print("----------------------------------- Access dataframes - performance and weights  -----------------------------------")

---------------------- Calculating diversification and performnce for training period -------------------------------
49861it [01:56, 426.34it/s]
---------------------- Selcted Portfolio -------------------------------
['DWAS', 'KIE', 'RYT', 'FIW', 'XTN']
------------------------------------------------------------------------
-----------------------------------  Calculating Performance of selected portfolio -----------------------------------
2017Q1
first time: 2017Q1 - last time: 2017Q2
49622it [01:09, 710.89it/s]
first time: 2017Q2 - last time: 2017Q3
49647it [01:10, 709.20it/s]
first time: 2017Q3 - last time: 2017Q4
49655it [01:09, 711.38it/s]
first time: 2017Q4 - last time: 2018Q1
49612it [01:09, 713.13it/s]
first time: 2018Q1 - last time: 2018Q2
49659it [09:14, 89.58it/s] 
first time: 2018Q2 - last time: 2018Q3
49628it [01:09, 712.26it/s]
first time: 2018Q3 - last time: 2018Q4
49649it [01:09, 713.99it/s]
first time: 2018Q4 - last time: 2019Q1
49638it [01:10, 709.02it/s]
first tim