In [2]:
import numpy as np 
import pandas as pd 
import datetime as dt 
import yfinance as yf 


from sklearn.preprocessing import StandardScaler
# Shared scaler instance; it is fitted on the closing-price table further down.
scaler = StandardScaler()

import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Plotly template handed to every figure via `update_layout(template = temp, ...)`.
# It sets the font family/size and a default width (each figure passes its own width as well).
temp = dict(layout = go.Layout(font = dict(family="Franklin Gothic", size=12), width = 1500))

# ***Fetch Historical Dataset***

In [3]:
# Source page for the current Dow Jones Industrial Average constituents.
DJIA_WIKI_URL = "https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average"

# Pick the constituents table by the columns this notebook needs instead of by its
# position on the page, so an added or reordered table does not silently select
# the wrong one.
wiki_tables = pd.read_html(DJIA_WIKI_URL)
component_tables = [table for table in wiki_tables if {'Symbol', 'Industry'}.issubset(table.columns)]
if not component_tables:
    raise ValueError("No table with 'Symbol' and 'Industry' columns found at " + DJIA_WIKI_URL)

tickers_table = component_tables[0].sort_values(by = 'Industry').reset_index(drop = True)
tickers       = tickers_table['Symbol'].to_list()
print(tickers)

['BA', 'AMGN', 'DIS', 'DOW', 'NKE', 'MMM', 'HON', 'CAT', 'KO', 'PG', 'GS', 'V', 'AXP', 'JPM', 'MCD', 'HD', 'CRM', 'MSFT', 'IBM', 'CSCO', 'AAPL', 'TRV', 'UNH', 'CVX', 'MRK', 'JNJ', 'WBA', 'WMT', 'INTC', 'VZ']


In [4]:
# Sample window; both values are passed to yfinance as `start` / `end`.
start_date = '2022-09-01'
end_date   = '2023-09-01'

# Request the history for all tickers at once, then keep only the closing prices.
ticks = yf.Tickers(tickers)
price_history = ticks.history(start = start_date, end = end_date)
stock_df = price_history['Close']

stock_df.tail()

[*********************100%%**********************]  30 of 30 completed


Unnamed: 0_level_0,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,DOW,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-08-25,178.610001,256.380005,158.320007,223.410004,272.559998,209.470001,55.700001,159.119995,83.360001,53.295967,...,110.209999,322.980011,98.50959,153.539993,160.633591,487.223267,242.570007,33.349998,25.26,157.820007
2023-08-28,180.190002,256.549988,159.190002,227.059998,274.790009,211.720001,56.200001,160.210007,84.160004,53.908112,...,108.93,323.700012,99.296944,153.779999,159.828629,489.374908,243.830002,33.549999,25.17,158.720001
2023-08-29,184.119995,259.640015,160.240005,227.25,280.029999,211.960007,56.560001,159.960007,84.400002,54.540005,...,109.989998,328.410004,101.429794,153.869995,160.275833,490.918884,245.339996,34.689999,25.540001,160.050003
2023-08-30,187.649994,257.880005,160.169998,228.850006,282.329987,215.039993,56.810001,160.179993,84.279999,54.75,...,110.209999,328.790009,101.75869,154.039993,160.315598,489.623932,246.229996,34.639999,25.6,161.199997
2023-08-31,187.869995,256.339996,157.990005,224.029999,281.130005,221.460007,57.349998,161.100006,83.68,54.560001,...,108.980003,327.76001,101.369995,154.339996,160.226135,474.731903,245.679993,34.98,25.309999,162.610001


In [5]:
# Standardize each ticker's closing-price column; `fit_transform` also leaves `scaler` fitted.
standardized_values = scaler.fit_transform(stock_df)

standardized_stock_df = pd.DataFrame(data = standardized_values,
                                     index = stock_df.index,
                                     columns = stock_df.columns)
standardized_stock_df

Unnamed: 0_level_0,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,DOW,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-09-01,-0.136622,-0.220292,-0.784812,-1.381089,-1.677634,-0.843279,-0.976072,-1.227031,2.154773,-0.827976,...,-2.315149,-0.471106,-0.491177,-0.830033,-1.493526,0.871678,-1.146594,1.435088,0.148258,-1.465072
2022-09-02,-0.250197,-0.385424,-0.897266,-1.443760,-1.731172,-0.837920,-1.147581,-0.984099,1.979933,-0.975506,...,-2.417100,-0.577289,-0.529659,-1.098153,-1.554069,0.527513,-1.282065,1.188544,0.151124,-1.631345
2022-09-06,-0.317813,-0.393337,-0.998639,-1.424346,-1.736826,-0.903913,-1.176981,-1.060872,1.834013,-1.138924,...,-2.396046,-0.646039,-0.580680,-1.263565,-1.636811,0.587349,-1.231763,1.091858,0.165446,-1.705494
2022-09-07,-0.242273,-0.257751,-0.687696,-1.303091,-1.730175,-0.851654,-1.042225,-1.272253,2.175807,-1.052674,...,-2.346177,-0.527623,-0.292707,-1.041936,-1.420874,0.767305,-1.130018,1.082191,0.383156,-1.323515
2022-09-08,-0.321509,-0.222930,-0.579502,-1.240419,-1.654024,-0.730388,-0.956472,-1.183916,2.128482,-0.955078,...,-2.285229,-0.517101,-0.271087,-1.022475,-1.276770,1.029589,-1.113441,1.198211,0.348780,-1.246000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-25,1.010184,0.808986,0.024920,0.994635,1.518771,1.030655,2.021682,-0.240322,-1.679882,0.587408,...,0.589898,1.136339,-1.047375,1.107421,-1.304987,-0.390361,1.390891,-1.546630,-2.285964,1.428527
2023-08-28,1.094143,0.818256,0.100110,1.118956,1.594560,1.106028,2.148202,-0.121510,-1.574715,0.733906,...,0.443990,1.154125,-0.978427,1.134048,-1.388505,-0.291828,1.463493,-1.443301,-2.313279,1.531186
2023-08-29,1.302976,0.986767,0.190857,1.125427,1.772647,1.114068,2.239296,-0.148760,-1.543165,0.885130,...,0.564820,1.270475,-0.791652,1.144033,-1.342106,-0.221123,1.550500,-0.854324,-2.200985,1.682894
2023-08-30,1.490554,0.890787,0.184807,1.179924,1.850814,1.217244,2.302556,-0.124781,-1.558940,0.935386,...,0.589898,1.279862,-0.762850,1.162893,-1.337980,-0.280424,1.601782,-0.880156,-2.182775,1.814069


In [6]:
# AAPL closing price on the left axis, its standardized series on a secondary right axis.
raw_price_trace = go.Scatter(x = stock_df.index,
                             y = stock_df['AAPL'],
                             name = 'AAPL')
standardized_price_trace = go.Scatter(x = standardized_stock_df.index,
                                      y = standardized_stock_df['AAPL'],
                                      name = 'Standardization AAPL',
                                      yaxis = "y2")

fig = go.Figure()
for price_trace in (raw_price_trace, standardized_price_trace):
    fig.add_trace(price_trace)

layout_options = dict(
    template = temp,
    title = "Original Price v.s. Standardization Price",
    hovermode = 'closest',
    margin = dict(l = 20, r = 0, t = 30, b = 20),
    height = 350,
    width = 800,
    showlegend = True,
    xaxis = dict(tickfont = dict(size = 10)),
    yaxis = dict(side = "left", tickfont = dict(size = 10)),
    # `overlaying = "y"` draws the second axis on top of the first one.
    yaxis2 = dict(side = "right", overlaying = "y", tickfont = dict(size = 10)),
)
fig.update_layout(**layout_options)


In [7]:
# Display the standardized table again as a reference for the distance calculations below.
standardized_stock_df

Unnamed: 0_level_0,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,DOW,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-09-01,-0.136622,-0.220292,-0.784812,-1.381089,-1.677634,-0.843279,-0.976072,-1.227031,2.154773,-0.827976,...,-2.315149,-0.471106,-0.491177,-0.830033,-1.493526,0.871678,-1.146594,1.435088,0.148258,-1.465072
2022-09-02,-0.250197,-0.385424,-0.897266,-1.443760,-1.731172,-0.837920,-1.147581,-0.984099,1.979933,-0.975506,...,-2.417100,-0.577289,-0.529659,-1.098153,-1.554069,0.527513,-1.282065,1.188544,0.151124,-1.631345
2022-09-06,-0.317813,-0.393337,-0.998639,-1.424346,-1.736826,-0.903913,-1.176981,-1.060872,1.834013,-1.138924,...,-2.396046,-0.646039,-0.580680,-1.263565,-1.636811,0.587349,-1.231763,1.091858,0.165446,-1.705494
2022-09-07,-0.242273,-0.257751,-0.687696,-1.303091,-1.730175,-0.851654,-1.042225,-1.272253,2.175807,-1.052674,...,-2.346177,-0.527623,-0.292707,-1.041936,-1.420874,0.767305,-1.130018,1.082191,0.383156,-1.323515
2022-09-08,-0.321509,-0.222930,-0.579502,-1.240419,-1.654024,-0.730388,-0.956472,-1.183916,2.128482,-0.955078,...,-2.285229,-0.517101,-0.271087,-1.022475,-1.276770,1.029589,-1.113441,1.198211,0.348780,-1.246000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-25,1.010184,0.808986,0.024920,0.994635,1.518771,1.030655,2.021682,-0.240322,-1.679882,0.587408,...,0.589898,1.136339,-1.047375,1.107421,-1.304987,-0.390361,1.390891,-1.546630,-2.285964,1.428527
2023-08-28,1.094143,0.818256,0.100110,1.118956,1.594560,1.106028,2.148202,-0.121510,-1.574715,0.733906,...,0.443990,1.154125,-0.978427,1.134048,-1.388505,-0.291828,1.463493,-1.443301,-2.313279,1.531186
2023-08-29,1.302976,0.986767,0.190857,1.125427,1.772647,1.114068,2.239296,-0.148760,-1.543165,0.885130,...,0.564820,1.270475,-0.791652,1.144033,-1.342106,-0.221123,1.550500,-0.854324,-2.200985,1.682894
2023-08-30,1.490554,0.890787,0.184807,1.179924,1.850814,1.217244,2.302556,-0.124781,-1.558940,0.935386,...,0.589898,1.279862,-0.762850,1.162893,-1.337980,-0.280424,1.601782,-0.880156,-2.182775,1.814069


# ***Basic Euclidean Distance Approach***

In [8]:
def euclidean_distance(vector_1, vector_2):
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        vector_1: First vector (anything supporting element-wise subtraction,
            e.g. a NumPy array or pandas Series).
        vector_2: Second vector, compatible with ``vector_1``.

    Returns:
        The norm of ``vector_1 - vector_2`` as computed by ``np.linalg.norm``.
    """
    difference = vector_1 - vector_2
    return np.linalg.norm(difference)

# Example: distance between the standardized AAPL and AXP series.
euclidean_distance(standardized_stock_df['AAPL'], standardized_stock_df['AXP'])

14.856802145696015

In [9]:
from scipy.spatial.distance import pdist, squareform

# Transpose so that every ticker is one row: `pdist` then returns the condensed
# vector of pairwise Euclidean distances between tickers, and `squareform`
# expands it into the full ticker-by-ticker matrix.
ticker_labels = standardized_stock_df.columns
distances = pdist(standardized_stock_df.T, metric = 'euclidean')
distances_matrix = squareform(distances)

euclidean_distance_matrix = pd.DataFrame(distances_matrix,
                                         index = ticker_labels,
                                         columns = ticker_labels)
euclidean_distance_matrix

Unnamed: 0,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,DOW,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
AAPL,0.0,27.08214,14.856802,15.08066,17.667615,5.383447,13.164109,26.994915,26.808261,18.332912,...,17.802967,4.507171,22.189778,15.069159,25.504845,26.317102,12.301279,27.568856,29.127526,11.17184
AMGN,27.08214,0.0,25.556368,22.999685,18.891401,27.352043,21.998346,10.006247,23.010379,22.487743,...,21.68936,26.838682,23.125899,21.224078,17.122579,14.403703,23.394424,21.777415,14.898269,21.380991
AXP,14.856802,25.556368,0.0,11.481042,13.568534,13.988369,12.746543,23.151228,22.832144,10.368162,...,16.387649,14.646604,15.92152,17.546223,20.019358,27.726718,11.305859,22.441717,24.148492,15.50972
BA,15.08066,22.999685,11.481042,0.0,8.809073,13.089201,7.467371,21.5154,26.415333,7.483865,...,10.082102,12.992469,13.446815,10.193725,18.275957,27.674201,5.931938,23.592993,25.074195,10.604833
CAT,17.667615,18.891401,13.568534,8.809073,0.0,16.936275,9.255858,18.160978,26.835225,10.631209,...,13.745935,16.523164,17.710081,13.205404,17.388206,25.000288,10.177336,24.860119,23.888887,11.387697
CRM,5.383447,27.352043,13.988369,13.089201,16.936275,0.0,12.240342,26.604788,26.633509,16.380709,...,16.124269,5.473756,20.38698,14.839218,24.758423,27.7169,10.511982,26.985264,29.20177,11.7491
CSCO,13.164109,21.998346,12.746543,7.467371,9.255858,12.240342,0.0,22.148259,27.633119,11.952458,...,12.544081,11.584016,17.760363,10.703621,20.971141,26.09582,7.339985,25.666024,26.018867,8.621277
CVX,26.994915,10.006247,23.151228,21.5154,18.160978,26.604788,22.148259,0.0,20.574298,19.731378,...,19.768483,26.7619,20.070592,21.533622,12.3498,17.301228,22.068837,18.946372,12.621046,21.724573
DIS,26.808261,23.010379,22.832144,26.415333,26.835225,26.633509,27.633119,20.574298,0.0,22.66351,...,26.78029,27.708824,19.873361,27.566622,21.40178,21.193176,25.999388,11.870714,17.003173,28.179912
DOW,18.332912,22.487743,10.368162,7.483865,10.631209,16.380709,11.952458,19.731378,22.66351,0.0,...,12.471242,17.124309,10.751511,14.387645,16.546988,27.60837,9.011257,19.930537,22.884842,14.796922


In [10]:
# Turn any NaN entries back into zeros so the arg-max search works on plain numbers.
euclidean_distance_matrix = euclidean_distance_matrix.fillna(0)

distance_values = euclidean_distance_matrix.values
max_value = distance_values.max() # Largest pairwise distance
max_idx = distance_values.argmax() # Its position in the flattened matrix
# Convert the flat position back into (row, column) positions.
max_row_idx, max_col_idx = np.unravel_index(max_idx, distance_values.shape)

pair_labels = euclidean_distance_matrix.index
max_euclidean_distance_col = pair_labels[max_row_idx]
max_euclidean_distance_index = pair_labels[max_col_idx]


print("Basic Distance Approach Highest SDD Pairs Selection:" + max_euclidean_distance_col + ", " + max_euclidean_distance_index)

Basic Distance Approach Highest SDD Pairs Selection:CRM, MMM


In [11]:
# Mask zero entries (each ticker against itself) so they cannot be selected as the minimum.
euclidean_distance_matrix = euclidean_distance_matrix.replace(0, np.nan)

# Search the 2-D array for the global minimum. Taking `.min()` of a
# (ticker, distance) table reduces each column independently, which pairs the
# alphabetically smallest ticker with the smallest distance even when that
# ticker is not part of the closest pair.
distance_array = euclidean_distance_matrix.to_numpy(dtype = float)
min_row_pos, min_col_pos = np.unravel_index(np.nanargmin(distance_array), distance_array.shape)

min_euclidean_distance_value = distance_array[min_row_pos, min_col_pos]
# The matrix is symmetric, so either ticker of the pair can fill either name;
# `nanargmin` returns the first hit in row-major order.
min_euclidean_distance_col = euclidean_distance_matrix.columns[min_row_pos]
min_euclidean_distance_index = euclidean_distance_matrix.index[min_col_pos]


print("Basic Distance Approach Lowest SDD Pairs Selection:" + min_euclidean_distance_col  + ", " + min_euclidean_distance_index )

Basic Distance Approach Lowest SDD Pairs Selection:AAPL, MSFT


In [12]:
lowest_pair = (min_euclidean_distance_col, min_euclidean_distance_index)
highest_pair = (max_euclidean_distance_col, max_euclidean_distance_index)

lowest_title = "Lowest Euclidean Distance Pair: " + lowest_pair[0] + ", " + lowest_pair[1]
highest_title = "Highest Euclidean Distance Pair: " + highest_pair[0] + ", " + highest_pair[1]
fig = make_subplots(rows = 1, cols = 2, subplot_titles = (lowest_title, highest_title))

# Panel 1 shows the closest pair, panel 2 the most distant pair; one trace per ticker.
for panel_col, pair in enumerate((lowest_pair, highest_pair), start = 1):
    for ticker in pair:
        fig.add_trace(
            go.Scatter(x = standardized_stock_df.index,
                       y = standardized_stock_df[ticker],
                       name = ticker),
            row = 1, col = panel_col)

layout_options = dict(
    template = temp,
    hovermode = 'closest',
    margin = dict(l = 20, r = 0, t = 30, b = 20),
    height = 350,
    width = 1200,
    showlegend = True,
    xaxis = dict(tickfont = dict(size = 10)),
    yaxis = dict(side = "left", tickfont = dict(size = 10)),
    xaxis_showgrid = False,
    xaxis2_showgrid = False,
)
fig.update_layout(**layout_options)

# ***Generating Trading Signal***


If the portfolio value exceeds the threshold:

    A sell signal is generated.

If the portfolio value falls below the negative threshold:

    A buy signal is generated.

In [13]:
def long_short_operation(standardized_stock_df, col, index):
    """Choose the long and short leg of a pair from their latest standardized values.

    The ticker whose last value in ``standardized_stock_df`` is higher becomes
    the long leg; the other becomes the short leg.

    Args:
        standardized_stock_df: DataFrame with one column per ticker.
        col: Column name of the first ticker.
        index: Column name of the second ticker.

    Returns:
        Tuple ``(long, short)`` of the two column names.

    Raises:
        ValueError: If either last value is missing (NaN), because the two
            tickers cannot be ordered in that case.
    """
    last_value_1 = standardized_stock_df[col].values[-1]
    last_value_2 = standardized_stock_df[index].values[-1]

    if pd.isna(last_value_1) or pd.isna(last_value_2):
        raise ValueError("Cannot rank the pair: the last value of '" + str(col) + "' or '" + str(index) + "' is missing")

    if last_value_1 < last_value_2:
        return index, col

    # Covers last_value_1 > last_value_2 and the tie case; on a tie the
    # argument order decides, so the function always returns a pair.
    return col, index


# Long/short legs for the closest pair, then the spread between their standardized series.
long, short = long_short_operation(standardized_stock_df, min_euclidean_distance_col, min_euclidean_distance_index)

pair_spread = standardized_stock_df[long] - standardized_stock_df[short]
portfolio_values_df = pair_spread.reset_index(name = 'portfolio_values')

# Signal threshold: two standard deviations of the spread.
std_threshold = 2 * portfolio_values_df['portfolio_values'].std()
portfolio_values_df

Unnamed: 0,Date,portfolio_values
0,2022-09-01,0.334484
1,2022-09-02,0.327092
2,2022-09-06,0.328227
3,2022-09-07,0.285350
4,2022-09-08,0.195592
...,...,...
246,2023-08-25,-0.126155
247,2023-08-28,-0.059982
248,2023-08-29,0.032501
249,2023-08-30,0.210692


In [14]:
pair_title = "Lowest Euclidean Distance Pair: " + min_euclidean_distance_col  + ", " + min_euclidean_distance_index
fig = make_subplots(rows = 1, cols = 2, subplot_titles = (pair_title, "Long - Short Portfolio Value"))

# Panel 1: the two standardized series of the selected pair.
for ticker in (min_euclidean_distance_col, min_euclidean_distance_index):
    fig.add_trace(
        go.Scatter(x = standardized_stock_df.index,
                   y = standardized_stock_df[ticker],
                   name = ticker),
        row = 1, col = 1)

# Panel 2: the spread together with the upper and lower threshold lines.
spread_trace = go.Scatter(x = portfolio_values_df['Date'],
                          y = portfolio_values_df['portfolio_values'],
                          name = "Portfolio Values")
fig.add_trace(spread_trace, row = 1, col = 2)

for threshold_level in (std_threshold, -1 * std_threshold):
    fig.add_hline(y = threshold_level, row = 1, col = 2, line_dash = "dash", line_color = "red")

layout_options = dict(
    template = temp,
    hovermode = 'closest',
    margin = dict(l = 20, r = 0, t = 30, b = 20),
    height = 350,
    width = 1200,
    showlegend = True,
    xaxis = dict(tickfont = dict(size = 10)),
    yaxis = dict(side = "left", tickfont = dict(size = 10)),
    xaxis_showgrid = False,
)
fig.update_layout(**layout_options)

In [15]:
def trading_signal(x, threshold):
    """Map a portfolio value to a trading signal.

    Args:
        x: Portfolio (spread) value to classify.
        threshold: Positive band half-width.

    Returns:
        ``-1`` when ``x`` is above ``threshold``, ``1`` when ``x`` is below
        ``-threshold``, and ``None`` when ``x`` lies within the band
        (bounds included).
    """
    above_band = x > threshold
    below_band = x < -threshold

    if above_band:
        return -1
    if below_band:
        return 1
    return None
    

# Raw signal per row: -1 / 1 outside the threshold band, missing inside it.
portfolio_values_df['Trading Signal'] = portfolio_values_df.apply(lambda x: trading_signal(x['portfolio_values'], std_threshold), axis = 1)
# Carry the most recent signal forward across the in-band rows.
# `ffill()` replaces the deprecated `fillna(method = 'pad')` spelling.
portfolio_values_df['Trading Signal'] = portfolio_values_df['Trading Signal'].ffill()
# Rows before the first signal have nothing to carry forward: no position (0).
portfolio_values_df['Trading Signal'] = portfolio_values_df['Trading Signal'].fillna(value = 0)

portfolio_values_df

Unnamed: 0,Date,portfolio_values,Trading Signal
0,2022-09-01,0.334484,0.0
1,2022-09-02,0.327092,0.0
2,2022-09-06,0.328227,0.0
3,2022-09-07,0.285350,0.0
4,2022-09-08,0.195592,0.0
...,...,...,...
246,2023-08-25,-0.126155,1.0
247,2023-08-28,-0.059982,1.0
248,2023-08-29,0.032501,1.0
249,2023-08-30,0.210692,1.0


In [22]:
pair_title = "Lowest Euclidean Distance Pair: " + min_euclidean_distance_col  + ", " + min_euclidean_distance_index
fig = make_subplots(rows = 1, cols = 2, subplot_titles = (pair_title, "Long - Short Portfolio Value"))

# Panel 1: the two standardized series of the selected pair.
for ticker in (min_euclidean_distance_col, min_euclidean_distance_index):
    fig.add_trace(
        go.Scatter(x = standardized_stock_df.index,
                   y = standardized_stock_df[ticker],
                   name = ticker),
        row = 1, col = 1)

# Panel 2: the spread, the held trading signal (dashed black) and the threshold lines.
spread_trace = go.Scatter(x = portfolio_values_df['Date'],
                          y = portfolio_values_df['portfolio_values'],
                          name = "Portfolio Values")
signal_trace = go.Scatter(x = portfolio_values_df['Date'],
                          y = portfolio_values_df['Trading Signal'],
                          name = "Trading Signal",
                          line = dict(dash = "dash", color = 'black'))
for panel_trace in (spread_trace, signal_trace):
    fig.add_trace(panel_trace, row = 1, col = 2)

# Only the upper line carries a name, as in the lower line's call it is omitted.
fig.add_hline(y = std_threshold, row = 1, col = 2, line_dash = "dash", line_color = "red", name = 'Threshold for buy/sell signal')
fig.add_hline(y = -1 * std_threshold, row = 1, col = 2, line_dash = "dash", line_color = "red")

layout_options = dict(
    template = temp,
    hovermode = 'closest',
    margin = dict(l = 20, r = 0, t = 30, b = 20),
    height = 350,
    width = 1200,
    showlegend = True,
    xaxis = dict(tickfont = dict(size = 10)),
    yaxis = dict(side = "left", tickfont = dict(size = 10)),
    xaxis_showgrid = False,
)
fig.update_layout(**layout_options)

# ***Portfolio Performance***

In [17]:
# Calculate portfolio values with trading signal
portfolio_values_df['portfolio values with trading signal'] = portfolio_values_df['portfolio_values'] * portfolio_values_df['Trading Signal']

# NOTE(review): the returns below are percentage changes of a standardized spread,
# which can sit near or cross zero, so individual values can be very large.
# Confirm this is the intended return definition before relying on the totals.

# Calculate long period returns (change between consecutive rows holding a buy signal)
is_buy_row = portfolio_values_df['Trading Signal'] == 1
buying_signal_portfolio_df = portfolio_values_df.loc[is_buy_row, ['Date']].copy()
buying_signal_portfolio_df['long_period_return'] = portfolio_values_df.loc[is_buy_row, 'portfolio values with trading signal'].pct_change()

# Calculate short period returns (change between consecutive rows holding a sell signal)
is_sell_row = portfolio_values_df['Trading Signal'] == -1
selling_signal_portfolio_df = portfolio_values_df.loc[is_sell_row, ['Date']].copy()
selling_signal_portfolio_df['short_period_return'] = portfolio_values_df.loc[is_sell_row, 'portfolio values with trading signal'].pct_change()

# Write the results back by index alignment rather than merging on Date:
# rows outside the respective signal become NaN, and re-running this cell
# overwrites the two columns instead of creating duplicated `_x` / `_y` columns.
portfolio_values_df['long_period_return'] = buying_signal_portfolio_df['long_period_return']
portfolio_values_df['short_period_return'] = selling_signal_portfolio_df['short_period_return']

# Fill NaN values with 0
portfolio_values_df = portfolio_values_df.fillna(value = 0)

# Calculate investment strategy return (at most one of the two terms is non-zero per row)
portfolio_values_df['Investment Strategy Return'] = portfolio_values_df['long_period_return'] + portfolio_values_df['short_period_return']
portfolio_values_df['Investment Strategy Cumulative Return'] = portfolio_values_df['Investment Strategy Return'].cumsum()

portfolio_values_df

Unnamed: 0,Date,portfolio_values,Trading Signal,portfolio values with trading signal,long_period_return,short_period_return,Investment Strategy Return,Investment Strategy Cumulative Return
0,2022-09-01,0.334484,0.0,0.000000,0.000000,0.0,0.000000,0.000000
1,2022-09-02,0.327092,0.0,0.000000,0.000000,0.0,0.000000,0.000000
2,2022-09-06,0.328227,0.0,0.000000,0.000000,0.0,0.000000,0.000000
3,2022-09-07,0.285350,0.0,0.000000,0.000000,0.0,0.000000,0.000000
4,2022-09-08,0.195592,0.0,0.000000,0.000000,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...
246,2023-08-25,-0.126155,1.0,-0.126155,-0.259210,0.0,-0.259210,40.492333
247,2023-08-28,-0.059982,1.0,-0.059982,-0.524535,0.0,-0.524535,39.967798
248,2023-08-29,0.032501,1.0,0.032501,-1.541851,0.0,-1.541851,38.425947
249,2023-08-30,0.210692,1.0,0.210692,5.482561,0.0,5.482561,43.908508


In [18]:
spread_title = "Long - Short Portfolio Value: " + min_euclidean_distance_col  + ", " + min_euclidean_distance_index
fig = make_subplots(rows = 1, cols = 2, subplot_titles = (spread_title, "Investment Strategy Value"))

# (source column, legend name, line style, subplot column), added in this order.
trace_specs = [
    ('portfolio_values', "Portfolio Values", dict(color = 'green'), 1),
    ('Trading Signal', "Trading Signal", dict(dash = "dash", color = 'black'), 1),
    ('Investment Strategy Cumulative Return', "Investment Strategy Return", dict(color = 'navy'), 2),
]
for source_column, legend_name, line_style, panel_col in trace_specs:
    fig.add_trace(
        go.Scatter(x = portfolio_values_df['Date'],
                   y = portfolio_values_df[source_column],
                   name = legend_name,
                   line = line_style),
        row = 1, col = panel_col)

# Threshold lines belong to the spread panel (column 1).
for threshold_level in (std_threshold, -1 * std_threshold):
    fig.add_hline(y = threshold_level, row = 1, col = 1, line_dash = "dash", line_color = "red")

layout_options = dict(
    template = temp,
    hovermode = 'closest',
    margin = dict(l = 20, r = 0, t = 30, b = 20),
    height = 350,
    width = 1200,
    showlegend = True,
    xaxis = dict(tickfont = dict(size = 10)),
    yaxis = dict(side = "left", tickfont = dict(size = 10)),
    xaxis_showgrid = False,
)
fig.update_layout(**layout_options)

# ***Portfolio Analysis and Comparison with the Benchmark Index: Dow Jones Industrial Average***

In [19]:
# Benchmark window; both values are passed to yfinance as `start` / `end`.
start_date = '2022-09-01'
end_date   = '2023-09-01'

# Price history of the single benchmark symbol ^DJI.
ticks = yf.Ticker("^DJI")
benchmark_df = ticks.history(start = start_date, end = end_date)

benchmark_return_col = 'Benchmark(^DJI) Return'
benchmark_cumulative_col = 'Benchmark(^DJI) Cumulative Return'

# Close-to-close change in percent (first row is NaN) and its running sum.
benchmark_df[benchmark_return_col] = benchmark_df['Close'].pct_change() * 100
benchmark_df[benchmark_cumulative_col] = benchmark_df[benchmark_return_col].cumsum()

# Keep only the two derived columns and turn the date index into a `Date` column.
benchmark_df = benchmark_df[[benchmark_return_col, benchmark_cumulative_col]].reset_index()
benchmark_df

Unnamed: 0,Date,Benchmark(^DJI) Return,Benchmark(^DJI) Cumulative Return
0,2022-09-01 00:00:00-04:00,,
1,2022-09-02 00:00:00-04:00,-1.067652,-1.067652
2,2022-09-06 00:00:00-04:00,-0.552833,-1.620485
3,2022-09-07 00:00:00-04:00,1.399821,-0.220664
4,2022-09-08 00:00:00-04:00,0.611882,0.391218
...,...,...,...
246,2023-08-25 00:00:00-04:00,0.725750,9.341543
247,2023-08-28 00:00:00-04:00,0.620382,9.961926
248,2023-08-29 00:00:00-04:00,0.846908,10.808834
249,2023-08-30 00:00:00-04:00,0.107786,10.916620


In [20]:
from Utils.portfolio_summary import BackTest_Summary_DF


portfolio_result = BackTest_Summary_DF(Daily_Returns = portfolio_values_df['Investment Strategy Return'].values,
                                       Date_Index = portfolio_values_df['Date'].to_list(),
                                       labels = "Long/Short Portfolio")

# The first benchmark return is NaN (no previous close). Drop that row from the
# frame itself so the returns and the dates passed below have the same length
# and stay aligned row by row.
benchmark_returns_df = benchmark_df.dropna(subset = ['Benchmark(^DJI) Return'])

benchmark_result = BackTest_Summary_DF(Daily_Returns = benchmark_returns_df['Benchmark(^DJI) Return'].values,
                                       Date_Index = benchmark_returns_df['Date'].to_list(),
                                       labels = "Benchmark(^DJI)")


# Side-by-side summary: one column per series.
pd.concat([portfolio_result, benchmark_result], axis = 1)

Unnamed: 0,Long/Short Portfolio,Benchmark(^DJI)
Start Date,2022-09-01 00:00:00,2022-09-01 00:00:00-04:00
End Date,2023-08-31 00:00:00,2023-08-31 00:00:00-04:00
-,-,-
Annual Return,44.260394,10.517643
Cumulative Return,44.084757,10.43417
Annual Volatility,58.081487,15.496563
Sharpe_Ratio,0.76204,0.678708
Maximum_drawdown,40.252755,11.810068
Calmar_Ratio,1.099562,0.890566


In [21]:
# Cumulative return of the benchmark and of the strategy on one shared axis.
benchmark_trace = go.Scatter(x = benchmark_df['Date'],
                             y = benchmark_df['Benchmark(^DJI) Cumulative Return'],
                             name = '^DJI')
strategy_trace = go.Scatter(x = portfolio_values_df['Date'],
                            y = portfolio_values_df['Investment Strategy Cumulative Return'],
                            name = "Investment Strategy Return")

fig = go.Figure()
for comparison_trace in (benchmark_trace, strategy_trace):
    fig.add_trace(comparison_trace)

layout_options = dict(
    template = temp,
    title = "Benchmark Period Performance Comparison",
    hovermode = 'closest',
    margin = dict(l = 30, r = 30, t = 30, b = 20),
    height = 400,
    width = 1200,
    showlegend = True,
    xaxis = dict(tickfont = dict(size = 10)),
    yaxis = dict(side = "left", tickfont = dict(size = 10)),
    legend = dict(font = dict(size = 10)),
)
fig.update_layout(**layout_options)

**In a real-world scenario, it's crucial to take into account the following factors:**

1. **Determination of Historical Standard Deviation Window:** One must carefully select the time period over which historical standard deviation is computed to establish an appropriate threshold.

2. **Incorporating Trading Costs and Short-Selling Constraints:** It's essential to factor in the costs associated with trading and any limitations related to short-selling when developing and implementing a strategy.
