In [None]:
%pip install --upgrade pip
%pip install yfinance
%pip install matplotlib
%pip install pandas
import yfinance as yf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [18]:
# Taking all the tickers in S&P 500 from the financial industry
# The first table on the Wikipedia page is assumed to be the constituents list;
# the 'Symbol' and 'GICS Sector' columns are read from it.
tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
# One .loc call (row mask + column label) rather than chained indexing.
financial_tickers = tickers.loc[tickers['GICS Sector'] == 'Financials', 'Symbol']
# Wikipedia writes share classes with a dot (e.g. BRK.B) while Yahoo Finance
# symbols use a hyphen (BRK-B); convert so such symbols can be resolved by the
# download step. Only the first five symbols are kept.
tickers_list = [ticker.replace('.', '-') for ticker in financial_tickers][:5]

In [19]:
# Downloading data for all tickers
# `period` and `auto_adjust` are passed explicitly so the result does not depend
# on whichever defaults the installed yfinance release happens to use. The
# recorded outputs (history back to 2005) correspond to the full price history
# and, presumably, unadjusted closes -- TODO confirm against the original run.
stocks = yf.download(tickers_list, period="max", auto_adjust=False)
# Keep the closing prices only, and only the dates on which every ticker has a
# value, so the columns are directly comparable row by row.
close = stocks.loc[:, "Close"].dropna()
print(close.tail(2))

[*********************100%%**********************]  5 of 5 completed
                  AFL        AIG         ALL         AMP         AXP
Date                                                                
2024-02-07  78.830002  69.809998  158.600006  391.359985  209.080002
2024-02-08  78.110001  68.410004  161.750000  394.829987  211.210007


In [22]:
# Lowest and highest closing price per ticker, each rounded to two decimals
# and keyed by ticker symbol in tickers_list order.
min_dict = {symbol: round(close[symbol].min(), 2) for symbol in tickers_list}
max_dict = {symbol: round(close[symbol].max(), 2) for symbol in tickers_list}

print(min_dict)
print(max_dict)

{'AFL': 5.74, 'ALL': 14.12, 'AXP': 10.26, 'AIG': 7.0, 'AMP': 12.37}
{'AFL': 85.73, 'ALL': 161.75, 'AXP': 211.21, 'AIG': 1456.2, 'AMP': 394.83}


In [32]:
# Normalising prices for all stocks
# Min-max scaling: (price - min) / (max - min), computed per ticker column.
# The exact column extremes are used (not values rounded to two decimals), so
# every column's minimum maps to exactly 0 and its maximum to exactly 1.
# Columns are selected in tickers_list order.
close_selected = close[tickers_list]
close_low = close_selected.min()
close_high = close_selected.max()
# Series operands align on column labels, so each column is scaled by its own
# minimum and range in one vectorised step.
normclose = (close_selected - close_low) / (close_high - close_low)

normclose.head()

Unnamed: 0_level_0,AFL,ALL,AXP,AIG,AMP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-09-15,0.208651,0.27176,0.199663,0.836323,0.064425
2005-09-16,0.214214,0.277789,0.207939,0.841154,0.063353
2005-09-19,0.214652,0.266206,0.206066,0.828043,0.063614
2005-09-20,0.212089,0.264106,0.203757,0.830251,0.063614
2005-09-21,0.205213,0.255436,0.197224,0.814932,0.06283


In [40]:
# Calculating squared differences for each pair of stock
from itertools import combinations

# One column per unordered ticker pair, labelled "<first> - <second>" and
# holding the squared gap between the two normalised price series on each date.
# All columns are built first and the frame is constructed once: inserting
# columns one at a time fragments the DataFrame, which becomes costly as the
# number of pairs grows with the ticker list.
sq_diff_df = pd.DataFrame(
    {
        f"{first} - {second}": (normclose[second] - normclose[first]) ** 2
        for first, second in combinations(tickers_list, 2)
    },
    index=normclose.index,
)

sq_diff_df.head()

Unnamed: 0_level_0,AFL - ALL,AFL - AXP,AFL - AIG,AFL - AMP,ALL - AXP,ALL - AIG,ALL - AMP,AXP - AIG,AXP - AMP,AIG - AMP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2005-09-15,0.003983,8.1e-05,0.393973,0.020801,0.005198,0.318731,0.042988,0.405337,0.018289,0.595827
2005-09-16,0.004042,3.9e-05,0.393053,0.022759,0.004879,0.31738,0.045983,0.400961,0.020905,0.604974
2005-09-19,0.002658,7.4e-05,0.376249,0.022812,0.003617,0.315661,0.041043,0.386855,0.020292,0.584351
2005-09-20,0.002706,6.9e-05,0.382124,0.022045,0.003642,0.32052,0.040197,0.392494,0.01964,0.587732
2005-09-21,0.002522,6.4e-05,0.371758,0.020273,0.003389,0.313036,0.037097,0.381564,0.018062,0.565658
