In [None]:
import yfinance as yf
import pandas as pd
import numpy as np

In [None]:
# Yahoo Finance symbols (London listings, ".L" suffix) used as the FTSE 100 universe.
# The order is the same as before; rows are grouped by the company name's initial letter.
FTSE_100 = [
    'III.L',
    'ADM.L', 'AAL.L', 'ANTO.L', 'AHT.L', 'ABF.L', 'AZN.L', 'AV.L',
    'BA.L', 'BARC.L', 'BDEV.L', 'BEZ.L', 'BKG.L', 'BP.L', 'BATS.L', 'BLND.L', 'BT-A.L', 'BNZL.L',
    'CNA.L', 'CPG.L', 'CRDA.L',
    'DCC.L', 'DGE.L', 'DPLM.L',
    'EZJ.L', 'ENT.L', 'EXPN.L',
    'FCIT.L', 'FRAS.L', 'FRES.L',
    'GLEN.L', 'GSK.L',
    'HLMA.L', 'HL.L', 'HIK.L', 'HSX.L', 'HWDN.L', 'HSBA.L',
    'IHG.L', 'IMI.L', 'IMB.L', 'INF.L', 'IAG.L', 'ITRK.L',
    'JD.L',
    'KGF.L',
    'LAND.L', 'LGEN.L', 'LLOY.L', 'LMP.L', 'LSEG.L',
    'MKS.L', 'MRO.L', 'MNDI.L',
    'NG.L', 'NWG.L', 'NXT.L',
    'PSON.L', 'PSN.L', 'PHNX.L', 'PRU.L',
    'RKT.L', 'REL.L', 'RTO.L', 'RMV.L', 'RIO.L', 'RR.L',
    'SGE.L', 'SBRY.L', 'SDR.L', 'SMT.L', 'SGRO.L', 'SVT.L', 'SHEL.L', 'SN.L', 'SMDS.L', 'SMIN.L', 'SPX.L', 'SSE.L', 'STAN.L',
    'TW.L', 'TSCO.L',
    'ULVR.L', 'UU.L', 'UTG.L',
    'VTY.L', 'VOD.L',
    'WEIR.L', 'WTB.L', 'WPP.L',
]

In [None]:
# Read every HTML table on the Wikipedia "List of S&P 500 companies" page
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
tables = pd.read_html(url)

# The first table on the page is the constituents list
sp500_table = tables[0]

# Preview only the first rows; printing the full table floods the cell output
print(sp500_table.head())

# Convert the 'Symbol' column to a plain Python list of tickers.
# Wikipedia writes share classes with a dot (e.g. BRK.B) whereas Yahoo Finance,
# which yfinance queries, uses a dash (BRK-B), so normalise the separator here.
sp500_tickers = sp500_table['Symbol'].str.replace('.', '-', regex=False).tolist()
print(len(sp500_tickers))


In [None]:
start_date = '2001-10-01'
end_date = '2011-09-30'

# Universe to download. The loop used to iterate over `FTSE100_tickers`, a name
# that is never defined in this notebook; the list is called `FTSE_100`.
tickers = FTSE_100

# Collect one Series per ticker and concatenate once at the end; concatenating
# inside the loop re-copies the growing frame on every iteration.
price_columns = []
return_columns = []
standardized_columns = []

for counter, ticker in enumerate(tickers, start=1):
    # auto_adjust=False asks yfinance to keep the separate 'Adj Close' column
    # that the rest of this cell relies on.
    data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)
    # Progress is reported against the list actually being iterated.
    print(f'{counter}/{len(tickers)}')

    # Some yfinance versions return (field, ticker) MultiIndex columns even for a
    # single ticker; flatten to the field level so 'Adj Close' can be selected.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    # Tickers with no data in the window would otherwise contribute an all-NaN column.
    if data.empty or 'Adj Close' not in data.columns:
        print(f'Skipping {ticker}: no adjusted close prices returned')
        continue

    # Adjusted close prices, labelled with the ticker symbol
    prices = data['Adj Close'].rename(ticker)

    # Daily log returns; the first row has no previous price and is dropped
    log_returns = np.log(prices / prices.shift(1)).dropna()

    # Standardize the returns to zero mean and unit (sample) standard deviation
    standardized = (log_returns - log_returns.mean()) / log_returns.std()

    price_columns.append(prices)
    return_columns.append(log_returns)
    standardized_columns.append(standardized)

# One column per ticker, aligned on the date index
df_prices = pd.concat(price_columns, axis=1) if price_columns else pd.DataFrame()
df_returns = pd.concat(return_columns, axis=1) if return_columns else pd.DataFrame()
df_standardized_returns = (
    pd.concat(standardized_columns, axis=1) if standardized_columns else pd.DataFrame()
)

# NOTE: the file names keep their 'S&P' suffix because later cells read these exact paths.
df_standardized_returns.to_csv('returns_standardized_S&P.csv')  # Save the data to a CSV file
df_prices.to_csv('stock_prices_S&P.csv')  # Save the data to a CSV file
df_returns.to_csv('stock_returns_S&P.csv')  # Save the data to a CSV file




In [None]:
# Reload the standardized returns and keep only tickers with a complete history.
# Built as a single chain (no inplace mutation) so the cell can be re-run safely.
df_standardized_returns = (
    pd.read_csv('returns_standardized_S&P.csv')
    # Drop every column (ticker) that has at least one missing value
    .dropna(axis=1)
    .set_index("Date")
    # 'Unnamed: 0' only exists if the file was once saved with a default integer
    # index; errors='ignore' stops the drop from raising KeyError when it is absent.
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
# Overwrite the file with the cleaned panel; calculate_C_g reads it from this path
df_standardized_returns.to_csv('returns_standardized_S&P.csv')
df_standardized_returns

In [None]:
# Column labels of the cleaned returns frame, in column order
names = list(df_standardized_returns.columns)

### Obtaining the filtered correlation matrix $C^{(g)}$

In [None]:
# file_path must be the standardized return data
def calculate_C_g(file_path):
    """Build the filtered correlation matrix C^(g) from standardized returns.

    The correlation matrix of the returns is eigendecomposed. Only eigenmodes
    whose eigenvalue exceeds the random-matrix-theory upper bound
    ``lambda_plus = (1 + sqrt(N / T))**2`` are kept, and of those the largest
    one (the global mode) is discarded. ``C^(g)`` is the sum of
    ``eigenvalue * v v^T`` over the remaining modes.

    Parameters
    ----------
    file_path : str or path-like
        CSV file with one column per series and one row per observation. A
        ``'Date'`` column, if present, is used as the index and excluded from
        the correlation.

    Returns
    -------
    numpy.ndarray
        Real ``(N, N)`` array. It is all zeros when at most one eigenvalue
        exceeds ``lambda_plus``.

    Side effects
    ------------
    Writes the unfiltered correlation matrix to ``'correlation_matrix.csv'`` in
    the current working directory.
    """
    # Load the standardized returns data
    standardized_returns = pd.read_csv(file_path)

    # Set the 'Date' column as the index if it's not already
    if 'Date' in standardized_returns.columns:
        standardized_returns = standardized_returns.set_index('Date')

    # Calculate the correlation matrix on the returns data and persist it
    correlation_matrix = standardized_returns.corr()
    correlation_matrix.to_csv('correlation_matrix.csv')

    # A correlation matrix is symmetric, so use eigh: it guarantees real
    # eigenvalues (returned in ascending order) and orthonormal eigenvectors.
    # The general-purpose eig can return complex values from rounding noise,
    # which cannot be accumulated into the real-valued result below.
    eigenvalues, eigenvectors = np.linalg.eigh(correlation_matrix.to_numpy())

    # RMT upper bound for T observations of N series
    T, N = standardized_returns.shape
    lambda_plus = (1 + np.sqrt(N / T)) ** 2

    # Modes above the bound, still in ascending order of eigenvalue
    significant_modes = np.flatnonzero(eigenvalues > lambda_plus)

    # Remove the largest eigenvalue (global mode); with ascending order it is the last one
    group_modes = significant_modes[:-1]

    # Reconstruct the filtered correlation matrix C^(g) as a sum of outer products
    C_g = np.zeros((N, N), dtype=float)
    for mode in group_modes:
        eigenvector = eigenvectors[:, mode]
        C_g += eigenvalues[mode] * np.outer(eigenvector, eigenvector)

    # Return the filtered correlation matrix
    return C_g

In [None]:
# Filtered correlation matrix computed from the cleaned standardized-returns file
C_g = calculate_C_g(file_path='returns_standardized_S&P.csv')
C_g.shape

### Modified spectral method to obtain partitions

In [None]:
def spectral_method(C_g):
    """Bisect the nodes of a symmetric matrix by the sign of its leading eigenvector.

    Parameters
    ----------
    C_g : array-like, shape (n, n)
        Symmetric matrix (for example a filtered correlation matrix or a
        principal submatrix of one).

    Returns
    -------
    list[list[int]]
        ``[community_1, community_2]``: the indices whose leading-eigenvector
        component is strictly positive, and all remaining indices (zero or
        negative components). Either list may be empty. Because an eigenvector
        is only defined up to sign, which of the two groups comes first carries
        no meaning.
    """
    matrix = np.asarray(C_g)
    if matrix.size == 0:
        # Nothing to split
        return [[], []]

    # The input is symmetric, so use eigh: it returns real eigenvalues in
    # ascending order and real eigenvectors. The general eig routine may return
    # complex pairs for the (near-)zero eigenvalues of a rank-deficient matrix.
    _, eigenvectors = np.linalg.eigh(matrix)
    leading_eigenvector = eigenvectors[:, -1]  # last column <-> largest eigenvalue

    # Creating the communities based on the sign of the eigenvector
    positive = leading_eigenvector > 0
    community_1 = np.flatnonzero(positive).tolist()
    community_2 = np.flatnonzero(~positive).tolist()

    return [community_1, community_2]

def calculate_modularity(C_g, partitions):
    """Fraction of the total weight of ``C_g`` that lies inside the given communities.

    Parameters
    ----------
    C_g : array-like, shape (n, n)
        Filtered correlation matrix.
    partitions : iterable of sequences of int
        Each inner sequence lists the node indices of one community. The
        communities do not have to cover every node.

    Returns
    -------
    float
        ``sum over communities of sum_{i, j in community} C_g[i, j]`` divided by
        ``C_g.sum()``. If ``C_g`` sums to zero the result is ``nan`` or ``inf``
        (NumPy emits a RuntimeWarning), as with any NumPy float division by zero.
    """
    matrix = np.asarray(C_g)

    # C_norm is the total sum of C_g (Eq.38)
    C_norm = matrix.sum()

    # Sum each community's within-block entries with one vectorised submatrix
    # sum instead of a Python-level double loop over every (i, j) pair.
    within_weight = 0.0
    for community in partitions:
        members = np.asarray(community, dtype=np.intp)
        within_weight += matrix[np.ix_(members, members)].sum()

    # Normalize modularity by C_norm
    return within_weight / C_norm

def recursive_spectral_method(C_g, min_size=2, modularity_threshold=0.00001, labels=None):
    """Recursively bisect the nodes of ``C_g`` into communities.

    Starting from all nodes, each community is split in two with
    ``spectral_method``. The split is kept, and both halves are split further,
    only if it raises ``calculate_modularity`` by more than
    ``modularity_threshold``; otherwise the community is kept whole.

    This single definition replaces an earlier unfinished stub of the same name
    that the later definition silently shadowed.

    Parameters
    ----------
    C_g : numpy.ndarray, shape (n, n)
        Filtered correlation matrix.
    min_size : int, default 2
        Communities with at most this many nodes are never split.
    modularity_threshold : float, default 0.00001
        Minimum modularity gain required to accept a split.
    labels : sequence of str, optional
        Label for each node, in the same order as the rows of ``C_g``. When
        omitted, the notebook-level ``names`` list is used if it exists;
        otherwise the raw node indices are printed.

    Returns
    -------
    list[list[int]]
        The final communities as lists of node indices. Each community is also
        printed, one list per line.

    Raises
    ------
    ValueError
        If labels are available but their number differs from the size of ``C_g``.
    """
    if labels is None:
        # Fall back to the notebook-level list of column names, if defined
        labels = globals().get('names')
    if labels is not None and len(labels) != len(C_g):
        raise ValueError(
            f'Got {len(labels)} labels for a {len(C_g)}-node matrix; '
            'labels must match the rows of C_g one-to-one.'
        )

    result_communities = []

    # Recursive function to split communities
    def split_community(community_nodes):
        # If community is too small, add it directly to result_communities
        if len(community_nodes) <= min_size:
            result_communities.append(community_nodes)
            return

        # Extract the submatrix for the current community
        submatrix = C_g[np.ix_(community_nodes, community_nodes)]

        # Apply spectral method to split into two communities
        sub_community_1, sub_community_2 = spectral_method(submatrix)

        # Map the sub-community indices back to the original indices
        community_1 = [community_nodes[i] for i in sub_community_1]
        community_2 = [community_nodes[i] for i in sub_community_2]

        # A one-sided "split" leaves the community unchanged. Recursing on it
        # would never terminate when the threshold is zero or negative.
        if not community_1 or not community_2:
            result_communities.append(community_nodes)
            return

        # Calculate modularity before and after the split
        initial_modularity = calculate_modularity(C_g, [community_nodes])
        new_modularity = calculate_modularity(C_g, [community_1, community_2])

        # Check if the split improves modularity significantly
        if (new_modularity - initial_modularity) > modularity_threshold:
            # Recursively split each resulting community
            split_community(community_1)
            split_community(community_2)
        else:
            # If modularity gain is too low, add the original community without splitting
            result_communities.append(community_nodes)

    # Start recursive splitting from the entire set of nodes
    all_nodes = list(range(len(C_g)))
    split_community(all_nodes)

    # Node i of C_g corresponds to labels[i]. The former names[i+1] lookup was
    # shifted by one and ran past the end of the list for the last node.
    for partition in result_communities:
        if labels is None:
            print(list(partition))
        else:
            print([labels[i] for i in partition])
    return result_communities
    

In [None]:
# Yahoo Finance symbols (London listings, ".L" suffix) used as the FTSE 100 universe.
# NOTE(review): this literal repeats the FTSE_100 list assigned in an earlier cell;
# keep the two in sync or drop one of them.
FTSE_100 = [
    'III.L',
    'ADM.L', 'AAL.L', 'ANTO.L', 'AHT.L', 'ABF.L', 'AZN.L', 'AV.L',
    'BA.L', 'BARC.L', 'BDEV.L', 'BEZ.L', 'BKG.L', 'BP.L', 'BATS.L', 'BLND.L', 'BT-A.L', 'BNZL.L',
    'CNA.L', 'CPG.L', 'CRDA.L',
    'DCC.L', 'DGE.L', 'DPLM.L',
    'EZJ.L', 'ENT.L', 'EXPN.L',
    'FCIT.L', 'FRAS.L', 'FRES.L',
    'GLEN.L', 'GSK.L',
    'HLMA.L', 'HL.L', 'HIK.L', 'HSX.L', 'HWDN.L', 'HSBA.L',
    'IHG.L', 'IMI.L', 'IMB.L', 'INF.L', 'IAG.L', 'ITRK.L',
    'JD.L',
    'KGF.L',
    'LAND.L', 'LGEN.L', 'LLOY.L', 'LMP.L', 'LSEG.L',
    'MKS.L', 'MRO.L', 'MNDI.L',
    'NG.L', 'NWG.L', 'NXT.L',
    'PSON.L', 'PSN.L', 'PHNX.L', 'PRU.L',
    'RKT.L', 'REL.L', 'RTO.L', 'RMV.L', 'RIO.L', 'RR.L',
    'SGE.L', 'SBRY.L', 'SDR.L', 'SMT.L', 'SGRO.L', 'SVT.L', 'SHEL.L', 'SN.L', 'SMDS.L', 'SMIN.L', 'SPX.L', 'SSE.L', 'STAN.L',
    'TW.L', 'TSCO.L',
    'ULVR.L', 'UU.L', 'UTG.L',
    'VTY.L', 'VOD.L',
    'WEIR.L', 'WTB.L', 'WPP.L',
]

In [None]:
# Partition the filtered correlation matrix, accepting a split only when it
# raises modularity by more than 0.3; the returned communities are the cell output.
recursive_spectral_method(C_g, modularity_threshold=0.3)