In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import yfinance as yf
from datetime import datetime, timedelta

# Get the Market Capitalization
def get_market_cap(ticker):
    try:
        stock_data = yf.Ticker(ticker)
        market_cap = stock_data.info.get('marketCap')

        if market_cap is not None:
            return market_cap
        else:
            return None
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None

# Get the Price-to-Book ratio
def get_price_to_book(ticker):
    try:
        stock_data = yf.Ticker(ticker)
        book_value = stock_data.info.get('bookValue')
        price = stock_data.history(period='1d')['Close'].iloc[-1]

        if book_value is not None and price is not None and book_value != 0:
            price_to_book = price / book_value
            return price_to_book
        else:
            return None
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None

# Calculate the five-year average P/B
def calculate_five_year_average(ticker):
    end_date = datetime.now()
    start_date = end_date - timedelta(days=5 * 365)  # Five years ago

    try:
        stock_data = yf.Ticker(ticker)
        historical_data = stock_data.history(start=start_date, end=end_date)['Close']
        book_value = stock_data.info.get('bookValue')

        if book_value is not None and not historical_data.empty:
            pb_ratios = historical_data / book_value
            five_year_average = pb_ratios.mean()
            return five_year_average
        else:
            return None
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None

# Download Financial assets From S&P500 Financial Sector
def retrieve_financial_sector_assets():
    data = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    table = data[0]
    financial_sector = table[table['GICS Sector'] == 'Financials']  # XLF
    financial_sector_assets = financial_sector['Symbol'].tolist()
    return financial_sector_assets

# Select the top 10 assets based on ROI
def get_top_assets(asset_data, start, end):
    df_asset = yf.download(asset_data, start=start, end=end)['Adj Close']
    roi = (df_asset.iloc[-1] - df_asset.iloc[0]) / df_asset.iloc[0]
    top_10_ROI = roi.sort_values(ascending=False).head(10)
    roi_tickers = top_10_ROI.index.tolist()
    df_prices = yf.download(roi_tickers, start=start, end=end)['Adj Close']
    df_assets = df_prices.pct_change().dropna()
    return df_assets

# Calculate Centrality
def download_sector_data(symbols, start_date, end_date):
    df_sectors = yf.download(symbols, start=start_date, end=end_date)['Adj Close']
    df_sectors = df_sectors.pct_change().dropna()
    return df_sectors

def preprocess_data(df_sectors):
    scaler = StandardScaler()
    df_sectors_scaled = scaler.fit_transform(df_sectors)
    return df_sectors_scaled

def calculate_weight(market_cap):
    weights = [cap / np.sum(market_cap) for cap in market_cap]
    return weights

def calculate_weighted_df(df_sectors, market_cap):
    weights = calculate_weight(market_cap)
    sector_weights = dict(zip(df_sectors.columns, [weight * 100 for weight in weights]))

    weighted_df = df_sectors.copy()

    for sector in df_sectors.columns:
        weighted_df.loc[:, sector] *= np.sqrt(sector_weights[sector])

    return weighted_df

def calculate_centrality_score(X, n=2):
    pca_model = PCA(n_components=n)
    pca_model.fit(X)
    EV = pca_model.components_
    AR = pca_model.explained_variance_ratio_

    C_list = []
    for i in range(X.shape[1]):
        C_num = sum(AR[j] * abs(EV[j][i]) / sum(abs(EV[j][k]) for k in range(X.shape[1])) for j in range(n))
        C_denom = sum(AR[j] for j in range(n))
        C_list.append(C_num / C_denom)

    return C_list

# Calculate Relative Value
def fetch_sector_data(symbols, start_date, end_date):
    df_sectors_prices = yf.download(symbols, start=start_date, end=end_date)['Adj Close']
    df_sectors = df_sectors_prices.pct_change().dropna()
    return df_sectors

# Normalized by dividing its current p/b value with the asset's 5-year_p/b
def calc_normalize_price(price_book, five_year_ave_price_book):
    normalized_prices = [pb / five_pb for pb, five_pb in zip(price_book, five_year_ave_price_book)]
    return normalized_prices

def calc_asset_weight(normalized_price, five_year_ave_price_book):
    weights = [np * five_pb / sum(five_year_ave_price_book) for np, five_pb in zip(normalized_price, five_year_ave_price_book)]
    return weights

def calculate_relative_measure(normalized_prices, weights):
    normalized_prices = np.nan_to_num(normalized_prices, nan=0.0)
    weighted_average = np.dot(weights, normalized_prices)
    relative_measure = np.zeros_like(normalized_prices)
    nonzero_indices = (weighted_average != 0)
    relative_measure[nonzero_indices] = normalized_prices[nonzero_indices] / weighted_average[nonzero_indices]
    return relative_measure

def create_relative_scores_df(symbols, normalized_prices, weights):
    relative_measure = calculate_relative_measure(normalized_prices, weights)
    relative_scores = dict(zip(symbols, [np.around(r, 2) for r in relative_measure]))
    for asset in relative_scores:
        relative_scores[asset] = [relative_scores[asset]]
    scores_df = pd.DataFrame(data=relative_scores, index=['Relative_score'])
    scores_df = scores_df.transpose()
    return scores_df

# Combine Centrality and Relative Value
def combine_centrality_relative_value(weighted_df, symbols, normalized_prices, weights):
    centrality_scores = calculate_centrality_score(weighted_df)
    scores_df = create_relative_scores_df(symbols, normalized_prices, weights)

    centrality_df = pd.DataFrame(data=centrality_scores, columns=['Centrality'], index=symbols)
    relative_df = create_relative_scores_df(symbols, normalized_prices, weights)

    combined_scores_df = pd.concat([centrality_df, relative_df], axis=1)
    print(combined_scores_df)

    combined_scores_df.index.name = 'Ticker'
    return combined_scores_df

# Construct portfolios for assets with no bubbles, bubbles rising, & bubbles falling
def construct_portfolios(df_sectors, symbols, normalized_prices, weights):
    combined_scores_df = combine_centrality_relative_value(weighted_df, symbols, normalized_prices, weights)

    try:
        top_crowded = combined_scores_df.sort_values(by='Centrality', ascending=False).head(5).index
        top_overvalued = combined_scores_df.sort_values(by='Relative_score', ascending=False).head(5).index
    except KeyError as e:
        print(f"Error: {e}")
        return None, None, None

    if top_crowded.empty or top_overvalued.empty:
        print("Error: Top crowded or top overvalued is empty.")
        return None, None, None

    print("Top Five Crowded Sectors:", top_crowded)
    print("Top Five Overvalued Sectors:", top_overvalued)

    next_day_returns = df_sectors.shift(-1)

    no_bubble_portfolio = combined_scores_df[~combined_scores_df.index.isin(top_crowded.union(top_overvalued))]
    bubble_run_up_portfolio = combined_scores_df[combined_scores_df.index.isin(top_crowded) & ~combined_scores_df.index.isin(top_overvalued)]
    bubble_sell_off_portfolio = combined_scores_df[combined_scores_df.index.isin(top_crowded) & combined_scores_df.index.isin(top_overvalued)]

    return no_bubble_portfolio, bubble_run_up_portfolio, bubble_sell_off_portfolio, next_day_returns

# Impute Values below:
if __name__ == "__main__":
    # Input the timeframe
    start_date = "2018-01-01"
    end_date = "2023-01-01"
    financial_sector_assets = retrieve_financial_sector_assets()
    df_financial_assets = get_top_assets(financial_sector_assets, start=start_date, end=end_date)

    # Allow symbols of top 10 assets or input other 10 preferred asset tickers as symbols
    symbols = df_financial_assets.columns.tolist()
    # symbols = ['AJG', 'AON', 'BRO', 'BX', 'MA', 'MMC', 'MSCI', 'NDAQ', 'PGR', 'WRB']

    market_cap = [get_market_cap(ticker) for ticker in symbols]
    price_book = [get_price_to_book(ticker) for ticker in symbols]
    five_year_ave_price_book = [calculate_five_year_average(ticker) for ticker in symbols]
    sector_data = download_sector_data(symbols, start_date, end_date)
    scaled_data = preprocess_data(sector_data)
    df_sectors = download_sector_data(symbols, start_date, end_date)
    weighted_df = calculate_weighted_df(df_sectors, market_cap)
    weighted_scores = calculate_centrality_score(weighted_df)
    df_sectors = fetch_sector_data(symbols, start_date, end_date)
    normalized_prices = calc_normalize_price(price_book, five_year_ave_price_book)
    weights = calc_asset_weight(normalized_prices, five_year_ave_price_book)
    relative_measure = calculate_relative_measure(normalized_prices, weights)
    scores_df = create_relative_scores_df(symbols, normalized_prices, weights)
    df_sectors.index.names = ['Date']

    no_bubble_portfolio, bubble_run_up_portfolio, bubble_sell_off_portfolio, next_day_returns = construct_portfolios(
        df_sectors, symbols, normalized_prices, weights)

    print("***No Bubble Portfolio***:")
    print(no_bubble_portfolio)
    print("\n***Bubble Run-up Portfolio***:")
    print(bubble_run_up_portfolio)
    print("\n***Bubble Sell-off Portfolio***:")
    print(bubble_sell_off_portfolio)


[*********************100%%**********************]  72 of 72 completed

ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')



[*********************100%%**********************]  10 of 10 completed
[*********************100%%**********************]  10 of 10 completed
[*********************100%%**********************]  10 of 10 completed
[*********************100%%**********************]  10 of 10 completed
      Centrality  Relative_score
AJG     0.061458            1.22
AON     0.067538            0.93
BRO     0.040818            1.03
BX      0.210663            1.08
MA      0.304872            0.89
MMC     0.080872            1.03
MSCI    0.079732            0.88
NDAQ    0.052101            0.81
PGR     0.066579            1.15
WRB     0.035368            0.99
Top Five Crowded Sectors: Index(['MA', 'BX', 'MMC', 'MSCI', 'AON'], dtype='object', name='Ticker')
Top Five Overvalued Sectors: Index(['AJG', 'PGR', 'BX', 'BRO', 'MMC'], dtype='object', name='Ticker')
***No Bubble Portfolio***:
        Centrality  Relative_score
Ticker                            
NDAQ      0.052101            0.81
WRB       0.035368 