### KPI Selection and Filtering

1. **Notebook Objective:**
	- This notebook will collect KPIs selected by the user for stocks identified from the Sharpe Ratio calculations
	- It will filter out stocks that underperform the SPY based on their Sharpe Ratio
	- The user can choose which KPIs to analyze
	- The notebook will then filter stocks based on whether their KPIs are above or below the sector median, according to user-defined criteria
	- It will also allow the user to plot the distribution of selected KPIs to identify trends or commonalities


In [1]:
import time
import os 
import sys

import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, List, Optional
import pandas as pd
from tenacity import retry, stop_after_attempt, wait_exponential
from tqdm import tqdm
from functools import lru_cache
import yfinance as yf

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
print(f'Current Working Directory: {os.getcwd()}')

from utils.helpers import divide_chunks

Current Working Directory: /Users/blakeuribe/Desktop/portfolio_py/notebooks

---------------------------------
helpers.py successfully loaded, updated last Feb. 04 2025
---------------------------------




Set up logging and batch processing to prevent rate limits or data request rejections.

In [2]:
start_time = time.time() # measuring run time

# Setup logging, and batching to avoid kickbacks

logging.basicConfig(level=logging.INFO)

@lru_cache(maxsize=1000)
def fetch_ticker_info_cached(ticker_symbol: str) -> Dict[str, Optional[float]]:
    """
    Cached retrieval of ticker info from Yahoo Finance to minimize repeated API calls.
    """
    try:
        return yf.Ticker(ticker_symbol).info
    except Exception as e:
        logging.error(f"Error fetching data for {ticker_symbol}: {e}")
        return {}

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10)
)

def fetch_with_retry(ticker: str) -> Dict[str, Optional[float]]:
    """
    Retry fetching data for a ticker symbol using tenacity.
    """
    return fetch_ticker_info_cached(ticker)

def get_stock_kpis(ticker: str, kpis: List[str]) -> Dict[str, Optional[float]]:
    """
    Fetch KPIs for a specific stock ticker.
    """
    try:
        info = fetch_with_retry(ticker)
        kpi_data = {kpi: info.get(kpi) for kpi in kpis}
        kpi_data['Tickers'] = ticker  # Add ticker column
        return kpi_data
    except Exception as e:
        logging.error(f"Error processing {ticker}: {e}")
        return {'Tickers': ticker, **{kpi: None for kpi in kpis}}


def batch_process_stocks(tickers: List[str], kpis: List[str], max_workers: int = 10) -> pd.DataFrame:
    """
    Process stock tickers in batches using ThreadPoolExecutor for parallel API calls.
    """
    results = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_ticker = {
            executor.submit(get_stock_kpis, ticker, kpis): ticker
            for ticker in tickers
        }

        for future in tqdm(as_completed(future_to_ticker), total=len(tickers)):
            results.append(future.result())

    # Create DataFrame from results
    df = pd.DataFrame(results)
    return df


In [3]:
sharpe_df = pd.read_csv(f'../data/clean/sharpe_ratios.csv')

spy_sharpe = sharpe_df[sharpe_df['Tickers'] == 'SPY']['Sharpe_ratios'].iloc[0]
sharpe_filtered_df = sharpe_df[sharpe_df['Sharpe_ratios'] > (spy_sharpe)] # greater than benchmark

sector_df = pd.read_csv('../data/clean/master_equity_data.csv')[['Tickers', 'Sector', 'title']]

sector_sharpe_df = pd.merge(left=sharpe_filtered_df, right=sector_df, on='Tickers', how='left')

sector_sharpe_df['Sector_Mean'] = (
    sector_sharpe_df.groupby('Sector')['Sharpe_ratios']
             .transform('mean')
)

filtered_df = sector_sharpe_df[
    sector_sharpe_df['Sharpe_ratios'] > sector_sharpe_df['Sector_Mean'] # greater than sector mean
].reset_index(drop=True)

filtered_df

Unnamed: 0,Tickers,Sharpe_ratios,Date_Collected,Sector,title,Sector_Mean
0,A,1.965316,2025-07-12,Healthcare,"AGILENT TECHNOLOGIES, INC.",1.254588
1,AA,1.825040,2025-07-12,Basic Materials,Alcoa Corp,1.353723
2,AACB,1.960840,2025-07-12,Financial Services,Artius II Acquisition Inc.,1.266145
3,AAGH,1.713860,2025-07-12,Healthcare,America Great Health,1.254588
4,AAGR,5.237588,2025-07-12,Consumer Defensive,African Agriculture Holdings Inc.,1.397734
...,...,...,...,...,...,...
1152,VIVE,1.944679,2025-07-12,Healthcare,"VIVEVE MEDICAL, INC.",1.254588
1153,VJTTY,3.282954,2025-07-12,Technology,voxeljet AG,1.305902
1154,VLN,1.331954,2025-07-12,Technology,Valens Semiconductor Ltd.,1.305902
1155,VNET,1.700675,2025-07-12,Technology,"VNET Group, Inc.",1.305902


In [4]:
# Fetch KPI's

num_in_chunks = 20
tickers = filtered_df['Tickers'].tolist()

print('\n----Intiating Chunk Process----')
ticker_chunks = list(divide_chunks(tickers, num_in_chunks))
print(f'{len(tickers)} tikcers divided into {len(ticker_chunks)} chunks')
        
essential_kpis = ['marketCap', 'trailingPE', 'forwardPE', 
                  'profitMargins', 'trailingEps', 'quickRatio',
                  'earningsQuarterlyGrowth'
]

df_list = []

# Loop through each chunk and process the stocks
print('\n----Fethcing KPIs----')
for chunk in ticker_chunks:
    kpi_df_chunk = batch_process_stocks(chunk, essential_kpis)
    df_list.append(kpi_df_chunk)

# Concatenate all DataFrames vertically
kpi_df = pd.concat(df_list, axis=0, ignore_index=True).dropna()
print(f'KPI Df Shape: {kpi_df.shape}')


----Intiating Chunk Process----
1157 tikcers divided into 58 chunks

----Fethcing KPIs----


100%|██████████| 20/20 [00:01<00:00, 11.46it/s]
100%|██████████| 20/20 [00:04<00:00,  4.37it/s]
100%|██████████| 20/20 [00:07<00:00,  2.79it/s]
100%|██████████| 20/20 [00:01<00:00, 11.85it/s]
100%|██████████| 20/20 [00:01<00:00, 15.83it/s]
100%|██████████| 20/20 [00:01<00:00, 12.77it/s]
100%|██████████| 20/20 [00:01<00:00, 13.33it/s]
100%|██████████| 20/20 [00:01<00:00, 15.12it/s]
100%|██████████| 20/20 [00:01<00:00, 10.37it/s]
100%|██████████| 20/20 [00:02<00:00,  7.60it/s]
100%|██████████| 20/20 [00:01<00:00, 13.09it/s]
100%|██████████| 20/20 [00:02<00:00,  9.68it/s]
100%|██████████| 20/20 [00:01<00:00, 12.95it/s]
100%|██████████| 20/20 [00:01<00:00, 10.98it/s]
100%|██████████| 20/20 [00:02<00:00,  9.65it/s]
100%|██████████| 20/20 [00:04<00:00,  4.74it/s]
100%|██████████| 20/20 [00:02<00:00,  9.00it/s]
100%|██████████| 20/20 [00:01<00:00, 13.94it/s]
100%|██████████| 20/20 [00:01<00:00, 14.58it/s]
100%|██████████| 20/20 [00:01<00:00, 13.88it/s]
100%|██████████| 20/20 [00:01<00:00, 14.

KPI Df Shape: (172, 8)



  kpi_df = pd.concat(df_list, axis=0, ignore_index=True).dropna()


In [5]:
import numpy as np


valuation_df = pd.merge(filtered_df[['Tickers', 'Sharpe_ratios', 'Sector', 'title']], kpi_df, on='Tickers').dropna()

cols_to_float = ['trailingPE', 'forwardPE']
valuation_df[cols_to_float] = valuation_df[cols_to_float].astype(float)

# valuation_df

In [6]:
grouped = valuation_df.groupby(['Sector']).median(numeric_only=True)
sector_stock_metrics = pd.merge(valuation_df, grouped, on='Sector', suffixes=('_stock', '_sector'))

# filtering
end_filter_df = sector_stock_metrics[
    (sector_stock_metrics['marketCap_stock'] >= 2_000_000_000) & # larger than small-cap stocks
    (sector_stock_metrics['profitMargins_stock'] >= sector_stock_metrics['profitMargins_sector']) & # performance metric
    (sector_stock_metrics['trailingPE_stock'] <= sector_stock_metrics['trailingPE_sector']) & # risk metric
    (sector_stock_metrics['quickRatio_stock'] >= sector_stock_metrics['quickRatio_sector']) & # performance metric
    (sector_stock_metrics['earningsQuarterlyGrowth_stock'] >= 0) # performance metric
]


stock_cols_only = [col for col in end_filter_df.columns if '_sector' not in col]
filtered_valuation_df = end_filter_df[stock_cols_only] # export only stock info, no need for sector kpi info

filtered_valuation_df.columns = filtered_valuation_df.columns.str.replace('_stock', '', regex=False) # clean up excesive stock col name


filtered_valuation_df.to_csv('../data/clean/filtered_valuation_df.csv', index=False)



print(f'\nTotal Run Time: {time.time() - start_time} seconds')

filtered_valuation_df.reset_index(drop=True)


Total Run Time: 156.67234587669373 seconds


Unnamed: 0,Tickers,Sharpe_ratios,Sector,title,marketCap,trailingPE,forwardPE,profitMargins,trailingEps,quickRatio,earningsQuarterlyGrowth
0,ABT,1.292091,Healthcare,ABBOTT LABORATORIES,229693700000.0,17.145456,25.585274,0.31886,7.7,1.09,0.082
1,ADMA,1.505229,Healthcare,"ADMA BIOLOGICS, INC.",4418967000.0,22.035715,25.708332,0.45011,0.84,3.197,0.511
2,AGX,1.950191,Industrials,ARGAN INC,2895260000.0,29.60251,41.94664,0.11001,7.17,1.689,1.861
3,ATGE,1.497722,Consumer Defensive,Adtalem Global Education Inc.,4239145000.0,19.964468,17.93161,0.13341,5.91,0.707,0.652
4,EBAY,1.574066,Consumer Cyclical,EBAY INC,35535680000.0,18.536058,14.772032,0.19783,4.16,1.072,0.148
5,EXEL,1.782874,Healthcare,"EXELIXIS, INC.",12233680000.0,20.484018,26.388235,0.27994,2.19,3.322,3.277
6,FINV,1.86706,Financial Services,FinVolution Group,2686189000.0,7.6259,7.210885,0.19415,1.39,3.585,0.414
7,FTDR,1.376674,Consumer Cyclical,"Frontdoor, Inc.",4268712000.0,18.895765,18.474522,0.12586,3.07,1.249,0.088
8,IDCC,1.878982,Technology,"InterDigital, Inc.",5842761000.0,17.821993,46.63975,0.48139,12.64,1.71,0.416
9,KGC,2.309481,Basic Materials,KINROSS GOLD CORP,19114530000.0,15.867347,17.670454,0.2174,0.98,1.146,2.439
