This Notebook will collect KPI's of users choice on selected stocks from Sharpe Calculations

In [1]:
import time
import os 

start_time = time.time() # measuring run time

WORKSPACE_DIR = os.getenv('WORKSPACE_DIR')
if not os.getcwd().endswith('portfolio_py'):
    os.chdir(f'{WORKSPACE_DIR}/portfolio_py')
print(f'Current Working Directory: {os.getcwd()}')

from utils.helpers import divide_chunks

Current Working Directory: /Users/blakeuribe/Desktop/portfolio_py

---------------------------------
helpers.py successfully loaded, updated last Feb. 04 2025
---------------------------------




In [2]:
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, List, Optional
import pandas as pd
from tenacity import retry, stop_after_attempt, wait_exponential
from tqdm import tqdm
from functools import lru_cache
import yfinance as yf

Set up Logging, and Batch Processes to Aviod Kickbakcs

In [None]:
# Setup logging, and batching to avoid kickbacks

logging.basicConfig(level=logging.INFO)

@lru_cache(maxsize=1000)
def fetch_ticker_info_cached(ticker_symbol: str) -> Dict[str, Optional[float]]:
    """
    Cached retrieval of ticker info from Yahoo Finance to minimize repeated API calls.
    """
    try:
        return yf.Ticker(ticker_symbol).info
    except Exception as e:
        logging.error(f"Error fetching data for {ticker_symbol}: {e}")
        return {}

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10)
)

def fetch_with_retry(ticker: str) -> Dict[str, Optional[float]]:
    """
    Retry fetching data for a ticker symbol using tenacity.
    """
    return fetch_ticker_info_cached(ticker)

def get_stock_kpis(ticker: str, kpis: List[str]) -> Dict[str, Optional[float]]:
    """
    Fetch KPIs for a specific stock ticker.
    """
    try:
        info = fetch_with_retry(ticker)
        kpi_data = {kpi: info.get(kpi) for kpi in kpis}
        kpi_data['Tickers'] = ticker  # Add ticker column
        return kpi_data
    except Exception as e:
        logging.error(f"Error processing {ticker}: {e}")
        return {'Tickers': ticker, **{kpi: None for kpi in kpis}}


def batch_process_stocks(tickers: List[str], kpis: List[str], max_workers: int = 10) -> pd.DataFrame:
    """
    Process stock tickers in batches using ThreadPoolExecutor for parallel API calls.
    """
    results = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_ticker = {
            executor.submit(get_stock_kpis, ticker, kpis): ticker
            for ticker in tickers
        }

        for future in tqdm(as_completed(future_to_ticker), total=len(tickers)):
            results.append(future.result())

    # Create DataFrame from results
    df = pd.DataFrame(results)
    return df


In [4]:
# Find Benchmark Sharpe

sharpe_ratio_df = pd.read_csv(f'{WORKSPACE_DIR}/portfolio_py/data/clean/sharpe_ratios.csv')
spy_sharpe = sharpe_ratio_df.loc[sharpe_ratio_df['Tickers'] == 'SPY', 'Sharpe_ratios'].values[0]

# Filter only stocks above benchmark
sharpe_ratio_df = sharpe_ratio_df[sharpe_ratio_df['Sharpe_ratios'] >= spy_sharpe]

print(f'Spy Sharpe: {spy_sharpe.round(2)}')

Spy Sharpe: 0.56


In [5]:
num_in_chunks = 20
tickers = sharpe_ratio_df['Tickers'].tolist()

print('\n----Intiating Chunk Process----')
ticker_chunks = list(divide_chunks(tickers, num_in_chunks))
print(f'{len(tickers)} tikcers divided into {len(ticker_chunks)} chunks')
        
essential_kpis = ['marketCap', 'trailingPE', 'forwardPE', 'profitMargins', 'trailingEps']

df_list = []

# Loop through each chunk and process the stocks
print('\n----Fethcing KPIs----')
for chunk in ticker_chunks:
    kpi_df_chunk = batch_process_stocks(chunk, essential_kpis)
    df_list.append(kpi_df_chunk)

# Concatenate all DataFrames vertically
kpi_df = pd.concat(df_list, axis=0, ignore_index=True).dropna()
print(f'KPI Df Shape: {kpi_df.shape}')


----Intiating Chunk Process----
227 tikcers divided into 12 chunks

----Fethcing KPIs----


 70%|███████   | 14/20 [00:20<00:00, 24.12it/s]ERROR:root:Error fetching data for META: HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Read timed out. (read timeout=30)
100%|██████████| 20/20 [00:30<00:00,  1.51s/it]
100%|██████████| 20/20 [00:00<00:00, 43.29it/s]
100%|██████████| 20/20 [00:00<00:00, 49.15it/s]
100%|██████████| 20/20 [00:00<00:00, 103.87it/s]
100%|██████████| 20/20 [00:00<00:00, 64.44it/s]
100%|██████████| 20/20 [00:00<00:00, 100.82it/s]
100%|██████████| 20/20 [00:00<00:00, 66.23it/s]
100%|██████████| 20/20 [00:00<00:00, 102.87it/s]
100%|██████████| 20/20 [00:00<00:00, 68.33it/s]
100%|██████████| 20/20 [00:00<00:00, 95.47it/s]
100%|██████████| 20/20 [00:00<00:00, 62.41it/s]
100%|██████████| 7/7 [00:00<00:00, 79.85it/s]

KPI Df Shape: (207, 6)





In [None]:
import numpy as np

# Minor data cleaning
kpi_df = kpi_df.replace([np.inf, -np.inf], np.nan)  # Replace infinite values with NaN
kpi_df['trailingPE'] = kpi_df['trailingPE'].astype(float)
# kpi_df.select_dtypes(include=['number']).hist() # to view distribution

valuation_df = pd.merge(kpi_df, sharpe_ratio_df, on='Tickers').dropna()
median_values = valuation_df.describe().loc['50%'].to_dict() # Use median as the data often does not follow normal distrubution

# Get sectors
sector_dict = {} #ticker: sector
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=2, min=1, max=32))
def fetch_sector(ticker):
    return yf.Ticker(ticker).info.get('sector')

for chunk in ticker_chunks:
    for ticker in chunk:
        try:
            sector_dict[ticker] = fetch_sector(ticker)
        except Exception as e:
            print(f"Failed to fetch {ticker} after retries: {e}")

sector_df = pd.DataFrame(list(sector_dict.items()), columns=['Tickers', 'Sector'])
valuation_df = pd.merge(left=sector_df, right=valuation_df, right_on='Tickers', left_on='Tickers')

  sqr = _ensure_numeric((avg - values) ** 2)
ERROR:yfinance:401 Client Error: Unauthorized for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/EQR?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=EQR&crumb=Edge%3A+Too+Many+Requests


{'AAPL': 'Technology',
 'AVGO': 'Technology',
 'BRK-B': 'Financial Services',
 'GOOGL': 'Communication Services',
 'JPM': 'Financial Services',
 'MA': 'Financial Services',
 'META': 'Communication Services',
 'NVDA': 'Technology',
 'SPY': None,
 'TSLA': 'Consumer Cyclical',
 'V': 'Financial Services',
 'WMT': 'Consumer Defensive',
 'ABBV': 'Healthcare',
 'BAC': 'Financial Services',
 'COST': 'Consumer Defensive',
 'NFLX': 'Communication Services',
 'SAP': 'Technology',
 'AXP': 'Financial Services',
 'BX': 'Financial Services',
 'CSCO': 'Technology',
 'IBM': 'Technology',
 'KO': 'Consumer Defensive',
 'TMUS': 'Communication Services',
 'WFC': 'Financial Services',
 'ABT': 'Healthcare',
 'AZN': 'Healthcare',
 'BABA': 'Consumer Cyclical',
 'ISRG': 'Healthcare',
 'MS': 'Financial Services',
 'NVS': 'Healthcare',
 'PM': 'Consumer Defensive',
 'BKNG': 'Consumer Cyclical',
 'BLK': 'Financial Services',
 'GE': 'Industrials',
 'GS': 'Financial Services',
 'HSBC': 'Financial Services',
 'PLTR': 

In [23]:
valuation_df

Unnamed: 0,Tickers,Sector,marketCap,trailingPE,forwardPE,profitMargins,trailingEps,Sharpe_ratios
0,AAPL,Technology,3.207068e+12,33.887302,25.690733,0.24295,6.30,1.202638
1,AVGO,Technology,9.194193e+11,90.948830,31.692057,0.18473,2.15,1.102393
2,BRK-B,Financial Services,1.109812e+12,12.469105,25.627490,0.23960,41.27,1.397108
3,GOOGL,Communication Services,2.029040e+12,20.583334,18.469866,0.28604,8.04,0.761303
4,JPM,Financial Services,6.499279e+11,11.763158,13.885305,0.35060,19.76,0.998637
...,...,...,...,...,...,...,...,...
202,FTV,Technology,2.508277e+10,31.233051,17.676258,0.13365,2.36,0.631259
203,SYF,Financial Services,2.073593e+10,6.238596,8.244205,0.37255,8.55,0.849658
204,AWK,Utilities,2.813475e+10,26.825280,25.319300,0.22438,5.38,1.089991
205,DTE,Utilities,2.806888e+10,19.949852,18.760056,0.11271,6.78,1.291760


In [25]:
grouped = valuation_df.groupby(['Sector']).median(numeric_only=True)

merged_df = pd.merge(valuation_df, grouped, on='Sector', suffixes=('_stock', '_sector'))

merged_df = merged_df[merged_df['Sharpe_ratios_stock'] >= merged_df['Sharpe_ratios_sector']] # above median sharpe, this is a performance measure
merged_df = merged_df[merged_df['profitMargins_stock'] >= merged_df['profitMargins_sector']] # above median prof margin, this is a performance measure
merged_df = merged_df[merged_df['trailingPE_stock'] <= merged_df['trailingPE_sector']] # below median trailing pe ratio, this is a risk adverse measure; sharpe ratio does not take into account over valuation

stock_cols_only = [col for col in merged_df.columns if '_sector' not in col]
filtered_valuation_df = merged_df[stock_cols_only] # export only stock info, no need for sector info

filtered_valuation_df.columns = filtered_valuation_df.columns.str.replace('_stock', '', regex=False) # clean up excesive stock col name


filtered_valuation_df.to_csv(f'{WORKSPACE_DIR}/portfolio_py/data/clean/filtered_valuation_df.csv', index=False)
logging.info(f"Final filtered DataFrame saved. Rows: {filtered_valuation_df.shape[0]}")

# Print the sorted DataFrame (Optional)

end_time = time.time()
elapsed_time = end_time - start_time
print(f'\nTotal Run Time: {elapsed_time} seconds')
filtered_valuation_df.reset_index(drop=True)

INFO:root:Final filtered DataFrame saved. Rows: 36



Total Run Time: 543.9879057407379 seconds


Unnamed: 0,Tickers,Sector,marketCap,trailingPE,forwardPE,profitMargins,trailingEps,Sharpe_ratios
0,AAPL,Technology,3207068000000.0,33.887302,25.690733,0.24295,6.3,1.202638
1,CSCO,Technology,240686500000.0,26.535088,15.51282,0.16958,2.28,1.337912
2,TMUS,Communication Services,292262600000.0,26.526426,23.990627,0.1393,9.65,2.592317
3,BABA,Consumer Cyclical,335156300000.0,20.478956,14.295847,0.12316,6.89,1.942747
4,PM,Consumer Defensive,236152100000.0,25.271215,21.094446,0.18631,6.01,2.656845
5,BKNG,Consumer Cyclical,146535600000.0,25.840403,21.279388,0.24778,172.81,1.064204
6,GE,Industrials,210233600000.0,32.366173,37.616413,0.1694,6.09,1.466126
7,HSBC,Financial Services,202972300000.0,9.204839,41.057552,0.39147,6.2,2.717315
8,ADP,Technology,118708700000.0,30.391666,26.840847,0.19764,9.6,1.29687
9,DE,Industrials,129600200000.0,21.13767,21.538115,0.12993,22.59,1.215406
