This Notebook will collect KPI's of users choice on selected stocks from Sharpe Calculations

In [11]:
import time
import os 

start_time = time.time() # measuring run time

WORKSPACE_DIR = os.getenv('WORKSPACE_DIR')
if not os.getcwd().endswith('portfolio_py'):
    os.chdir(f'{WORKSPACE_DIR}/portfolio_py')
print(f'Current Working Directory: {os.getcwd()}')

from utils.helpers import divide_chunks

Current Working Directory: /Users/blakeuribe/Desktop/portfolio_py


In [12]:
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, List, Optional
import pandas as pd
from tenacity import retry, stop_after_attempt, wait_exponential
from tqdm import tqdm
from functools import lru_cache
import yfinance as yf

Set up Logging, and Batch Processes to Aviod Kickbakcs

In [13]:
# Setup logging
logging.basicConfig(level=logging.INFO)

# %% [Function Definitions]
@lru_cache(maxsize=1000)
def fetch_ticker_info_cached(ticker_symbol: str) -> Dict[str, Optional[float]]:
    """
    Cached retrieval of ticker info from Yahoo Finance to minimize repeated API calls.
    """
    try:
        return yf.Ticker(ticker_symbol).info
    except Exception as e:
        logging.error(f"Error fetching data for {ticker_symbol}: {e}")
        return {}

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10)
)

def fetch_with_retry(ticker: str) -> Dict[str, Optional[float]]:
    """
    Retry fetching data for a ticker symbol using tenacity.
    """
    return fetch_ticker_info_cached(ticker)

def get_stock_kpis(ticker: str, kpis: List[str]) -> Dict[str, Optional[float]]:
    """
    Fetch KPIs for a specific stock ticker.
    """
    try:
        info = fetch_with_retry(ticker)
        kpi_data = {kpi: info.get(kpi) for kpi in kpis}
        kpi_data['Tickers'] = ticker  # Add ticker column
        return kpi_data
    except Exception as e:
        logging.error(f"Error processing {ticker}: {e}")
        return {'Tickers': ticker, **{kpi: None for kpi in kpis}}


def batch_process_stocks(tickers: List[str], kpis: List[str], max_workers: int = 10) -> pd.DataFrame:
    """
    Process stock tickers in batches using ThreadPoolExecutor for parallel API calls.
    """
    results = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_ticker = {
            executor.submit(get_stock_kpis, ticker, kpis): ticker
            for ticker in tickers
        }

        for future in tqdm(as_completed(future_to_ticker), total=len(tickers)):
            results.append(future.result())

    # Create DataFrame from results
    df = pd.DataFrame(results)
    return df


In [14]:
# Find Benchmark Sharpe
# # Load the Sharpe ratio DataFrame

sharpe_ratio_df = pd.read_csv(f'{WORKSPACE_DIR}/portfolio_py/data/clean/sharpe_ratios.csv')
spy_sharpe = sharpe_ratio_df.loc[sharpe_ratio_df['Tickers'] == 'SPY', 'Sharpe_ratios'].values[0]

# Filter only stocks above benchmark
sharpe_ratio_df = sharpe_ratio_df[sharpe_ratio_df['Sharpe_ratios'] >= spy_sharpe]

print(f'Spy Sharpe: {spy_sharpe.round(2)}')

Spy Sharpe: 0.56


In [15]:
num_in_chunks = 20
tickers = sharpe_ratio_df['Tickers'].tolist()
tickers = tickers[0:499]

print('\n----Intiating Chunk Process----')
ticker_chunks = list(divide_chunks(tickers, num_in_chunks))
print(f'{len(tickers)} tikcers divided into {len(ticker_chunks)} chunks')
        
essential_kpis = ['marketCap', 'trailingPE', 'forwardPE', 'profitMargins', 'trailingEps']

df_list = []

# Loop through each chunk and process the stocks
print('\n----Fethcing KPIs----')
for chunk in ticker_chunks:
    kpi_df_chunk = batch_process_stocks(chunk, essential_kpis)
    df_list.append(kpi_df_chunk)

# Concatenate all DataFrames vertically
kpi_df = pd.concat(df_list, axis=0, ignore_index=True).dropna()
print(f'KPI Df Shape: {kpi_df.shape}')


----Intiating Chunk Process----
400 tikcers divided into 20 chunks

----Fethcing KPIs----


100%|██████████| 20/20 [00:01<00:00, 17.08it/s]
100%|██████████| 20/20 [00:00<00:00, 80.45it/s]
100%|██████████| 20/20 [00:00<00:00, 111.30it/s]
100%|██████████| 20/20 [00:00<00:00, 71.16it/s]
100%|██████████| 20/20 [00:00<00:00, 94.47it/s]
100%|██████████| 20/20 [00:00<00:00, 94.15it/s]
  0%|          | 0/20 [00:00<?, ?it/s]ERROR:root:Error fetching data for AZO: Too Many Requests. Rate limited. Try after a while.
  5%|▌         | 1/20 [00:00<00:11,  1.64it/s]ERROR:root:Error fetching data for CMI: Too Many Requests. Rate limited. Try after a while.
ERROR:root:Error fetching data for CHTR: Too Many Requests. Rate limited. Try after a while.
ERROR:root:Error fetching data for TFC: Too Many Requests. Rate limited. Try after a while.
ERROR:root:Error fetching data for AMP: Too Many Requests. Rate limited. Try after a while.
ERROR:root:Error fetching data for TRV: Too Many Requests. Rate limited. Try after a while.
ERROR:root:Error fetching data for GM: Too Many Requests. Rate limited. Tr

KeyboardInterrupt: 

In [None]:
import numpy as np
kpi_df = kpi_df.replace([np.inf, -np.inf], np.nan)  # Replace infinite values with NaN

# kpi_df.select_dtypes(include=['number']).hist() # to view distribution


kpi_df['trailingPE'] = kpi_df['trailingPE'].astype(float)
valuation_df = pd.merge(kpi_df, sharpe_ratio_df, on='Tickers').dropna()

median_values = valuation_df.describe().loc['50%'].to_dict() # Use median as the data often does not follow normal distrubution

from utils.finance_utils import get_sector
valuation_df['sector'] = valuation_df['Tickers'].apply(get_sector)

valuation_df

  sqr = _ensure_numeric((avg - values) ** 2)



---------------------------------
finance_utils.py successfully loaded, updated last Feb. 24 2025 5:26
---------------------------------


Error retrieving sector for AAPL: Too Many Requests. Rate limited. Try after a while.
Error retrieving sector for BRK-B: Too Many Requests. Rate limited. Try after a while.
Error retrieving sector for JPM: Too Many Requests. Rate limited. Try after a while.
Error retrieving sector for MA: Too Many Requests. Rate limited. Try after a while.
Error retrieving sector for NVDA: Too Many Requests. Rate limited. Try after a while.
Error retrieving sector for TSLA: Too Many Requests. Rate limited. Try after a while.
Error retrieving sector for GOOGL: Too Many Requests. Rate limited. Try after a while.
Error retrieving sector for WMT: Too Many Requests. Rate limited. Try after a while.
Error retrieving sector for META: Too Many Requests. Rate limited. Try after a while.
Error retrieving sector for COST: Too Many Requests. Rate limited. Try after a while.
Er

KeyboardInterrupt: 

In [None]:
grouped = valuation_df.groupby(['sector']).median(numeric_only=True)

merged_df = pd.merge(valuation_df, grouped, on='sector', suffixes=('_stock', '_sector'))

merged_df = merged_df[merged_df['Sharpe_ratios_stock'] >= merged_df['Sharpe_ratios_sector']] # above median sharpe, this is a performance measure
merged_df = merged_df[merged_df['profitMargins_stock'] >= merged_df['profitMargins_sector']] # above median prof margin, this is a performance measure
merged_df = merged_df[merged_df['trailingPE_stock'] <= merged_df['trailingPE_sector']] # below median trailing pe ratio, this is a risk adverse measure; sharpe ratio does not take into account over valuation

stock_cols_only = [col for col in merged_df.columns if '_sector' not in col]
filtered_valuation_df = merged_df[stock_cols_only] # export only stock info, no need for sector info

filtered_valuation_df.columns = filtered_valuation_df.columns.str.replace('_stock', '', regex=False) # clean up excesive stock col name


filtered_valuation_df.to_csv(f'{WORKSPACE_DIR}/portfolio_py/data/clean/filtered_valuation_df.csv', index=False)
logging.info(f"Final filtered DataFrame saved. Rows: {filtered_valuation_df.shape[0]}")

# Print the sorted DataFrame (Optional)

end_time = time.time()
elapsed_time = end_time - start_time
print(f'\nTotal Run Time: {elapsed_time} seconds')
filtered_valuation_df.reset_index(drop=True)

KeyError: 'profitMargins_stock'

In [None]:
filtered_valuation_df[filtered_valuation_df['Tickers'] == 'NVDA']

Unnamed: 0,marketCap,trailingPE,forwardPE,profitMargins,trailingEps,Tickers,Sharpe_ratios,sector
