In [11]:
# Standard library imports
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, List, Optional
import logging

# Third-party imports
import numpy as np
import pandas as pd
from tqdm import tqdm
import yfinance as yf

In [12]:
msft_data = yf.download('MSFT', start='2020-01-01', end='2023-01-01', auto_adjust=True)
msft_data.head()

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,MSFT,MSFT,MSFT,MSFT,MSFT
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-01-02,158.205765,158.314112,155.950192,156.393425,22622100
2020-01-03,156.235825,157.545836,155.684244,155.940346,21116200
2020-01-06,156.639694,156.708649,154.157567,154.719007,20813700
2020-01-07,155.211456,157.270038,154.95537,156.925308,21634100
2020-01-08,157.683731,158.383066,155.575897,156.541163,27746500


In [13]:
# Download financial statements
msft_info = yf.Ticker('MSFT').info

essential_kpis = [
    'marketCap',
    'trailingPE',
    'profitMargins',
    'trailingEps'
]

In [14]:
def get_essential_kpis(ticker_symbol: str, kpi_list: List[str]) -> Dict[str, Optional[float]]:
    """
    Efficiently retrieve specific KPIs from Yahoo Finance for a given ticker
    """
    try:
        # Get ticker info once to avoid multiple API calls
        stock_info = yf.Ticker(ticker_symbol).info
        
        # Dictionary comprehension for efficient key extraction
        return {
            kpi: stock_info.get(kpi) 
            for kpi in kpi_list
        }
    except Exception as e:
        print(f"Error fetching data for {ticker_symbol}: {str(e)}")
        return {}

# Example usage
essential_kpis = [
    'marketCap',
    'trailingPE',
    'profitMargins',
    'trailingEps'
]

msft_kpis = get_essential_kpis('MSFT', essential_kpis)
msft_kpis

{'marketCap': 3246068596736,
 'trailingPE': 36.11249,
 'profitMargins': 0.35608003,
 'trailingEps': 12.09}

In [15]:
sharpe_ratio_df = pd.read_csv('/Users/blakeuribe/Desktop/portfolio_py/data/clean/sharpe_ratios.csv')
sharpe_ratio_df.head()

Unnamed: 0,ticker,sharpe_ratio
0,LNKS,8.709793
1,IZTC,8.098648
2,JUNS,7.953146
3,AVR,7.717997
4,CGTL,7.352526


In [16]:
tickers = sharpe_ratio_df['ticker']
len(tickers)

5000

In [17]:
import time
from random import uniform
from tenacity import retry, stop_after_attempt, wait_exponential

def get_stock_kpis(ticker: str, kpis: List[str]) -> Dict:
    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10)
    )
    def fetch_with_retry(ticker_symbol):
        time.sleep(uniform(1, 2))  # Random delay between 1-2 seconds
        return yf.Ticker(ticker_symbol).info

    try:
        info = fetch_with_retry(ticker)
        data = {kpi: info.get(kpi) for kpi in kpis}
        data['ticker'] = ticker
        return data
    except Exception as e:
        logging.error(f"Error processing {ticker}: {str(e)}")
        return {'ticker': ticker, **{kpi: None for kpi in kpis}}

def batch_process_stocks(tickers: List[str], kpis: List[str], max_workers: int = 5, batch_size: int = 100) -> pd.DataFrame:
    results = []
    
    # Process in smaller batches
    for i in range(0, len(tickers), batch_size):
        batch = tickers[i:i + batch_size]
        
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_ticker = {
                executor.submit(get_stock_kpis, ticker, kpis): ticker 
                for ticker in batch
            }
            
            for future in tqdm(as_completed(future_to_ticker), total=len(batch)):
                results.append(future.result())
        
        # Add delay between batches
        time.sleep(2)
    
    df = pd.DataFrame(results)
    df.set_index('ticker', inplace=True)
    return df

# Example usage

# Setup logging
logging.basicConfig(level=logging.INFO)

# Process stocks and get DataFrame
kpi_df = batch_process_stocks(tickers, essential_kpis)

100%|██████████| 100/100 [00:32<00:00,  3.12it/s]
100%|██████████| 100/100 [00:33<00:00,  2.99it/s]
100%|██████████| 100/100 [00:32<00:00,  3.11it/s]
100%|██████████| 100/100 [00:33<00:00,  2.97it/s]
100%|██████████| 100/100 [00:32<00:00,  3.12it/s]
100%|██████████| 100/100 [00:31<00:00,  3.15it/s]
100%|██████████| 100/100 [00:32<00:00,  3.05it/s]
100%|██████████| 100/100 [00:32<00:00,  3.09it/s]
100%|██████████| 100/100 [00:32<00:00,  3.09it/s]
100%|██████████| 100/100 [00:33<00:00,  3.01it/s]
100%|██████████| 100/100 [00:31<00:00,  3.14it/s]
 33%|███▎      | 33/100 [00:18<00:55,  1.21it/s]ERROR:root:Error processing RMD: RetryError[<Future at 0x11a300f50 state=finished raised JSONDecodeError>]
 39%|███▉      | 39/100 [00:21<00:30,  2.01it/s]ERROR:root:Error processing EXFY: RetryError[<Future at 0x119a6f690 state=finished raised JSONDecodeError>]
 43%|████▎     | 43/100 [00:22<00:18,  3.08it/s]ERROR:root:Error processing AOSL: RetryError[<Future at 0x119c953d0 state=finished raised J

In [None]:
kpi_df = kpi_df.reset_index()



Unnamed: 0,ticker,marketCap,trailingPE,profitMargins,trailingEps
0,IZTC,176949808.0,,,-0.63
1,JUNS,319887360.0,,,-0.08
2,AVR,200544080.0,,,-1.98
3,CGTL,167748432.0,55.892857,0.06430,0.14
4,LNKS,105726000.0,,-0.03374,-0.03
...,...,...,...,...,...
4995,ITMSF,82999464.0,,,-0.01
4996,OPI,70845840.0,,-0.04749,-0.12
4997,NIXX,69469608.0,,,-5.85
4998,PHUN,77365104.0,,,-8.52


In [48]:
valuation_df = pd.merge(kpi_df, sharpe_ratio_df, on='ticker')
valuation_df = valuation_df.dropna()
valuation_df.to_csv('/Users/blakeuribe/Desktop/portfolio_py/data/clean/valuation_df.csv', index=False)

# Filtering the DataFrame based on the conditions
filtered_valuation_df = valuation_df[
    (valuation_df['profitMargins'] > 0.2) 
    & (valuation_df['sharpe_ratio'] > 0.75) 
    & (valuation_df['marketCap'] > 10_000_000_000)
]

# Print shapes of both dataframes
print(valuation_df.shape)
print(filtered_valuation_df.shape)

# Sort the filtered DataFrame by profitMargins in descending order
filtered_valuation_df = filtered_valuation_df.sort_values('profitMargins', ascending=False)
filtered_valuation_df.to_csv('/Users/blakeuribe/Desktop/portfolio_py/data/clean/filtered_valuation_df.csv', index=False)

# Optionally, print the sorted dataframe (or examine it)
filtered_valuation_df

(2562, 6)
(105, 6)


Unnamed: 0,ticker,marketCap,trailingPE,profitMargins,trailingEps,sharpe_ratio
247,TPL,2.603332e+10,58.138535,0.65343,19.49,2.016304
102,NVDA,3.298803e+12,53.241108,0.55041,2.53,2.524625
809,V,6.152358e+11,32.68621,0.54955,9.72,1.295821
471,MO,9.124857e+10,9.094595,0.50511,5.92,1.652667
895,EWBC,1.329591e+10,12.109849,0.46044,7.92,1.218759
...,...,...,...,...,...,...
1374,SNA,1.803812e+10,17.668379,0.20402,19.45,0.838045
326,ORCL,4.745322e+11,41.481663,0.20396,4.09,1.853960
1104,RELX,8.522124e+10,35.80315,0.20338,1.27,1.036063
500,FFIV,1.478561e+10,26.385983,0.20126,9.56,1.607798
