### Sharpe Ratio Filter and Export Notebook

1. **Notebook Objective:**
    - This notebook calculates the Sharpe Ratio for a set of stocks obtained from `company_tickers.json`, which covers approximately all NYSE stocks.
    - It uses the Sharpe Ratio as a filter to exclude underperforming stocks; the filtering itself will be implemented in the next notebook, so that a complete dataset is preserved for potential future use.
    - Only stocks with a Sharpe Ratio above a certain threshold are retained.
    - The filtered stock data is then exported for further analysis or reporting.

<br>

2. **Sharpe Ratio Formula:**
	- $\text{Sharpe Ratio} = \frac{\overline{R_p} - R_f}{\sigma_p}$
        - $\overline{R_p}$: Average return of the portfolio or stock
        - $R_f$: Risk-free rate
        - $\sigma_p$: Standard deviation of portfolio or stock returns

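3. **Known Issues / Improvements:**
    - Data collection raises errors when the notebook is run with a small number of stocks (e.g. 100), whereas a larger number (e.g. 1000) works fine; the cause has not been investigated yet.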

In [1]:
import time
import os
import sys

from dotenv import load_dotenv

from datetime import datetime
import yfinance as yf
import numpy as np
import pandas as pd

# Put the parent of the current working directory on sys.path *before* the
# `utils` imports below, so those packages can be resolved.
# NOTE(review): presumably `utils/` lives one level above this notebook's
# folder -- confirm against the repository layout.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# Project-local helpers and run configuration (date window, number of tickers).
from utils.finance_utils import calculate_sharpe_ratio
from utils.helpers import divide_chunks
from utils.config import PROGRAM_START_DATE, PROGRAM_END_DATE, N_STOCKS_TO_GET


# The relative '../data/...' paths used in later cells resolve against this directory.
print(f'Current Working Directory: {os.getcwd()}')


---------------------------------
finance_utils.py successfully loaded, updated on 07/12/2025 2:57
---------------------------------



---------------------------------
helpers.py successfully loaded, updated last Feb. 04 2025
---------------------------------


Updated on 06/12/2025 3:11 2025-07-12
Current Working Directory: /Users/blakeuribe/Desktop/portfolio_py/notebooks


In [2]:
# Wall-clock start; the time report at the end of the notebook subtracts this.
start_time = time.time()

# Prevent collecting redundant data: if the existing Sharpe file was refreshed
# within the last 7 days, ask the user before appending to it again.
try:
    sharpe_data = pd.read_csv('../data/clean/sharpe_ratios.csv')

    # Use the most recent collection date in the file. Rows are appended over
    # several runs, so the first row is not necessarily the latest collection.
    collected_date = pd.to_datetime(sharpe_data['Date_Collected']).max().date()
    today = datetime.today().date()

    # Check if within 7 days
    if abs((today - collected_date).days) <= 7:
        user_response = input(f"Data was collected on {collected_date}. Do you still want to proceed? Selecting yes will simply append more data to file, will not replace existing data (yes/no): ").strip().lower()
        if user_response in ('no', 'n'):
            print("Aborting: Data already collected recently.")
            sys.exit()  # Raises SystemExit, which the handlers below do not catch

except FileNotFoundError:
    # First run: there is nothing to compare against yet.
    print("No existing Sharpe data file found; a new one will be created on export.")
except Exception as e:
    # Best effort: a malformed file must not block collection, but say what
    # actually went wrong instead of pretending the file is missing.
    print(f"Could not determine the last collection date ({type(e).__name__}: {e}); continuing.")


In [3]:

load_dotenv()

print(f'Ending Program at: {PROGRAM_END_DATE}')
print(f'Starting Program at: {PROGRAM_START_DATE}')

# Ticker universe: only the 'Tickers' column of the master equity file is used.
# NOTE(review): no filtering on a "has yfinance data" flag happens here, even
# though later cells write a 'YF_Have_Data' column -- confirm whether the
# universe was meant to be restricted to tickers with data.
master_equity_data = pd.read_csv('../data/clean/master_equity_data.csv')
ticker_df = master_equity_data[['Tickers']]

# Risk-free series from '^IRX' closes over the program window, scaled from a
# percent quote to a daily rate (divide by 100, then by a 360-day year).
tbill_data = yf.download(
    '^IRX',
    start=PROGRAM_START_DATE,
    end=PROGRAM_END_DATE,
    auto_adjust=True,
)['Close'] / 100 / 360

# Benchmark: Sharpe ratio of SPY over the same window and risk-free series.
spy_sharpe = calculate_sharpe_ratio(
    np.array('spy'),
    tbill=tbill_data,
    start_date=PROGRAM_START_DATE,
    end_date=PROGRAM_END_DATE,
)
print(f'Spy Sharpe: {spy_sharpe}')

Ending Program at: 2025-07-12
Starting Program at: 2024-07-12


[*********************100%***********************]  1 of 1 completed
INFO:backoff_logger:Starting call to 'utils.finance_utils.fetch_data_with_backoff', this is the 1st time calling it.
[*********************100%***********************]  1 of 1 completed

Spy Sharpe: Ticker
SPY    0.541534
dtype: float64





In [4]:
# Avoid re-fetching: drop every ticker that already appears in the saved
# Sharpe file, then take at most N_STOCKS_TO_GET of what is left.
try:
    tickers_not_collected = ticker_df[
        ~ticker_df['Tickers'].isin(sharpe_data['Tickers'])
    ]
    tickers = tickers_not_collected['Tickers'][:N_STOCKS_TO_GET]
except Exception as e:
    # Fallback (e.g. `sharpe_data` was never loaded): start from the top of
    # the full ticker list and report why the filter could not be applied.
    tickers = ticker_df['Tickers'][:N_STOCKS_TO_GET]
    print(e)

In [5]:
import logging
import io
import re

# Route yfinance's ERROR-level log records into an in-memory buffer so the
# tickers it reports as failed can be parsed out of the text afterwards.
yf_logger = logging.getLogger("yfinance")
yf_logger.setLevel(logging.ERROR)  # anything below ERROR is ignored

log_stream = io.StringIO()
stream_handler = logging.StreamHandler(stream=log_stream)
yf_logger.addHandler(stream_handler)

In [6]:

# Number of tickers requested from yfinance per call
num_in_chunks = 15

# Try not to get redundant data: skip tickers already present in the saved
# Sharpe file; fall back to the full list if that file was never loaded.
try:
    tickers_not_collected = ticker_df[~ticker_df['Tickers'].isin(sharpe_data['Tickers'])]
    tickers = tickers_not_collected['Tickers'][0:N_STOCKS_TO_GET]

except Exception as e:
    tickers = ticker_df['Tickers'][0:N_STOCKS_TO_GET]
    print(e)

# Split the selected tickers into chunks of `num_in_chunks`
ticker_chunks = list(divide_chunks(tickers, num_in_chunks))

# Per-chunk result frames, and tickers from chunks that failed outright
df_list = []
issue_list = []

# Loop through each chunk and process the stocks
for chunk in ticker_chunks:
    try:
        # Calculate Sharpe ratios for the current chunk
        sharpe_ratios_series = calculate_sharpe_ratio(chunk,
                                                      tbill=tbill_data,
                                                      start_date=PROGRAM_START_DATE,
                                                      end_date=PROGRAM_END_DATE)

        # One row per ticker in the chunk
        results_df_chunk = pd.DataFrame({
            'Tickers': sharpe_ratios_series.index,
            'Sharpe_ratios': sharpe_ratios_series.values
        })
        df_list.append(results_df_chunk)

    except Exception as e:
        print(f'Error calculating Sharpe ratios for chunk: {chunk}\n{str(e)}')
        # Accumulate across ALL failed chunks. Plain assignment here would
        # keep only the tickers of the last chunk that failed.
        issue_list.extend(chunk.dropna().values.tolist())


# Everything yfinance logged at ERROR level while the loop ran
log_contents = log_stream.getvalue()

# Ticker groups reported specifically with YFPricesMissingError
pattern = r"\['(.+?)'\]: YFPricesMissingError"
failed_tickers = re.findall(pattern, log_contents)

# Clean up handler
yf_logger.removeHandler(stream_handler)
stream_handler.close()

# Every bracketed ticker group in the log, whatever the error type.
# Each match looks like:  LGF-B', 'LGF-A', 'LTES
pattern = r"\['(.+?)'\]"
bad_tickers = re.findall(pattern, log_contents)

# Split each group into individual symbols. Only the list punctuation
# (quotes, commas, whitespace) is dropped, so symbols containing digits,
# dots or other characters are kept intact rather than being rewritten into
# a different ticker.
clean_bad_tickers = [
    symbol
    for ticker_group in bad_tickers
    for symbol in re.findall(r"[^',\s]+", ticker_group)
]

# Tickers from chunks that raised are treated as bad as well
clean_bad_tickers = clean_bad_tickers + issue_list

INFO:backoff_logger:Starting call to 'utils.finance_utils.fetch_data_with_backoff', this is the 1st time calling it.
[*********************100%***********************]  15 of 15 completed
ERROR:yfinance:
9 Failed downloads:
ERROR:yfinance:['LGF-B', 'LGF-A', 'LTES', 'BUJA', 'GOLD', 'CAPV', 'BUJAU', 'AHNR', 'LAAB']: YFPricesMissingError('possibly delisted; no price data found  (1d 2024-07-12 -> 2025-07-12) (Yahoo error = "No data found, symbol may be delisted")')
INFO:backoff_logger:Starting call to 'utils.finance_utils.fetch_data_with_backoff', this is the 1st time calling it.
[*********************100%***********************]  15 of 15 completed
INFO:backoff_logger:Starting call to 'utils.finance_utils.fetch_data_with_backoff', this is the 1st time calling it.
[*********************100%***********************]  15 of 15 completed
INFO:backoff_logger:Starting call to 'utils.finance_utils.fetch_data_with_backoff', this is the 1st time calling it.
[*********************100%***************

In [7]:
# Combine the per-chunk results, add the SPY benchmark row, report the share
# of NaN values and append the result to the Sharpe CSV.
file_path = '../data/clean/sharpe_ratios.csv'

if not df_list:
    # No chunk produced a result. Checked explicitly instead of catching the
    # ValueError that pd.concat raises on an empty list, so that unrelated
    # ValueErrors further down are not misreported as "nothing to do".
    print('No more values to get')
else:
    # Concatenate all DataFrames vertically
    sharpe_df = pd.concat(df_list, axis=0, ignore_index=True)

    # Ensure we have a benchmark value for future use
    sharpe_df.loc[len(sharpe_df)] = ['SPY', float(spy_sharpe.values[0])]

    sharpe_df = sharpe_df.drop_duplicates()

    # NaN cells across all columns, expressed as a percentage of the row count
    pct_of_nan = (sharpe_df.isna().sum().sum() / len(sharpe_df) * 100).round(2)
    print('\n----Df Report----')
    print(f'Pct of NaN values is: {pct_of_nan}%')

    sharpe_df['Date_Collected'] = datetime.today().date()  # Add collection date
    file_exists = os.path.exists(file_path)  # Header is written only for a new file

    # Above the NaN threshold the user must confirm the export explicitly
    if pct_of_nan >= 25:
        user_input = input(f"The percentage of NaN values is {pct_of_nan:.2f}%. Do you still want to append and/or export the DataFrame? (yes/no): ").strip().lower()
        do_export = user_input == 'yes'
        success_message = 'Df containing Sharpe Ratios Successfully Exported'
    else:
        do_export = True
        success_message = 'Nan values are below threshold. Successfully Exported'

    if do_export:
        # Append so that results from earlier runs are preserved
        sharpe_df.to_csv(file_path, mode='a', index=False, header=not file_exists)
        print(success_message)
    else:
        print('Export cancelled.')


end_time = time.time()
elapsed_time = end_time - start_time

print('\n----Time Report----')
print(f'Processing time: {elapsed_time:.2f} seconds, for {N_STOCKS_TO_GET} Tickers')


----Df Report----
Pct of NaN values is: 0.5%
Nan values are below threshold. Successfully Exported

----Time Report----
Processing time: 242.56 seconds, for 3000 Tickers


In [8]:
# Re-read everything exported so far and keep exactly one row per ticker.
end_sharpe_df = pd.read_csv(file_path)

# keep='last': rows are appended run after run, so the last occurrence is the
# most recently collected value. The SPY benchmark row is appended on every
# run and would otherwise stay frozen at its oldest value.
end_sharpe_df = end_sharpe_df.drop_duplicates(subset='Tickers', keep='last')
end_sharpe_df.to_csv(file_path, index=False)

# Set True where a Sharpe ratio row was produced for the ticker in this run
try:
    master_equity_data.loc[master_equity_data['Tickers'].isin(sharpe_df['Tickers']), 'YF_Have_Data'] = True
except NameError:
    # `sharpe_df` is not defined when this run produced no results
    print('No sharpe df')


# Tickers reported as failed override the flag set above
master_equity_data.loc[master_equity_data['Tickers'].isin(clean_bad_tickers), 'YF_Have_Data'] = False

master_equity_data.to_csv('../data/clean/master_equity_data.csv', index=False)

In [9]:
# Display the de-duplicated Sharpe table (one row per ticker) as the cell output.
end_sharpe_df

Unnamed: 0,Tickers,Sharpe_ratios,Date_Collected
0,A,1.965316,2025-07-12
1,AA,1.825040,2025-07-12
2,AAAU,1.938923,2025-07-12
3,AACB,1.960840,2025-07-12
4,AACG,-0.090441,2025-07-12
...,...,...,...
8955,VNRX,0.438269,2025-07-12
8956,VNT,-0.025912,2025-07-12
8957,VNTH,1.590818,2025-07-12
8958,VNUE,0.536493,2025-07-12
