### Sharpe Ratio Filter and Export Notebook

1. **Notebook Objective:**
    - This notebook calculates the Sharpe Ratio for a set of stocks obtained from company_tickers.json, which covers approximately all NYSE stocks
    - The Sharpe Ratio is intended as a filter to exclude underperforming stocks; the filtering itself is implemented in the next notebook so that a complete dataset is preserved for potential future use
    - In that next notebook, only stocks with a Sharpe Ratio above a certain threshold are retained
    - The complete (unfiltered) Sharpe Ratio data is exported from this notebook for further analysis or reporting

<br>

2. **Sharpe Ratio Formula:**
	- $\text{Sharpe Ratio} = \frac{\overline{R_p} - R_f}{\sigma_p}$
        - $\overline{R_p}$: Average return of the portfolio or stock
        - $R_f$: Risk-free rate
        - $\sigma_p$: Standard deviation of portfolio or stock returns

3. **Improvements:**
    - Still needed: filter out tickers that are not available on Yahoo Finance

In [11]:
import time
import os
import sys

# Start the wall-clock timer; the final time report subtracts this value.
start_time = time.time()

# Load .env BEFORE reading the environment. os.getenv() only sees variables
# that are already in the process environment, so on a fresh kernel a
# WORKSPACE_DIR defined only in .env would otherwise come back as None.
# NOTE(review): assumes WORKSPACE_DIR may be supplied through .env — confirm.
from dotenv import load_dotenv
load_dotenv()

WORKSPACE_DIR = os.getenv('WORKSPACE_DIR')

# Fail fast with an actionable message instead of later building paths such as
# 'None/portfolio_py/...' out of a missing value.
if WORKSPACE_DIR is None:
    raise RuntimeError(
        "WORKSPACE_DIR is not set. Export it (or add it to .env) so that it "
        "points to the directory containing 'portfolio_py'."
    )

# Run from the project root so the relative 'data/...' paths and the local
# 'utils' package imports resolve.
if not os.getcwd().endswith('portfolio_py'):
    os.chdir(f'{WORKSPACE_DIR}/portfolio_py')
print(f'Current Working Directory: {os.getcwd()}')

from utils.finance_utils import calculate_sharpe_ratio
from utils.helpers import divide_chunks
from utils.config import PROGRAM_START_DATE, PROGRAM_END_DATE

from dotenv import load_dotenv

from datetime import datetime, timedelta
import yfinance as yf
import numpy as np
import pandas as pd


Current Working Directory: /Users/blakeuribe/Desktop/portfolio_py


In [12]:
# Prevent collecting redundant data.
#
# Loads the previous export (if any) into `sharpe_data` and, when the most
# recent collection is at most 7 days old, asks the user whether to continue.

try:
    sharpe_data = pd.read_csv('data/clean/sharpe_ratios.csv')

    # The export step appends to this file, so row 0 holds the OLDEST run.
    # Use the newest date instead; 'YYYY-MM-DD' strings sort chronologically,
    # so the string maximum is the latest collection date.
    latest_collected = sharpe_data['Date_Collected'].dropna().max()

    collected_date = datetime.strptime(latest_collected, "%Y-%m-%d").date()
    today = datetime.today().date()

    # Check if within 7 days
    if abs((today - collected_date).days) <= 7:
        user_response = input(f"Data was collected on {collected_date}. Do you still want to proceed? (yes/no): ").strip().lower()
        if user_response == 'no':
            print("Aborting: Data already collected recently.")
            # SystemExit does not derive from Exception, so the handlers
            # below do not swallow it.
            sys.exit()  # This stops the script completely

except FileNotFoundError:
    # First run: nothing has been exported yet. Provide an empty frame with the
    # export's columns so later lookups on sharpe_data['Tickers'] simply match
    # nothing instead of depending on an undefined name.
    sharpe_data = pd.DataFrame(columns=['Tickers', 'Sharpe_ratios', 'Date_Collected'])
    print('No previous Sharpe ratio export found; collecting from scratch.')

except Exception as e:
    # Best effort: an unreadable or malformed export must not block a new run.
    print(f"Exception occurred: {e}")


In [13]:
# Build the ticker universe (ETFs + company_tickers.json), download the
# risk-free series, and compute the SPY benchmark Sharpe ratio.
load_dotenv()

print(f'Ending Program at: {PROGRAM_END_DATE}')
print(f'Starting Program at: {PROGRAM_START_DATE}')

etf_df = pd.read_csv('data/raw/etf_data_cleaned.csv') # include all etfs with NYSE stocks
etf_df = etf_df[etf_df['Have_Data'] == True] # pre filter, as the dataset is pretty old


# The JSON is transposed so each entry becomes a row; only the columns left
# after dropping 'cik_str' and 'title' are kept.
nyse_ticker_df = pd.read_json(f'{WORKSPACE_DIR}/portfolio_py/data/raw/company_tickers.json')
nyse_ticker_df = nyse_ticker_df.T.drop(columns=['cik_str', 'title'])

# De-duplicate on the ticker symbol itself. etf_df carries columns (e.g.
# 'Have_Data') that the company list is not shown to have, so after concat a
# symbol present in both sources yields rows that differ in those columns and
# a whole-row drop_duplicates() would keep both. The first occurrence (the ETF
# row) is retained.
ticker_df = (
    pd.concat([etf_df, nyse_ticker_df])
    .drop_duplicates(subset='ticker')
    .reset_index(drop=True)
)


tbill_data = yf.download('^IRX', start=PROGRAM_START_DATE, end=PROGRAM_END_DATE, auto_adjust=True)['Close']
# Divides the quote by 100 and by 360.
# NOTE(review): presumably an annualized percentage on a 360-day basis — confirm.
tbill_data = tbill_data / 100 / 360  # Convert to daily rate

# Benchmark value; it is appended to the results as the 'SPY' row later on.
# NOTE(review): the ticker is passed as a 0-d NumPy string array; the input
# types accepted by calculate_sharpe_ratio are defined in utils.finance_utils.
spy_sharpe = calculate_sharpe_ratio(np.array('spy'), tbill=tbill_data, start_date=PROGRAM_START_DATE, end_date=PROGRAM_END_DATE)
print(f'Spy Sharpe: {spy_sharpe}')

Ending Program at: 2025-04-15
Starting Program at: 2024-04-15


[*********************100%***********************]  1 of 1 completed
INFO:backoff_logger:Starting call to 'utils.finance_utils.fetch_data_with_backoff', this is the 1st time calling it.
[*********************100%***********************]  1 of 1 completed

Spy Sharpe: Ticker
SPY    0.337101
dtype: float64





In [14]:

# Batch configuration
num_in_chunks = 15             # tickers handed to each calculate_sharpe_ratio call
num_of_stocks_to_get = 1000    # Adjust as needed

# Skip tickers that already appear in the previous export. If that lookup fails
# for any reason, fall back to the head of the full ticker list.
try:
    already_collected_mask = ticker_df['ticker'].isin(sharpe_data['Tickers'])
    tickers_not_collected = ticker_df[~already_collected_mask]
    tickers = tickers_not_collected['ticker'][:num_of_stocks_to_get]
except Exception as e:
    tickers = ticker_df['ticker'][:num_of_stocks_to_get]
    print(e)

# Materialize the chunks so the batches can be iterated as a list
ticker_chunks = [*divide_chunks(tickers, num_in_chunks)]


# Per-chunk result frames; combined into final_df after the loop
df_list = []

# Loop through each chunk and process the stocks
for chunk in ticker_chunks:
    try:
        # Calculate Sharpe ratios for the current chunk
        sharpe_ratios_series = calculate_sharpe_ratio(
            chunk,
            tbill=tbill_data,
            start_date=PROGRAM_START_DATE,
            end_date=PROGRAM_END_DATE
        )
        # Create a DataFrame for the chunk
        results_df_chunk = pd.DataFrame({
            'Tickers': sharpe_ratios_series.index,
            'Sharpe_ratios': sharpe_ratios_series.values
        })
        df_list.append(results_df_chunk)

    except Exception as e:
        # A failing chunk is reported and skipped so the remaining chunks still run.
        print(f'Error calculating Sharpe ratios for chunk: {chunk}\n{str(e)}')

# Concatenate all DataFrames vertically.
# pd.concat raises ValueError on an empty list, which happens when there were
# no tickers left to collect or every chunk failed; start from an empty frame
# with the same columns in that case.
if df_list:
    final_df = pd.concat(df_list, axis=0, ignore_index=True)
else:
    print('No Sharpe ratios were calculated for any chunk; only the SPY benchmark will be recorded.')
    final_df = pd.DataFrame(columns=['Tickers', 'Sharpe_ratios'])


# Ensure we have a benchmark value for future use
final_df.loc[len(final_df)] = ['SPY', float(spy_sharpe.values[0])]

# Removes rows that are exact (ticker, value) repeats
final_df = final_df.drop_duplicates()

# Check percentage of NaN values.
# Computed before 'Date_Collected' is added: NaN cells divided by row count.
pct_of_nan = (final_df.isna().sum().sum() / len(final_df) * 100).round(2)
print('\n----Df Report----')
print(f'Pct of NaN values is: {pct_of_nan}%')

# Save the results to CSV

file_path = f'{WORKSPACE_DIR}/portfolio_py/data/clean/sharpe_ratios.csv'

final_df['Date_Collected'] = datetime.today().date() # Add collection date
file_exists = os.path.exists(file_path) # Check file existence

# Condition based on % of NaNs: at 25% or more the user must confirm the export.
if pct_of_nan >= 25:
    user_input = input(f"The percentage of NaN values is {pct_of_nan:.2f}%. Do you still want to export the DataFrame? (yes/no): ").strip().lower()
    export_confirmed = user_input == 'yes'
    success_message = 'Df containing Sharpe Ratios Successfully Exported'
else:
    export_confirmed = True
    success_message = 'Nan values are below threshold. Successfully Exported'

if export_confirmed:
    # Append to the existing export; write the header only when creating the file.
    final_df.to_csv(file_path, mode='a', index=False, header=not file_exists)
    print(success_message)
else:
    print('Export cancelled.')


end_time = time.time()
elapsed_time = end_time - start_time

print('\n----Time Report----')
# Report the number of tickers actually submitted; this can be smaller than
# num_of_stocks_to_get when fewer tickers remained to be collected.
print(f'Processing time: {elapsed_time:.2f} seconds, for {len(tickers)} Tickers')

INFO:backoff_logger:Starting call to 'utils.finance_utils.fetch_data_with_backoff', this is the 1st time calling it.
[*********************100%***********************]  15 of 15 completed
INFO:backoff_logger:Starting call to 'utils.finance_utils.fetch_data_with_backoff', this is the 1st time calling it.
[*********************100%***********************]  15 of 15 completed
INFO:backoff_logger:Starting call to 'utils.finance_utils.fetch_data_with_backoff', this is the 1st time calling it.
[*********************100%***********************]  15 of 15 completed
INFO:backoff_logger:Starting call to 'utils.finance_utils.fetch_data_with_backoff', this is the 1st time calling it.
[*********************100%***********************]  15 of 15 completed
INFO:backoff_logger:Starting call to 'utils.finance_utils.fetch_data_with_backoff', this is the 1st time calling it.
[*********************100%***********************]  15 of 15 completed
INFO:backoff_logger:Starting call to 'utils.finance_utils.fe


----Df Report----
Pct of NaN values is: 6.01%
Nan values are below threshold. Successfully Exported

----Time Report----
Processing time: 87.91 seconds, for 1000 Tickers





In [15]:
# Display the results without rows that contain NaN. The result is not
# assigned, so final_df itself is left unchanged.
final_df.dropna()

Unnamed: 0,Tickers,Sharpe_ratios,Date_Collected
0,BIL,6.338252,2025-06-15
1,BIS,0.227674,2025-06-15
2,BIV,0.689378,2025-06-15
3,BJK,-0.457320,2025-06-15
4,BKF,0.609976,2025-06-15
...,...,...,...
996,OLCLY,-1.125129,2025-06-15
997,OWL,0.116221,2025-06-15
998,RMD,0.515481,2025-06-15
999,VMC,-0.226669,2025-06-15
