### Diversification and Top Performers by Sector

1. **Notebook Objective:**
	- This notebook will group stocks by sector
	- It will identify the top *n* stocks in each sector based on user-defined criteria (n is defined in `config.py`)
	- It will compute the correlation matrix of returns across selected stocks
		- Notation: Let $R$ be a matrix of returns, then the correlation matrix $\rho$ is given by  
		  $\rho_{i,j} = \frac{\text{Cov}(R_i, R_j)}{\sigma_{R_i} \cdot \sigma_{R_j}}$
	- It will identify the most diversifiable stocks (those with the lowest correlations)
	- Finally, it will export potential portfolios—those with high Sharpe Ratios and strong performance metrics—to `config.py` for use in later analysis

2. **Bugs**
    - Need to fix the function `get_corr_pairs` in `utils/finance_utils.py` so that the user can match the date with the program


In [1]:
import time
import os 

start_time = time.time()  # wall-clock start, used to measure run time

# Directory that contains the `portfolio_py` project, taken from the environment.
WORKSPACE_DIR = os.getenv('WORKSPACE_DIR')

# Work from the project directory so that relative paths such as
# 'utils/config.py' (used further down in this notebook) resolve.
if not os.getcwd().endswith('portfolio_py'):
    if not WORKSPACE_DIR:
        # os.getenv returns None when the variable is unset; formatting that into
        # a path would yield the literal 'None/portfolio_py' and a confusing
        # FileNotFoundError from os.chdir. Fail early with an actionable message.
        raise EnvironmentError(
            "WORKSPACE_DIR is not set: export it (the directory containing "
            "'portfolio_py') or start the kernel from inside 'portfolio_py'."
        )
    os.chdir(os.path.join(WORKSPACE_DIR, 'portfolio_py'))
print(f'Current Working Directory: {os.getcwd()}')

from utils.finance_utils import *
from utils.helpers import separate_corr_pairs
from utils.config import TOP_N_STOCKS

Current Working Directory: /Users/blakeuribe/Desktop/portfolio_py

---------------------------------
finance_utils.py successfully loaded, updated last April. 29 2025 4:55
---------------------------------



---------------------------------
helpers.py successfully loaded, updated last Feb. 04 2025
---------------------------------


Updated on 04/15/2025 5:56


Sector Valuation: Group by sector and find the best stocks in each sector

In [2]:
# Load the cleaned valuation table produced earlier in the pipeline.
valuation_csv_path = f'{WORKSPACE_DIR}/portfolio_py/data/clean/filtered_valuation_df.csv'
filtered_valuation_df = pd.read_csv(valuation_csv_path)

# Risk-aversion choice: exclude every row in the Consumer Cyclical sector.
not_consumer_cyclical = filtered_valuation_df['Sector'].ne('Consumer Cyclical')
filtered_valuation_df = filtered_valuation_df[not_consumer_cyclical]

# Rank within each sector twice -- once by profit margin, once by Sharpe ratio --
# keeping the same number of names per sector for both rankings.
stocks_per_sector = 3
top_stocks_on_profit = get_top_n_by_sector(
    filtered_valuation_df, filter_var='profitMargins', top_n=stocks_per_sector
)
top_stocks_on_sharpe = get_top_n_by_sector(
    filtered_valuation_df, filter_var='Sharpe_ratios', top_n=stocks_per_sector
)

# Preview of the filtered table (rich display as the cell's last expression).
filtered_valuation_df.head()

Unnamed: 0,Tickers,Sector,marketCap,trailingPE,forwardPE,profitMargins,trailingEps,quickRatio,earningsQuarterlyGrowth,Sharpe_ratios,Date_Collected
1,MMM,Industrials,82309400000.0,19.046078,19.359493,0.17791,8.03,1.114,0.203,1.29764,2025-06-15
2,MPLX,Energy,52101630000.0,11.787529,11.573697,0.38998,4.33,1.027,0.12,1.503437,2025-06-15
3,GRMN,Technology,41352030000.0,28.333775,28.789545,0.22762,7.58,2.615,0.206,0.901269,2025-06-15
4,ANYYY,Industrials,40178850000.0,16.910257,8.912162,0.33547,0.78,1.218,0.154,1.266057,2025-07-04
6,CHKP,Technology,24234560000.0,29.50264,22.49799,0.32746,7.58,1.043,0.038,1.234018,2025-07-04


Diversification: Find negatively correlated stocks

In [3]:
# Build the correlation-pair tables for each ranking's tickers.
# (The recorded output shows a download progress bar per call, so
# get_corr_pairs_of_stocks presumably fetches price history -- confirm in
# utils/finance_utils.py.)
profit_tickers = top_stocks_on_profit['Tickers'].tolist()
top_profit_corr_pairs = get_corr_pairs_of_stocks(profit_tickers)

sharpe_tickers = top_stocks_on_sharpe['Tickers'].tolist()
top_sharpe_corr_pairs = get_corr_pairs_of_stocks(sharpe_tickers)

# The index of each table is handed to separate_corr_pairs as the list of pairs.
# NOTE(review): everything from here down is repeated in the next cell; one of
# the two copies could be removed.
corr_profit_pairs = top_profit_corr_pairs.index.tolist()
corr_sharpe_pairs = top_sharpe_corr_pairs.index.tolist()

mpt_stocks_for_sharpe = separate_corr_pairs(corr_sharpe_pairs, top_n=TOP_N_STOCKS)
mpt_stocks_for_profit = separate_corr_pairs(corr_profit_pairs, top_n=TOP_N_STOCKS)

print(f'Top Stocks based on Sharpe: {mpt_stocks_for_sharpe}')
print(f'Top Stocks based on Profit Margin: {mpt_stocks_for_profit}')

[*********************100%***********************]  13 of 13 completed
[*********************100%***********************]  13 of 13 completed

Top Stocks based on Sharpe: ['AU' 'CWAN' 'CALM' 'HALO' 'KGC' 'ANYYY' 'MLI' 'EPRT']
Top Stocks based on Profit Margin: ['AU' 'CWAN' 'CALM' 'HALO' 'KGC' 'ANYYY' 'MLI' 'EPRT']





Separate low-correlation pairs

In [4]:
# Reduce each correlation-pair table to TOP_N_STOCKS tickers via separate_corr_pairs.
# NOTE(review): the same steps also run at the end of the previous cell.

# Sharpe-ratio ranking
corr_sharpe_pairs = top_sharpe_corr_pairs.index.tolist()
mpt_stocks_for_sharpe = separate_corr_pairs(corr_sharpe_pairs, top_n=TOP_N_STOCKS)

# Profit-margin ranking
corr_profit_pairs = top_profit_corr_pairs.index.tolist()
mpt_stocks_for_profit = separate_corr_pairs(corr_profit_pairs, top_n=TOP_N_STOCKS)

print(f'Top Stocks based on Sharpe: {mpt_stocks_for_sharpe}')
print(f'Top Stocks based on Profit Margin: {mpt_stocks_for_profit}')

Top Stocks based on Sharpe: ['AU' 'CWAN' 'CALM' 'HALO' 'KGC' 'ANYYY' 'MLI' 'EPRT']
Top Stocks based on Profit Margin: ['AU' 'CWAN' 'CALM' 'HALO' 'KGC' 'ANYYY' 'MLI' 'EPRT']


In [5]:
# Export the selected tickers by appending assignments to utils/config.py.
#
# The file is opened in append mode, so every run adds a new entry; when the
# module is imported, the last assignment of each name is the one that takes
# effect.
#
# Render the complete entry BEFORE opening the file: if `.tolist()` fails (for
# example because a selection is not an array), nothing is written, instead of
# leaving a dangling "# New Entry" header in the config module.
config_entry = (
    '\n# New Entry from portfolio_selection.ipynb'
    f'\nmpt_stocks_for_sharpe = {mpt_stocks_for_sharpe.tolist()}\n'
    f'mpt_stocks_for_profit = {mpt_stocks_for_profit.tolist()}\n'
)

with open('utils/config.py', 'a') as f:
    f.write(config_entry)

In [6]:
# Valuation rows for the tickers selected from the Sharpe-ratio ranking.
in_sharpe_selection = filtered_valuation_df['Tickers'].isin(mpt_stocks_for_sharpe)
filtered_valuation_df.loc[in_sharpe_selection]

Unnamed: 0,Tickers,Sector,marketCap,trailingPE,forwardPE,profitMargins,trailingEps,quickRatio,earningsQuarterlyGrowth,Sharpe_ratios,Date_Collected
4,ANYYY,Industrials,40178850000.0,16.910257,8.912162,0.33547,0.78,1.218,0.154,1.266057,2025-07-04
8,AU,Basic Materials,23546420000.0,15.214984,16.985455,0.21093,3.07,1.359,6.638,1.510958,2025-07-04
9,KGC,Basic Materials,19457050000.0,16.183672,18.022726,0.2174,0.98,1.146,2.439,2.08809,2025-07-04
11,MLI,Industrials,9144116000.0,15.052824,15.592452,0.1592,5.49,3.126,0.138,1.083195,2025-07-04
12,CWAN,Technology,6274365000.0,13.023529,41.773586,0.90134,1.7,4.79,2.43,0.980751,2025-07-04
13,HALO,Healthcare,6653934000.0,14.361702,11.157024,0.44763,3.76,6.999,0.537,0.680823,2025-07-04
14,EPRT,Real Estate,6348015000.0,27.62931,25.23622,0.44617,1.16,2.904,0.194,1.12081,2025-07-04
15,CALM,Consumer Defensive,4990043000.0,5.083992,31.956522,0.26081,20.24,3.249,2.466,1.63913,2025-07-04


In [7]:
# Valuation rows for the tickers selected from the profit-margin ranking.
in_profit_selection = filtered_valuation_df['Tickers'].isin(mpt_stocks_for_profit)
filtered_valuation_df.loc[in_profit_selection]

Unnamed: 0,Tickers,Sector,marketCap,trailingPE,forwardPE,profitMargins,trailingEps,quickRatio,earningsQuarterlyGrowth,Sharpe_ratios,Date_Collected
4,ANYYY,Industrials,40178850000.0,16.910257,8.912162,0.33547,0.78,1.218,0.154,1.266057,2025-07-04
8,AU,Basic Materials,23546420000.0,15.214984,16.985455,0.21093,3.07,1.359,6.638,1.510958,2025-07-04
9,KGC,Basic Materials,19457050000.0,16.183672,18.022726,0.2174,0.98,1.146,2.439,2.08809,2025-07-04
11,MLI,Industrials,9144116000.0,15.052824,15.592452,0.1592,5.49,3.126,0.138,1.083195,2025-07-04
12,CWAN,Technology,6274365000.0,13.023529,41.773586,0.90134,1.7,4.79,2.43,0.980751,2025-07-04
13,HALO,Healthcare,6653934000.0,14.361702,11.157024,0.44763,3.76,6.999,0.537,0.680823,2025-07-04
14,EPRT,Real Estate,6348015000.0,27.62931,25.23622,0.44617,1.16,2.904,0.194,1.12081,2025-07-04
15,CALM,Consumer Defensive,4990043000.0,5.083992,31.956522,0.26081,20.24,3.249,2.466,1.63913,2025-07-04


In [8]:
# NOTE(review): disabled scratch code. As written, the two assignments depend on
# each other: `general_pairs` is used on the first line but only assigned on the
# second, and the second line calls `my_pairs` as if it were a function.
# Presumably that call was meant to be `get_corr_pairs_of_stocks(...)` -- TODO
# confirm and reorder before re-enabling.
# my_pairs = separate_corr_pairs(general_pairs.index.to_list(), top_n=8)

# general_pairs = my_pairs(list(filtered_valuation_df['Tickers'].to_list()))
# filtered_valuation_df[filtered_valuation_df['Tickers'].isin(my_pairs)]


In [9]:
# NOTE(review): disabled -- `my_pairs` is only ever assigned in commented-out
# code in the cells visible here, so this would raise NameError if re-enabled as is.
# get_corr_pairs_of_stocks(list(my_pairs))
