### Diversification and Top Performers by Sector

1. **Notebook Objective:**
	- This notebook will group stocks by sector
	- It will identify the top *n* stocks in each sector based on user-defined criteria (n is defined in `config.py`)
	- It will compute the correlation matrix of returns across selected stocks
		- Notation: Let $R$ be a matrix of returns, then the correlation matrix $\rho$ is given by  
		  $\rho_{i,j} = \frac{\text{Cov}(R_i, R_j)}{\sigma_{R_i} \cdot \sigma_{R_j}}$
	- It will identify the most diversifiable stocks (those with the lowest correlations)
	- Finally, it will export potential portfolios—those with high Sharpe Ratios and strong performance metrics—to `config.py` for use in later analysis

2. **Bugs**
    - Need to fix the function `get_corr_pairs` in `utils/finance_utils.py` to allow the user to match the date with the program


In [1]:
import time
import os 

start_time = time.time() # measure run time

# Resolve the project root from the WORKSPACE_DIR environment variable. The
# value is needed here to enter the project directory (so the `utils` imports
# below resolve) and again later to build the input CSV path. Fail fast with a
# clear message instead of silently producing the path 'None/portfolio_py'.
WORKSPACE_DIR = os.getenv('WORKSPACE_DIR')
if not WORKSPACE_DIR:
    raise EnvironmentError(
        'WORKSPACE_DIR is not set; export it as the directory that contains portfolio_py'
    )
if not os.getcwd().endswith('portfolio_py'):
    os.chdir(f'{WORKSPACE_DIR}/portfolio_py')
print(f'Current Working Directory: {os.getcwd()}')

from utils.finance_utils import *
from utils.helpers import separate_corr_pairs
from utils.config import TOP_N_STOCKS

Current Working Directory: /Users/blakeuribe/Desktop/portfolio_py

---------------------------------
finance_utils.py successfully loaded, updated last March. 17 2025 7:32
---------------------------------



---------------------------------
helpers.py successfully loaded, updated last Feb. 04 2025
---------------------------------


Updated on 04/15/2025 5:56


Sector Valuation: Group by sectors, and find best stock in each sector

In [None]:
# Load the filtered valuation table from the project's clean-data folder.
valuation_csv_path = f'{WORKSPACE_DIR}/portfolio_py/data/clean/filtered_valuation_df.csv'
filtered_valuation_df = pd.read_csv(valuation_csv_path)

# Risk-aversion choice: leave the Consumer Cyclical sector out of the universe.
is_consumer_cyclical = filtered_valuation_df['Sector'].eq('Consumer Cyclical')
filtered_valuation_df = filtered_valuation_df.loc[~is_consumer_cyclical]

# Pick the top 3 tickers per sector under each criterion: profit margin first,
# then Sharpe ratio (the generator is consumed in that order).
top_stocks_on_profit, top_stocks_on_sharpe = (
    get_top_n_by_sector(filtered_valuation_df, filter_var=ranking_column, top_n=3)
    for ranking_column in ('profitMargins', 'Sharpe_ratios')
)

filtered_valuation_df.head()

Unnamed: 0,Tickers,Sector,marketCap,trailingPE,forwardPE,profitMargins,trailingEps,quickRatio,earningsQuarterlyGrowth,Sharpe_ratios
1,FOXA,Communication Services,20917320000.0,10.293992,12.86059,0.14393,4.66,2.067,2.422,2.404153
2,SNA,Industrials,16010490000.0,15.647541,15.180914,0.20435,19.52,3.023,0.011,0.908221
3,FFIV,Technology,14909460000.0,25.732336,16.792856,0.20583,10.05,1.304,0.203,2.075538
4,THC,Healthcare,11495490000.0,3.725076,10.629145,0.15485,32.7,1.538,0.303,0.802396
5,FUTU,Financial Services,11089510000.0,15.866534,14.614679,0.45463,5.02,1.178,1.131,1.090228
6,COKE,Consumer Defensive,12344380000.0,20.22857,36.363636,0.09176,70.0,1.615,1.36,2.004112
7,OHI,Real Estate,11157300000.0,24.883871,24.72436,0.38647,1.55,1.145,1.061,1.718461
8,EXEL,Healthcare,10128890000.0,20.5625,21.288235,0.24036,1.76,3.447,0.635,1.6661
9,MLI,Industrials,7858138000.0,13.361582,13.386791,0.1605,5.31,3.792,0.154,1.294666


Diversification: Find negatively correlated stocks

In [None]:
# Pairwise return correlations among the top-ranked tickers, computed once per
# ranking criterion. Each call appears to fetch price data (note the download
# progress bars in the recorded output), so this cell is kept to the expensive
# correlation step only.
top_profit_corr_pairs = get_corr_pairs_of_stocks(top_stocks_on_profit['Tickers'].tolist())
top_sharpe_corr_pairs = get_corr_pairs_of_stocks(top_stocks_on_sharpe['Tickers'].tolist())

# Turning these pairs into the final ticker selection is handled by the
# following cell ("low corr pairs"); repeating it here only ran the same
# work and printed the same two lines twice.

# Preview the Sharpe-based pairs rather than dumping the whole table.
top_sharpe_corr_pairs.head(10)

[***********           22%                       ]  2 of 9 completed


----Begining Diversifaction----


[*********************100%***********************]  9 of 9 completed
[*********************100%***********************]  9 of 9 completed


Unnamed: 0,Correlation
EXEL_THC,-0.039765
FUTU_THC,-0.030001
FFIV_THC,-0.024915
FOXA_THC,-0.004058
SNA_THC,0.012281
MLI_THC,0.304118
COKE_THC,0.392942
FUTU_OHI,0.415609
COKE_FUTU,0.519026
OHI_THC,0.60604


Separate low-correlation pairs

In [5]:
# Each correlation table is indexed by pair label (e.g. 'EXEL_THC'); pull the
# labels out as plain Python lists.
corr_profit_pairs = top_profit_corr_pairs.index.tolist()
corr_sharpe_pairs = top_sharpe_corr_pairs.index.tolist()

# Convert the pair labels into the ticker selection for each criterion
# (Sharpe first, then profit margin); top_n comes from utils/config.py.
mpt_stocks_for_sharpe, mpt_stocks_for_profit = (
    separate_corr_pairs(pair_labels, top_n=TOP_N_STOCKS)
    for pair_labels in (corr_sharpe_pairs, corr_profit_pairs)
)

print(f'Top Stocks based on Sharpe: {mpt_stocks_for_sharpe}')
print(f'Top Stocks based on Profit Margin: {mpt_stocks_for_profit}')

Top Stocks based on Sharpe: ['EXEL' 'THC' 'FUTU' 'FFIV' 'FOXA' 'SNA' 'MLI' 'COKE']
Top Stocks based on Profit Margin: ['EXEL' 'THC' 'FUTU' 'FFIV' 'FOXA' 'SNA' 'MLI' 'COKE']


In [6]:
# Export the selected tickers to utils/config.py for use in later analysis.
# An entry written by a previous run of this notebook is removed first, so
# re-running the notebook replaces it instead of appending yet another copy
# (plain append mode made config.py grow on every run).
CONFIG_PATH = 'utils/config.py'
ENTRY_HEADER = '# New Entry from portfolio_selection.ipynb'
exported_lists = {
    'mpt_stocks_for_sharpe': mpt_stocks_for_sharpe.tolist(),
    'mpt_stocks_for_profit': mpt_stocks_for_profit.tolist(),
}

try:
    with open(CONFIG_PATH) as f:
        existing_lines = f.read().splitlines()
except FileNotFoundError:
    # Append mode would have created the file; keep that behaviour.
    existing_lines = []

# Drop only lines generated by this cell: the header plus the assignments that
# immediately follow it. Hand-written config lines are left untouched.
generated_prefixes = tuple(f'{name} = ' for name in exported_lists)
kept_lines = []
inside_old_entry = False
for line in existing_lines:
    if line == ENTRY_HEADER:
        inside_old_entry = True
        continue
    if inside_old_entry and line.startswith(generated_prefixes):
        continue
    inside_old_entry = False
    kept_lines.append(line)

new_entry_lines = [ENTRY_HEADER] + [
    f'{name} = {tickers}' for name, tickers in exported_lists.items()
]
# Build the whole file content first so it is written in a single call.
config_text = (
    '\n'.join(kept_lines).rstrip('\n') + '\n' + '\n'.join(new_entry_lines) + '\n'
)

with open(CONFIG_PATH, 'w') as f:
    f.write(config_text)

In [7]:
# Valuation rows for the tickers selected via the Sharpe-ratio ranking.
in_sharpe_selection = filtered_valuation_df['Tickers'].isin(mpt_stocks_for_sharpe)
filtered_valuation_df.loc[in_sharpe_selection]

Unnamed: 0,Tickers,Sector,marketCap,trailingPE,forwardPE,profitMargins,trailingEps,quickRatio,earningsQuarterlyGrowth,Sharpe_ratios
1,FOXA,Communication Services,20917320000.0,10.293992,12.86059,0.14393,4.66,2.067,2.422,2.404153
2,SNA,Industrials,16010490000.0,15.647541,15.180914,0.20435,19.52,3.023,0.011,0.908221
3,FFIV,Technology,14909460000.0,25.732336,16.792856,0.20583,10.05,1.304,0.203,2.075538
4,THC,Healthcare,11495490000.0,3.725076,10.629145,0.15485,32.7,1.538,0.303,0.802396
5,FUTU,Financial Services,11089510000.0,15.866534,14.614679,0.45463,5.02,1.178,1.131,1.090228
6,COKE,Consumer Defensive,12344380000.0,20.22857,36.363636,0.09176,70.0,1.615,1.36,2.004112
8,EXEL,Healthcare,10128890000.0,20.5625,21.288235,0.24036,1.76,3.447,0.635,1.6661
9,MLI,Industrials,7858138000.0,13.361582,13.386791,0.1605,5.31,3.792,0.154,1.294666


In [8]:
# Valuation rows for the tickers selected via the profit-margin ranking.
in_profit_selection = filtered_valuation_df['Tickers'].isin(mpt_stocks_for_profit)
filtered_valuation_df.loc[in_profit_selection]

Unnamed: 0,Tickers,Sector,marketCap,trailingPE,forwardPE,profitMargins,trailingEps,quickRatio,earningsQuarterlyGrowth,Sharpe_ratios
1,FOXA,Communication Services,20917320000.0,10.293992,12.86059,0.14393,4.66,2.067,2.422,2.404153
2,SNA,Industrials,16010490000.0,15.647541,15.180914,0.20435,19.52,3.023,0.011,0.908221
3,FFIV,Technology,14909460000.0,25.732336,16.792856,0.20583,10.05,1.304,0.203,2.075538
4,THC,Healthcare,11495490000.0,3.725076,10.629145,0.15485,32.7,1.538,0.303,0.802396
5,FUTU,Financial Services,11089510000.0,15.866534,14.614679,0.45463,5.02,1.178,1.131,1.090228
6,COKE,Consumer Defensive,12344380000.0,20.22857,36.363636,0.09176,70.0,1.615,1.36,2.004112
8,EXEL,Healthcare,10128890000.0,20.5625,21.288235,0.24036,1.76,3.447,0.635,1.6661
9,MLI,Industrials,7858138000.0,13.361582,13.386791,0.1605,5.31,3.792,0.154,1.294666


In [9]:
# NOTE(review): disabled experiment, kept for reference only. It will not run
# as written if uncommented: `general_pairs` is read on the first line but is
# only assigned on the second, and that assignment calls `my_pairs` as if it
# were a function. TODO: fix the statement order/callee or delete this cell.
# my_pairs = separate_corr_pairs(general_pairs.index.to_list(), top_n=8)

# general_pairs = my_pairs(list(filtered_valuation_df['Tickers'].to_list()))
# filtered_valuation_df[filtered_valuation_df['Tickers'].isin(my_pairs)]


In [10]:
# NOTE(review): disabled; depends on `my_pairs`, which is only assigned in the
# commented-out experiment in the previous cell.
# get_corr_pairs_of_stocks(list(my_pairs))
