In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

In [2]:
filtered_valuation_df = pd.read_csv('/Users/blakeuribe/Desktop/portfolio_py/data/clean/filtered_valuation_df.csv')
filtered_valuation_df

Unnamed: 0,ticker,marketCap,trailingPE,profitMargins,trailingEps,sharpe_ratio
0,TPL,2.603332e+10,58.138535,0.65343,19.49,2.016304
1,NVDA,3.298803e+12,53.241108,0.55041,2.53,2.524625
2,V,6.152358e+11,32.686210,0.54955,9.72,1.295821
3,MO,9.124857e+10,9.094595,0.50511,5.92,1.652667
4,EWBC,1.329591e+10,12.109849,0.46044,7.92,1.218759
...,...,...,...,...,...,...
100,SNA,1.803812e+10,17.668379,0.20402,19.45,0.838045
101,ORCL,4.745322e+11,41.481663,0.20396,4.09,1.853960
102,RELX,8.522124e+10,35.803150,0.20338,1.27,1.036063
103,FFIV,1.478561e+10,26.385983,0.20126,9.56,1.607798


Sector Valuation: Group by sectors, and find best stock in each sector

In [3]:
def get_sector(ticker: str) -> str:
    try:
        stock = yf.Ticker(ticker)
        return stock.info.get('sector', None)  # Get the sector, return None if not available
    except Exception as e:
        print(f"Error retrieving sector for {ticker}: {e}")
        return None

# Add sector column to the DataFrame
filtered_valuation_df['sector'] = filtered_valuation_df['ticker'].apply(get_sector)
filtered_valuation_df

Unnamed: 0,ticker,marketCap,trailingPE,profitMargins,trailingEps,sharpe_ratio,sector
0,TPL,2.603332e+10,58.138535,0.65343,19.49,2.016304,Energy
1,NVDA,3.298803e+12,53.241108,0.55041,2.53,2.524625,Technology
2,V,6.152358e+11,32.686210,0.54955,9.72,1.295821,Financial Services
3,MO,9.124857e+10,9.094595,0.50511,5.92,1.652667,Consumer Defensive
4,EWBC,1.329591e+10,12.109849,0.46044,7.92,1.218759,Financial Services
...,...,...,...,...,...,...,...
100,SNA,1.803812e+10,17.668379,0.20402,19.45,0.838045,Industrials
101,ORCL,4.745322e+11,41.481663,0.20396,4.09,1.853960,Technology
102,RELX,8.522124e+10,35.803150,0.20338,1.27,1.036063,Industrials
103,FFIV,1.478561e+10,26.385983,0.20126,9.56,1.607798,Technology


In [4]:
# possibly change group by  
grouped_sector = filtered_valuation_df.loc[filtered_valuation_df.groupby('sector')['profitMargins'].idxmax()]

top_n = 3
grouped_sector = filtered_valuation_df.groupby('sector').apply(
    lambda x: x.nlargest(top_n, 'profitMargins')
).reset_index(drop=True)

print(grouped_sector['sector'].unique())
print(f'Sector Count: {len(grouped_sector["sector"].unique())}')

grouped_sector

['Communication Services' 'Consumer Cyclical' 'Consumer Defensive'
 'Energy' 'Financial Services' 'Healthcare' 'Industrials' 'Real Estate'
 'Technology' 'Utilities']
Sector Count: 10


  grouped_sector = filtered_valuation_df.groupby('sector').apply(


Unnamed: 0,ticker,marketCap,trailingPE,profitMargins,trailingEps,sharpe_ratio,sector
0,META,1477458000000.0,27.61916,0.3555,21.19,1.857293,Communication Services
1,GOOGL,2351625000000.0,25.385942,0.27738,7.54,1.344263,Communication Services
2,CSXXY,10210900000.0,64.36905,0.22751,0.84,1.296995,Communication Services
3,MAR,78910490000.0,29.765198,0.42131,9.54,1.35418,Consumer Cyclical
4,TCOM,47166340000.0,25.417545,0.2884,2.85,2.295957,Consumer Cyclical
5,MMYT,12185800000.0,58.421055,0.26643,1.9,2.402054,Consumer Cyclical
6,MO,91248570000.0,9.094595,0.50511,5.92,1.652667,Consumer Defensive
7,PM,193141000000.0,19.71746,0.26417,6.3,1.608123,Consumer Defensive
8,TPL,26033320000.0,58.138535,0.65343,19.49,2.016304,Energy
9,WES,14598090000.0,9.810741,0.42228,3.91,1.439117,Energy


Diversifaction: Find negative correlated portfolios
    Gotta get histoical price, and do it on that

In [5]:

tickers_to_do_corr = grouped_sector['ticker'].tolist()

# # Download the adjusted close price data
grouped_sector_data = yf.download(tickers_to_do_corr, period='1y', auto_adjust=True)['Close']

# # Calculate the correlation matrix
# correlation_matrix = adj_close_data.corr()
correlation_matrix = grouped_sector_data.corr()

# Display the correlation matrix
correlation_matrix


[*********************100%***********************]  28 of 28 completed


Ticker,AER,ATO,AVB,CPRT,CSXXY,DOCS,DTM,EWBC,GDDY,GOOGL,...,PM,SPG,SRE,TCOM,TPL,TRI,TRMB,UTHR,V,WES
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AER,1.0,0.763747,0.884771,0.481864,0.601855,0.596525,0.82167,0.67867,0.834287,0.764161,...,0.825356,0.768934,0.797251,0.683033,0.778942,0.792132,0.450318,0.904586,0.408365,0.93703
ATO,0.763747,1.0,0.932827,0.421901,0.714465,0.890624,0.933015,0.907896,0.891261,0.461628,...,0.952879,0.960461,0.953573,0.74906,0.934848,0.497383,0.638544,0.899483,0.672586,0.678827
AVB,0.884771,0.932827,1.0,0.342995,0.72671,0.778047,0.892961,0.790874,0.883804,0.591576,...,0.969576,0.900487,0.932069,0.686239,0.873615,0.705269,0.472806,0.968471,0.515188,0.821511
CPRT,0.481864,0.421901,0.342995,1.0,0.446459,0.488895,0.586775,0.604654,0.593911,0.478318,...,0.312308,0.517367,0.45297,0.680013,0.565001,0.226964,0.825593,0.347802,0.688049,0.466659
CSXXY,0.601855,0.714465,0.72671,0.446459,1.0,0.641284,0.654047,0.643354,0.666832,0.224355,...,0.718758,0.774729,0.678881,0.556298,0.658434,0.575045,0.5694,0.666064,0.557981,0.58201
DOCS,0.596525,0.890624,0.778047,0.488895,0.641284,1.0,0.902529,0.875801,0.885224,0.475377,...,0.821436,0.923094,0.886459,0.783043,0.886065,0.381599,0.759311,0.766767,0.835494,0.500803
DTM,0.82167,0.933015,0.892961,0.586775,0.654047,0.902529,1.0,0.931741,0.972791,0.683346,...,0.912157,0.942554,0.952337,0.854395,0.971811,0.527641,0.736889,0.899786,0.756142,0.763264
EWBC,0.67867,0.907896,0.790874,0.604654,0.643354,0.875801,0.931741,1.0,0.881354,0.497255,...,0.843104,0.910683,0.906448,0.792277,0.947036,0.357551,0.792953,0.785253,0.798476,0.585041
GDDY,0.834287,0.891261,0.883804,0.593911,0.666832,0.885224,0.972791,0.881354,1.0,0.722539,...,0.90028,0.915615,0.931738,0.816396,0.925962,0.575271,0.695232,0.895037,0.727248,0.785014
GOOGL,0.764161,0.461628,0.591576,0.478318,0.224355,0.475377,0.683346,0.497255,0.722539,1.0,...,0.542956,0.494244,0.596913,0.665393,0.591523,0.591326,0.384021,0.662372,0.384968,0.756208


Get Correlation, but need to find a way to find optimal pair, such that corr is low, all around portofolio

In [6]:
df = correlation_matrix
df_out = df.stack()
df_out = df_out[df_out.index.get_level_values(0) != df_out.index.get_level_values(1)]
df_out.index = df_out.index.map('_'.join)
df_out = df_out.to_frame().T

df_out = df_out.T.rename(columns={0: 'Correlation'}).sort_values(by='Correlation', ascending=True)
df_out = df_out.drop_duplicates(subset=['Correlation'])
df_out = df_out[df_out['Correlation'] <= 0.4]
df_out

Unnamed: 0,Correlation
MAR_TRI,0.150892
TRI_TRMB,0.204053
V_TRI,0.21799
GOOGL_CSXXY,0.224355
TRI_CPRT,0.226964
CPRT_OHI,0.269596
PM_CPRT,0.312308
MA_GOOGL,0.322599
MAR_WES,0.328115
V_WES,0.329753


In [7]:
df_out.to_csv('/Users/blakeuribe/Desktop/portfolio_py/data/clean/portfolio_corr.csv', index=False)

In [8]:
corr_pairs = df_out.index.to_list()
corr_pairs
corr_pairs

['MAR_TRI',
 'TRI_TRMB',
 'V_TRI',
 'GOOGL_CSXXY',
 'TRI_CPRT',
 'CPRT_OHI',
 'PM_CPRT',
 'MA_GOOGL',
 'MAR_WES',
 'V_WES',
 'MA_TRI',
 'AVB_CPRT',
 'CPRT_UTHR',
 'TRI_EWBC',
 'DOCS_TRI',
 'TRMB_GOOGL',
 'GOOGL_V']

In [9]:
pair_list = []

for stock in corr_pairs:
    pairs = stock.split('_')
    pair_list.append(pairs)
    
pair_list_1d = np.array(pair_list).flatten().tolist()
unique_values = pd.unique(pair_list_1d)[:6]
unique_values

  unique_values = pd.unique(pair_list_1d)[:6]


array(['MAR', 'TRI', 'TRMB', 'V', 'GOOGL', 'CSXXY'], dtype=object)

Do mpt on selected stocks above