In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

In [2]:
filtered_valuation_df = pd.read_csv('/Users/blakeuribe/Desktop/portfolio_py/data/clean/filtered_valuation_df.csv')
filtered_valuation_df

Unnamed: 0,ticker,marketCap,trailingPE,profitMargins,trailingEps,sharpe_ratio
0,TPL,2.603332e+10,58.138535,0.65343,19.49,2.016304
1,NVDA,3.298803e+12,53.241108,0.55041,2.53,2.524625
2,V,6.152358e+11,32.686210,0.54955,9.72,1.295821
3,MO,9.124857e+10,9.094595,0.50511,5.92,1.652667
4,EWBC,1.329591e+10,12.109849,0.46044,7.92,1.218759
...,...,...,...,...,...,...
100,SNA,1.803812e+10,17.668379,0.20402,19.45,0.838045
101,ORCL,4.745322e+11,41.481663,0.20396,4.09,1.853960
102,RELX,8.522124e+10,35.803150,0.20338,1.27,1.036063
103,FFIV,1.478561e+10,26.385983,0.20126,9.56,1.607798


Sector Valuation: Group by sectors, and find best stock in each sector

In [3]:
def get_sector(ticker: str) -> str:
    try:
        stock = yf.Ticker(ticker)
        return stock.info.get('sector', None)  # Get the sector, return None if not available
    except Exception as e:
        print(f"Error retrieving sector for {ticker}: {e}")
        return None

# Add sector column to the DataFrame
filtered_valuation_df['sector'] = filtered_valuation_df['ticker'].apply(get_sector)
filtered_valuation_df

Unnamed: 0,ticker,marketCap,trailingPE,profitMargins,trailingEps,sharpe_ratio,sector
0,TPL,2.603332e+10,58.138535,0.65343,19.49,2.016304,Energy
1,NVDA,3.298803e+12,53.241108,0.55041,2.53,2.524625,Technology
2,V,6.152358e+11,32.686210,0.54955,9.72,1.295821,Financial Services
3,MO,9.124857e+10,9.094595,0.50511,5.92,1.652667,Consumer Defensive
4,EWBC,1.329591e+10,12.109849,0.46044,7.92,1.218759,Financial Services
...,...,...,...,...,...,...,...
100,SNA,1.803812e+10,17.668379,0.20402,19.45,0.838045,Industrials
101,ORCL,4.745322e+11,41.481663,0.20396,4.09,1.853960,Technology
102,RELX,8.522124e+10,35.803150,0.20338,1.27,1.036063,Industrials
103,FFIV,1.478561e+10,26.385983,0.20126,9.56,1.607798,Technology


In [4]:
# possibly change group by  
grouped_sector = filtered_valuation_df.loc[filtered_valuation_df.groupby('sector')['profitMargins'].idxmax()]

top_n = 3
grouped_sector = filtered_valuation_df.groupby('sector').apply(
    lambda x: x.nlargest(top_n, 'profitMargins')
).reset_index(drop=True)

print(grouped_sector['sector'].unique())
print(f'Sector Count: {len(grouped_sector["sector"].unique())}')

grouped_sector

['Communication Services' 'Consumer Cyclical' 'Consumer Defensive'
 'Energy' 'Financial Services' 'Healthcare' 'Industrials' 'Real Estate'
 'Technology' 'Utilities']
Sector Count: 10


  grouped_sector = filtered_valuation_df.groupby('sector').apply(


Unnamed: 0,ticker,marketCap,trailingPE,profitMargins,trailingEps,sharpe_ratio,sector
0,META,1477458000000.0,27.61916,0.3555,21.19,1.857293,Communication Services
1,GOOGL,2351625000000.0,25.385942,0.27738,7.54,1.344263,Communication Services
2,CSXXY,10210900000.0,64.36905,0.22751,0.84,1.296995,Communication Services
3,MAR,78910490000.0,29.765198,0.42131,9.54,1.35418,Consumer Cyclical
4,TCOM,47166340000.0,25.417545,0.2884,2.85,2.295957,Consumer Cyclical
5,MMYT,12185800000.0,58.421055,0.26643,1.9,2.402054,Consumer Cyclical
6,MO,91248570000.0,9.094595,0.50511,5.92,1.652667,Consumer Defensive
7,PM,193141000000.0,19.71746,0.26417,6.3,1.608123,Consumer Defensive
8,TPL,26033320000.0,58.138535,0.65343,19.49,2.016304,Energy
9,WES,14598090000.0,9.810741,0.42228,3.91,1.439117,Energy


Diversifaction: Find negative correlated portfolios
    Gotta get histoical price, and do it on that

In [5]:

tickers_to_do_corr = grouped_sector['ticker'].tolist()

# # Download the adjusted close price data
grouped_sector_data = yf.download(tickers_to_do_corr, period='1y', auto_adjust=True)['Close']

# # Calculate the correlation matrix
# correlation_matrix = adj_close_data.corr()
correlation_matrix = grouped_sector_data.corr()

# Display the correlation matrix
correlation_matrix


[*********************100%***********************]  28 of 28 completed


Ticker,AER,ATO,AVB,CPRT,CSXXY,DOCS,DTM,EWBC,GDDY,GOOGL,...,PM,SPG,SRE,TCOM,TPL,TRI,TRMB,UTHR,V,WES
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AER,1.0,0.770527,0.889598,0.44728,0.580408,0.60357,0.816588,0.679701,0.828329,0.753732,...,0.831105,0.768552,0.812999,0.6718,0.779248,0.761939,0.428677,0.905898,0.391543,0.931105
ATO,0.770527,1.0,0.932097,0.412264,0.707719,0.890734,0.931376,0.907614,0.892324,0.458827,...,0.952365,0.960953,0.950494,0.748456,0.933825,0.479052,0.637065,0.900552,0.671368,0.683713
AVB,0.889598,0.932097,1.0,0.322624,0.71823,0.776232,0.884951,0.787797,0.878612,0.580712,...,0.969221,0.897601,0.933321,0.678971,0.868306,0.693643,0.46151,0.967349,0.506315,0.823084
CPRT,0.44728,0.412264,0.322624,1.0,0.420514,0.486923,0.570264,0.598584,0.575934,0.455726,...,0.295229,0.504279,0.45062,0.667526,0.553743,0.162475,0.817452,0.323068,0.678732,0.430808
CSXXY,0.580408,0.707719,0.71823,0.420514,1.0,0.633912,0.635188,0.633714,0.648472,0.19139,...,0.711215,0.765169,0.679442,0.537375,0.644135,0.553567,0.549705,0.651171,0.540452,0.55766
DOCS,0.60357,0.890734,0.776232,0.486923,0.633912,1.0,0.906633,0.876505,0.89151,0.482139,...,0.820094,0.925082,0.878462,0.786969,0.888717,0.361145,0.764296,0.769896,0.838913,0.508523
DTM,0.816588,0.931376,0.884951,0.570264,0.635188,0.906633,1.0,0.930304,0.972398,0.681938,...,0.906474,0.940638,0.943168,0.850634,0.971946,0.483631,0.735194,0.895619,0.756315,0.760226
EWBC,0.679701,0.907614,0.787797,0.598584,0.633714,0.876505,0.930304,1.0,0.881986,0.494494,...,0.841218,0.910577,0.902705,0.791622,0.946191,0.328424,0.792342,0.784161,0.797537,0.584278
GDDY,0.828329,0.892324,0.878612,0.575934,0.648472,0.89151,0.972398,0.881986,1.0,0.7185,...,0.897793,0.915134,0.929423,0.812196,0.926937,0.533657,0.691271,0.891896,0.726146,0.779732
GOOGL,0.753732,0.458827,0.580712,0.455726,0.19139,0.482139,0.681938,0.494494,0.7185,1.0,...,0.535204,0.488876,0.590269,0.658294,0.591068,0.548024,0.376787,0.654575,0.382239,0.749083


Get Correlation, but need to find a way to find optimal pair, such that corr is low, all around portofolio

In [6]:
df = correlation_matrix
df_out = df.stack()
df_out = df_out[df_out.index.get_level_values(0) != df_out.index.get_level_values(1)]
df_out.index = df_out.index.map('_'.join)
df_out = df_out.to_frame().T

df_out = df_out.T.rename(columns={0: 'Correlation'}).sort_values(by='Correlation', ascending=True)
df_out = df_out.drop_duplicates(subset=['Correlation'])
df_out = df_out[df_out['Correlation'] <= 0.4]
df_out

Unnamed: 0,Correlation
TRI_MAR,0.0998
TRMB_TRI,0.146316
TRI_CPRT,0.162475
TRI_V,0.168853
CSXXY_GOOGL,0.19139
OHI_CPRT,0.244386
MA_TRI,0.286554
CPRT_PM,0.295229
GOOGL_MA,0.303727
WES_MAR,0.308141


In [7]:
df_out.to_csv('/Users/blakeuribe/Desktop/portfolio_py/data/clean/portfolio_corr.csv', index=False)

In [8]:
corr_pairs = df_out.index.to_list()
corr_pairs

['TRI_MAR',
 'TRMB_TRI',
 'TRI_CPRT',
 'TRI_V',
 'CSXXY_GOOGL',
 'OHI_CPRT',
 'MA_TRI',
 'CPRT_PM',
 'GOOGL_MA',
 'WES_MAR',
 'WES_V',
 'CPRT_AVB',
 'CPRT_UTHR',
 'TRI_EWBC',
 'TRI_DOCS',
 'GOOGL_TRMB',
 'GOOGL_V',
 'TRMB_WES',
 'MAR_AER',
 'V_AER',
 'GOOGL_MAR']

In [9]:
pair_list = []

for stock in corr_pairs:
    pairs = stock.split('_')
    pair_list.append(pairs)
    
pair_list_1d = np.array(pair_list).flatten().tolist()
unique_values = pd.unique(pair_list_1d)[:6]
unique_values

  unique_values = pd.unique(pair_list_1d)[:6]


array(['TRI', 'MAR', 'TRMB', 'CPRT', 'V', 'CSXXY'], dtype=object)

Do mpt on selected stocks above