########## choose stock list (aka market) ##################

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import date

In [5]:
## Acquire the stock list (S&P 500) ##
# Reads the 'Symbol' column of the CSV and turns it into a one-column frame
# named 'ticker', without duplicates and with a fresh 0..n-1 index.
ex1 = pd.read_csv('stock_info_s&p500.csv')
df = (
    ex1['Symbol']
    .dropna()            # a missing symbol would otherwise be cast to the string 'nan'
    .astype('str')
    .rename('ticker')
    .to_frame()
    .drop_duplicates()
    .reset_index(drop=True)
)
df

Unnamed: 0,ticker
0,AAPL
1,MSFT
2,NVDA
3,GOOGL
4,GOOG
...,...
498,BBWI
499,BWA
500,NCLH
501,AAL


### Acquire information from yfinance (`.info` and the quarterly statements)

In [6]:
# Step 1: record the day on which this data set is pulled (same value on every row).
pull_date = date.today()
df = df.assign(date_pulling=pull_date)

In [7]:
# Step 2: keys looked up in Ticker.info.
info_attribute_list = ['industry', 'sector', 'enterpriseValue', 'totalCashPerShare']

# Step 3: row labels looked up in Ticker.quarterly_balance_sheet.
balancesheet_list = ['Total Assets', 'Current Liabilities']

# Step 4: row labels looked up in Ticker.quarterly_financials.
# `ttm` is the number of quarterly columns summed for each of these rows.
ttm = 4
financials_list = ['EBIT', 'Operating Income']

# One result row per ticker, appended in the order of df['ticker'].
info_attribute_list_buffer = []
balancesheet_list_buffer = []
financials_list_buffer = []


def _safe(func, *args):
    """Return ``func(*args)``, or None if the call raises an ``Exception``.

    Missing data is best-effort: any ordinary failure becomes None.  Only
    ``Exception`` is caught (not a bare ``except``), so KeyboardInterrupt still
    propagates and the download loop can be interrupted from the notebook.
    """
    try:
        return func(*args)
    except Exception:
        return None


def _info_value(info, key):
    """Return ``info[key]``."""
    return info[key]


def _first_column_value(statement, label):
    """Return the first-column value of the row of ``statement`` labelled ``label``."""
    return statement.loc[statement.index == label].values[0][0]


def _trailing_sum(statement, label, periods):
    """Sum the first ``periods`` column values of the row labelled ``label``.

    Raises IndexError when the row is absent or has fewer than ``periods`` columns.
    """
    row = statement.loc[statement.index == label].values[0]
    return np.array([row[k] for k in range(periods)]).sum()


def _first_column_label(statement):
    """Return the label of the first column of ``statement``."""
    return statement.columns[0]


for symbol in df['ticker']:
    yfticker = yf.Ticker(symbol)

    # Read each source once per ticker instead of once per requested field.
    info = _safe(getattr, yfticker, 'info')
    balance_sheet = _safe(getattr, yfticker, 'quarterly_balance_sheet')
    financials = _safe(getattr, yfticker, 'quarterly_financials')

    # Step 2: attributes from .info (None when unavailable).
    info_attribute_list_buffer.append(
        [_safe(_info_value, info, key) for key in info_attribute_list]
    )

    # Step 3: first-column value of each requested balance-sheet row.
    balancesheet_list_buffer.append(
        [_safe(_first_column_value, balance_sheet, label) for label in balancesheet_list]
    )

    # Step 4: sum of the first `ttm` quarterly columns for each requested row,
    # followed by the label of the first column (presumably the most recent
    # quarter end -- TODO confirm the column ordering returned by yfinance).
    financials_row = [
        _safe(_trailing_sum, financials, label, ttm) for label in financials_list
    ]
    financials_row.append(_safe(_first_column_label, financials))
    financials_list_buffer.append(financials_row)

# Every financials row carries one extra trailing entry (the first column label),
# so the column-name list gets a matching 'ttm_latest' entry before the frame is built.
financials_list.append('ttm_latest')

# Attach the three fetched blocks to df, aligned on the row index.
df = (
    df
    .join(pd.DataFrame(info_attribute_list_buffer, columns=info_attribute_list))
    .join(pd.DataFrame(balancesheet_list_buffer, columns=balancesheet_list))
    .join(pd.DataFrame(financials_list_buffer, columns=financials_list))
)


#### Discard every ticker that has a missing value in any column ####
df = df.dropna()

#### Keep only rows where enterprise value, EBIT and operating income are all > 0 ####
all_positive = (
    (df['enterpriseValue'] > 0)
    & (df['EBIT'] > 0)
    & (df['Operating Income'] > 0)
)
df = df.loc[all_positive].reset_index(drop=True)

# Persist the cleaned data set; the next cell reloads it from this file.
df.to_csv('data_stock_s&p500.csv', index=False)

############################################################################

In [8]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import date
# Reload the saved data set and list the distinct sector / industry labels it contains.
df = pd.read_csv('data_stock_s&p500.csv')
for category_column in ('sector', 'industry'):
    print(category_column, df[category_column].unique())

sector ['Technology' 'Communication Services' 'Consumer Cyclical' 'Healthcare'
 'Consumer Defensive' 'Energy' 'Financial Services' 'Basic Materials'
 'Industrials' 'Utilities' 'Real Estate']
industry ['Consumer Electronics' 'Software - Infrastructure' 'Semiconductors'
 'Internet Content & Information' 'Internet Retail'
 'Drug Manufacturers - General' 'Auto Manufacturers' 'Discount Stores'
 'Healthcare Plans' 'Oil & Gas Integrated' 'Credit Services'
 'Household & Personal Products' 'Home Improvement Retail'
 'Beverages - Non-Alcoholic' 'Entertainment' 'Software - Application'
 'Diagnostics & Research' 'Telecom Services' 'Specialty Chemicals'
 'Information Technology Services' 'Restaurants' 'Communication Equipment'
 'Medical Devices' 'Aerospace & Defense' 'Tobacco'
 'Semiconductor Equipment & Materials' 'Medical Instruments & Supplies'
 'Farm & Heavy Construction Machinery' 'Utilities - Regulated Electric'
 'Financial Data & Stock Exchanges' 'Railroads' 'Biotechnology'
 'Asset Managemen

In [9]:

### Compute the two factors, MF_ROC and MF_EY ###
# Earnings column used in both ratios; 'Operating Income' is the alternative choice.
represents_earning = 'EBIT'

earnings = df[represents_earning]
capital_base = df['Total Assets'] - df['Current Liabilities']

# MF_ROC: earnings divided by (total assets - current liabilities).
# NOTE(review): a zero or negative capital base is not filtered out before dividing.
df['MF_ROC'] = earnings / capital_base
# MF_EY: earnings divided by enterprise value.
df['MF_EY'] = earnings / df['enterpriseValue']
df

Unnamed: 0,ticker,date_pulling,industry,sector,enterpriseValue,totalCashPerShare,Total Assets,Current Liabilities,EBIT,Operating Income,ttm_latest,MF_ROC,MF_EY
0,AAPL,2024-08-19,Consumer Electronics,Technology,3.476398e+12,4.065,3.316120e+11,1.316240e+11,1.216250e+11,1.205940e+11,2024-06-30,0.608161,0.034986
1,MSFT,2024-08-19,Software - Infrastructure,Technology,3.132824e+12,10.162,5.121630e+11,1.252860e+11,1.107220e+11,1.094330e+11,2024-06-30,0.286194,0.035343
2,NVDA,2024-08-19,Semiconductors,Technology,3.044467e+12,1.278,7.707200e+10,1.522300e+10,4.914300e+10,4.774000e+10,2024-04-30,0.794564,0.016142
3,GOOGL,2024-08-19,Internet Content & Information,Communication Services,1.934032e+12,8.182,4.147700e+11,7.791300e+10,1.018210e+11,9.793700e+10,2024-06-30,0.302268,0.052647
4,GOOG,2024-08-19,Internet Content & Information,Communication Services,1.955943e+12,8.182,4.147700e+11,7.791300e+10,1.018210e+11,9.793700e+10,2024-06-30,0.302268,0.052057
...,...,...,...,...,...,...,...,...,...,...,...,...,...
416,BBWI,2024-08-19,Specialty Retail,Consumer Cyclical,1.212913e+10,3.830,5.221000e+09,1.221000e+09,1.365000e+09,1.292000e+09,2024-04-30,0.341250,0.112539
417,BWA,2024-08-19,Auto Parts,Consumer Cyclical,1.021238e+10,5.655,1.410800e+10,3.712000e+09,1.238000e+09,1.261000e+09,2024-06-30,0.119084,0.121225
418,NCLH,2024-08-19,Travel Services,Consumer Cyclical,2.059400e+10,1.351,2.011383e+10,6.633916e+09,1.204430e+09,1.207603e+09,2024-06-30,0.089350,0.058485
419,AAL,2024-08-19,Airlines,Industrials,3.705987e+10,13.685,6.412500e+10,2.461000e+10,1.996000e+09,2.852000e+09,2024-06-30,0.050512,0.053859


In [10]:
### Drop the sectors and industries that are left out of the screen ###
# The original note attributes the exclusion of Utilities, Energy and Financial
# Services to the book; this list additionally contains Real Estate.
sectortoexclude = [
    'Utilities',
    'Energy',
    'Financial Services',
    'Real Estate'
]
for sector in sectortoexclude:
    print('exclude sector > ', sector)
# No try/except here: if the 'sector' column is missing the cell should fail
# loudly rather than silently keep the sectors that were meant to be removed.
df = df.loc[~df['sector'].isin(sectortoexclude)]

industrytoexclude = [
    'Engineering & Construction',
    'Building Products & Equipment',
    'Building Materials'
]
for industry in industrytoexclude:
    print('exclude industry > ', industry)
df = df.loc[~df['industry'].isin(industrytoexclude)]

df = df.reset_index(drop=True)

exclude sector >  Utilities
exclude sector >  Energy
exclude sector >  Financial Services
exclude sector >  Real Estate
exclude industry >  Engineering & Construction
exclude industry >  Building Products & Equipment
exclude industry >  Building Materials


In [11]:
#### Select the market size: keep rows whose enterprise value is at least `market` ####
market = 50_000_000  # in USD
is_large_enough = df['enterpriseValue'] >= market
df_market = df.loc[is_large_enough].reset_index(drop=True)

In [12]:
### Rank every stock on each factor, then add the two ranks ###
# rank() is used with its defaults, so a larger factor value gets a larger rank number.
for factor in ('MF_ROC', 'MF_EY'):
    df_market['Ranking_' + factor] = df_market[factor].rank()
df_market['Ranking_MF'] = df_market['Ranking_MF_ROC'] + df_market['Ranking_MF_EY']

# Show the row(s) holding the largest combined rank.
top_combined_rank = df_market['Ranking_MF'].max()
df_market.loc[df_market['Ranking_MF'] == top_combined_rank]

Unnamed: 0,ticker,date_pulling,industry,sector,enterpriseValue,totalCashPerShare,Total Assets,Current Liabilities,EBIT,Operating Income,ttm_latest,MF_ROC,MF_EY,Ranking_MF_ROC,Ranking_MF_EY,Ranking_MF
76,MO,2024-08-19,Tobacco,Consumer Defensive,110837400000.0,1.054,34387000000.0,7782000000.0,14676000000.0,11409000000.0,2024-06-30,0.551626,0.13241,303.0,303.0,606.0


In [13]:
# Number of stocks to show from the top of the ranking.
numstocks = 50

# Order by combined rank, largest first, and renumber the rows from 0.
df_sorted = (
    df_market
    .sort_values(by=['Ranking_MF'], ascending=False)
    .reset_index(drop=True)
)
df_sorted[:numstocks]

Unnamed: 0,ticker,date_pulling,industry,sector,enterpriseValue,totalCashPerShare,Total Assets,Current Liabilities,EBIT,Operating Income,ttm_latest,MF_ROC,MF_EY,Ranking_MF_ROC,Ranking_MF_EY,Ranking_MF
0,MO,2024-08-19,Tobacco,Consumer Defensive,110837400000.0,1.054,34387000000.0,7782000000.0,14676000000.0,11409000000.0,2024-06-30,0.551626,0.13241,303.0,303.0,606.0
1,JBL,2024-08-19,Electronic Components,Technology,12841900000.0,21.887,17449000000.0,11423000000.0,1990000000.0,1520000000.0,2024-05-31,0.330236,0.154962,278.0,309.0,587.0
2,BBWI,2024-08-19,Specialty Retail,Consumer Cyclical,12129130000.0,3.83,5221000000.0,1221000000.0,1365000000.0,1292000000.0,2024-04-30,0.34125,0.112539,282.0,296.0,578.0
3,ULTA,2024-08-19,Specialty Retail,Consumer Cyclical,19368220000.0,10.994,5631939000.0,1572724000.0,1647678000.0,1647678000.0,2024-04-30,0.405911,0.085071,291.0,279.0,570.0
4,PHM,2024-08-19,Residential Construction,Consumer Cyclical,26320780000.0,6.723,16544070000.0,3290047000.0,3711561000.0,3607165000.0,2024-06-30,0.280033,0.141013,259.0,308.0,567.0
5,NVR,2024-08-19,Residential Construction,Consumer Cyclical,25286080000.0,792.331,6407288000.0,1260029000.0,2055058000.0,2050196000.0,2024-06-30,0.399253,0.081272,290.0,275.0,565.0
6,EBAY,2024-08-19,Internet Retail,Consumer Cyclical,30150500000.0,10.564,20378000000.0,5799000000.0,3905000000.0,2045000000.0,2024-06-30,0.267851,0.129517,256.0,302.0,558.0
7,HPQ,2024-08-19,Computer Hardware,Technology,43451080000.0,2.572,37433000000.0,24839000000.0,3909000000.0,4305000000.0,2024-04-30,0.310386,0.089963,274.0,281.0,555.0
8,LULU,2024-08-19,Apparel Retail,Consumer Cyclical,31764510000.0,15.205,6828495000.0,1383571000.0,2238405000.0,2238405000.0,2024-04-30,0.411099,0.070469,292.0,257.0,549.0
9,BKNG,2024-08-19,Travel Services,Consumer Cyclical,122330100000.0,487.019,28541000000.0,18206000000.0,7364000000.0,6357000000.0,2024-06-30,0.71253,0.060198,307.0,231.0,538.0


In [16]:
# Spot check: show the ranked row for one ticker (empty when it is not in df_sorted).
is_requested_ticker = df_sorted['ticker'] == 'NVO'
df_sorted[is_requested_ticker]

Unnamed: 0,ticker,date_pulling,industry,sector,enterpriseValue,totalCashPerShare,Total Assets,Current Liabilities,EBIT,Operating Income,ttm_latest,MF_ROC,MF_EY,Ranking_MF_ROC,Ranking_MF_EY,Ranking_MF


In [15]:
# Take the first `numstocks` ranked rows and reorder them by cash per share, largest first.
df_sorted_2 = (
    df_sorted[:numstocks]
    .copy()
    .sort_values(by=['totalCashPerShare'], ascending=False)
    .reset_index(drop=True)
)
df_sorted_2

Unnamed: 0,ticker,date_pulling,industry,sector,enterpriseValue,totalCashPerShare,Total Assets,Current Liabilities,EBIT,Operating Income,ttm_latest,MF_ROC,MF_EY,Ranking_MF_ROC,Ranking_MF_EY,Ranking_MF
0,NVR,2024-08-19,Residential Construction,Consumer Cyclical,25286080000.0,792.331,6407288000.0,1260029000.0,2055058000.0,2050196000.0,2024-06-30,0.399253,0.081272,290.0,275.0,565.0
1,BKNG,2024-08-19,Travel Services,Consumer Cyclical,122330100000.0,487.019,28541000000.0,18206000000.0,7364000000.0,6357000000.0,2024-06-30,0.71253,0.060198,307.0,231.0,538.0
2,MOH,2024-08-19,Healthcare Plans,Healthcare,14191240000.0,148.481,15185000000.0,7836000000.0,1535000000.0,1535000000.0,2024-06-30,0.208872,0.108165,224.0,294.0,518.0
3,RL,2024-08-19,Apparel Manufacturing,Consumer Cyclical,11035160000.0,28.423,6641000000.0,1573600000.0,866500000.0,831200000.0,2024-06-30,0.170995,0.078522,190.0,269.0,459.0
4,DE,2024-08-19,Farm & Heavy Construction Machinery,Industrials,162522000000.0,26.08,105628000000.0,39284000000.0,15054000000.0,14080000000.0,2024-04-30,0.226908,0.092627,235.0,282.0,517.0
5,SNA,2024-08-19,Tools & Accessories,Industrials,14622320000.0,23.397,7759900000.0,950100000.0,1402400000.0,1337100000.0,2024-06-30,0.205939,0.095908,222.0,286.0,508.0
6,NUE,2024-08-19,Steel,Basic Materials,37123790000.0,22.896,34185220000.0,4865261000.0,5095961000.0,4638359000.0,2024-06-30,0.173805,0.137269,192.0,306.0,498.0
7,JBL,2024-08-19,Electronic Components,Technology,12841900000.0,21.887,17449000000.0,11423000000.0,1990000000.0,1520000000.0,2024-05-31,0.330236,0.154962,278.0,309.0,587.0
8,CPAY,2024-08-19,Software - Infrastructure,Technology,26338600000.0,20.272,16196540000.0,7842547000.0,1707174000.0,1699705000.0,2024-06-30,0.204354,0.064816,220.0,241.0,461.0
9,AZO,2024-08-19,Specialty Retail,Consumer Cyclical,67156310000.0,19.669,17108430000.0,9192587000.0,3726577000.0,3714523000.0,2024-05-31,0.470774,0.055491,298.0,215.0,513.0
