########## choose stock list (aka market) ##################

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import date

In [2]:
## Build the list of Yahoo Finance tickers from the Hong Kong securities workbook ##
# Every column is read as text so that leading zeros in the stock codes survive.
ex1 = pd.read_excel(
    'stock_info_hk.xlsx',
    sheet_name='ListOfSecurities',
    skiprows=2,
    dtype=str,
)

# Keep only HKD-traded, main-board equity securities.
wanted_rows = (
    (ex1['Category'] == 'Equity')
    & (ex1['Sub-Category'] == 'Equity Securities (Main Board)')
    & (ex1['Trading Currency'] == 'HKD')
)
ex1 = ex1.loc[wanted_rows]

# Drop the first character of every code (the tickers shown below are 4 digits
# long; presumably the workbook stores 5-digit zero-padded codes -- verify).
ex1['Stock Code'] = ex1['Stock Code'].str[1:]

# One-column frame named 'ticker' with the '.HK' suffix, de-duplicated and
# re-indexed from 0.
tickers = ex1['Stock Code'].astype('str') + '.HK'
df = (
    pd.DataFrame(tickers)
    .rename(columns={'Stock Code': 'ticker'})
    .drop_duplicates()
    .reset_index(drop=True)
)
df

Unnamed: 0,ticker
0,0001.HK
1,0002.HK
2,0003.HK
3,0004.HK
4,0005.HK
...,...
2278,9995.HK
2279,9996.HK
2280,9997.HK
2281,9998.HK


### Acquire information from yfinance Ticker attributes (.info, .balance_sheet, .financials)

In [3]:
# 1st: stamp every row with the calendar date on which the data is pulled.
pull_date = date.today()
df['date_pulling'] = pull_date

In [4]:
# Collect per-ticker fundamentals from yfinance, filter them and save to CSV.
#
# For every ticker in `df`, three yfinance `Ticker` attributes are read once:
#   * `.info`          -> the fields named in `info_attribute_list`
#   * `.balance_sheet` -> the rows named in `balancesheet_list` (first column)
#   * `.financials`    -> the rows named in `financials_list` (first column),
#                         plus the label of that first column as 'ttm_latest'
# Anything that cannot be retrieved is stored as None; rows containing such
# gaps are removed by the dropna() near the end of the cell.

# 2nd: fields read from .info
info_attribute_list = [
    'industry',
    'sector',
    'enterpriseValue',
    'totalCashPerShare',
    'profitMargins',
    'trailingPE'
]

# 3rd: rows read from .balance_sheet
# NOTE(review): the code reads `.balance_sheet`, not `.quarterly_balance_sheet`.
balancesheet_list = [
    'Total Assets',
    'Current Liabilities'
]

# 4th: rows read from .financials
# NOTE(review): the code reads `.financials`, not `.quarterly_financials`.
ttm = 4  # NOTE(review): not referenced anywhere else in this cell
financials_list = [
    'EBIT',
    'Operating Income'
]


def _safe_attribute(ticker_obj, attribute):
    """Return ``getattr(ticker_obj, attribute)``, or None if reading it raises.

    Only ``Exception`` is caught, so KeyboardInterrupt / SystemExit still
    propagate and the long download loop can be interrupted.
    """
    try:
        return getattr(ticker_obj, attribute)
    except Exception:
        return None


def _info_field(info, key):
    """Return ``info[key]``, or None when the lookup fails for any reason."""
    try:
        return info[key]
    except Exception:
        return None


def _first_column_value(statement, row_label):
    """Return the first-column value of the row labelled ``row_label``.

    None is returned when ``statement`` is unusable (e.g. None) or when it has
    no row with that label.
    """
    try:
        return statement.loc[statement.index == row_label].values[0][0]
    except Exception:
        return None


info_attribute_list_buffer = []
balancesheet_list_buffer = []
financials_list_buffer = []
# Iterate over the ticker values themselves so the loop does not depend on the
# frame having a 0..n-1 index.
for symbol in df['ticker']:
    yfticker = yf.Ticker(symbol)

    # Read each attribute once per ticker instead of once per requested field.
    info = _safe_attribute(yfticker, 'info')
    balance_sheet = _safe_attribute(yfticker, 'balance_sheet')
    financials = _safe_attribute(yfticker, 'financials')

    # 2nd: values from .info
    info_attribute_list_buffer.append(
        [_info_field(info, key) for key in info_attribute_list]
    )

    # 3rd: values from .balance_sheet
    balancesheet_list_buffer.append(
        [_first_column_value(balance_sheet, label) for label in balancesheet_list]
    )

    # 4th: values from .financials, followed by the label of its first column
    financials_row = [
        _first_column_value(financials, label) for label in financials_list
    ]
    try:
        financials_row.append(financials.columns[0])
    except Exception:
        financials_row.append(None)
    financials_list_buffer.append(financials_row)

# The extra trailing element of every financials row is stored as 'ttm_latest'.
financials_list.append('ttm_latest')

# Passing index=df.index keeps the collected rows aligned with the rows of `df`
# during the index-based join, whatever index `df` currently has.
df = df.join(pd.DataFrame(info_attribute_list_buffer, columns=info_attribute_list, index=df.index))
df = df.join(pd.DataFrame(balancesheet_list_buffer, columns=balancesheet_list, index=df.index))
df = df.join(pd.DataFrame(financials_list_buffer, columns=financials_list, index=df.index))


## Drop every row that has at least one missing value ##
df = df.dropna()
## Keep only rows whose enterpriseValue, EBIT and Operating Income are all > 0 ##
strictly_positive = (
    (df['enterpriseValue'] > 0)
    & (df['EBIT'] > 0)
    & (df['Operating Income'] > 0)
)
df = df.loc[strictly_positive]
df = df.reset_index(drop=True)

# Persist the snapshot so later cells can start from the file without re-downloading.
df.to_csv('data_stock_hk.csv',index= False)

############################################################################

In [5]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import date
# Reload the saved snapshot and list the distinct sector / industry labels in it.
df = pd.read_csv('data_stock_hk.csv')
for label_column in ('sector', 'industry'):
    print(label_column, df[label_column].unique())

sector ['Industrials' 'Utilities' 'Real Estate' 'Communication Services'
 'Consumer Cyclical' 'Technology' 'Financial Services' 'Basic Materials'
 'Energy' 'Consumer Defensive' 'Healthcare']
industry ['Conglomerates' 'Utilities - Regulated Electric'
 'Utilities - Regulated Gas' 'Utilities - Independent Power Producers'
 'Real Estate Services' 'Real Estate - Diversified'
 'Real Estate - Development' 'Publishing' 'Resorts & Casinos'
 'Electronic Components' 'Farm & Heavy Construction Machinery'
 'Information Technology Services' 'Restaurants'
 'Specialty Industrial Machinery' 'Railroads' 'Lodging' 'Semiconductors'
 'Utilities - Renewable' 'Credit Services' 'Auto Parts' 'Aluminum'
 'Consumer Electronics' 'Steel' 'Specialty Retail'
 'Infrastructure Operations' 'Luxury Goods' 'Real Estate—Development'
 'Oil & Gas Refining & Marketing' 'Entertainment' 'Packaged Foods'
 'Marine Shipping' 'Textile Manufacturing' 'Real Estate—Diversified'
 'Internet Content & Information' 'Waste Management' 'Be

In [6]:

### Compute the two ranking factors, MF_ROC and MF_EY ###
# Column used as the earnings figure in both ratios ('EBIT' is the alternative
# column available in the data).
represents_earning = 'Operating Income'

earnings = df[represents_earning]
capital_base = df['Total Assets'] - df['Current Liabilities']

# MF_ROC: earnings divided by (Total Assets - Current Liabilities).
df['MF_ROC'] = earnings / capital_base
# MF_EY: earnings divided by enterpriseValue.
df['MF_EY'] = earnings / df['enterpriseValue']
df

Unnamed: 0,ticker,date_pulling,industry,sector,enterpriseValue,totalCashPerShare,profitMargins,trailingPE,Total Assets,Current Liabilities,EBIT,Operating Income,ttm_latest,MF_ROC,MF_EY
0,0001.HK,2024-08-20,Conglomerates,Industrials,4.890793e+11,34.370,0.08074,6.984668,1.158903e+12,1.625250e+11,4.506100e+10,4.641900e+10,2023-12-31,0.046588,0.094911
1,0002.HK,2024-08-20,Utilities - Regulated Electric,Utilities,2.382158e+11,2.160,0.08738,23.043478,2.290510e+11,4.247900e+10,1.247200e+10,1.518400e+10,2023-12-31,0.081384,0.063741
2,0003.HK,2024-08-20,Utilities - Regulated Gas,Utilities,1.816751e+11,0.535,0.10035,21.793104,1.619776e+11,3.942450e+10,1.138900e+10,8.137300e+09,2023-12-31,0.066398,0.044790
3,0006.HK,2024-08-20,Utilities - Independent Power Producers,Utilities,1.153961e+11,0.696,5.86809,19.154930,9.570200e+10,3.249000e+09,6.395000e+09,1.131000e+09,2023-12-31,0.012233,0.009801
4,0010.HK,2024-08-20,Real Estate Services,Real Estate,1.163211e+11,5.301,0.16915,5.775510,2.314520e+11,1.507900e+10,7.197000e+09,7.119000e+09,2023-12-31,0.032902,0.061201
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
963,9988.HK,2024-08-20,Internet Retail,Consumer Cyclical,1.456038e+12,23.598,0.07383,21.223684,1.764829e+12,4.215070e+11,1.195070e+11,1.238710e+11,2024-03-31,0.092212,0.085074
964,9992.HK,2024-08-20,Leisure,Consumer Cyclical,4.999579e+10,4.493,0.17177,47.840908,9.968863e+09,1.733301e+09,1.448092e+09,1.226965e+09,2023-12-31,0.148984,0.024541
965,9997.HK,2024-08-20,Medical Instruments & Supplies,Healthcare,4.546523e+09,2.210,0.54429,11.936171,4.263818e+09,1.908240e+08,5.910280e+08,5.009780e+08,2023-12-31,0.123000,0.110189
966,9998.HK,2024-08-20,Engineering & Construction,Industrials,5.968200e+07,0.049,0.01969,6.000000,8.529300e+07,4.563800e+07,2.525000e+06,1.326000e+06,2023-06-30,0.033438,0.022218


In [7]:
### Drop sectors and industries that should not enter the ranking ###
# Utilities, Energy and Financial Services are dropped as suggested from the
# book; Real Estate and the construction / building industries listed below
# are excluded as well.
sectortoexclude = [
    'Utilities',
    'Energy',
    'Financial Services',
    'Real Estate'
]
for i in sectortoexclude:
    print('exclude sector > ',i)
# No blanket try/except here: if the 'sector' column were missing the filter
# would otherwise be skipped silently and excluded sectors would end up ranked.
df = df.loc[~df['sector'].isin(sectortoexclude)]

# Matching is by exact string. The stored data contains spelling variants of
# some industry names (e.g. 'Real Estate - Development' and
# 'Real Estate—Development'), so a name added here must match the stored
# spelling exactly.
industrytoexclude =[
    'Engineering & Construction',
    'Building Products & Equipment',
    'Building Materials'
]
for i in industrytoexclude:
    print('exclude industry > ',i)
df = df.loc[~df['industry'].isin(industrytoexclude)]

df = df.reset_index(drop=True)

exclude sector >  Utilities
exclude sector >  Energy
exclude sector >  Financial Services
exclude sector >  Real Estate
exclude industry >  Engineering & Construction
exclude industry >  Building Products & Equipment
exclude industry >  Building Materials


In [8]:
#### Keep only companies whose enterpriseValue reaches the `market` threshold ###
# NOTE(review): 7.79 presumably converts a 50,000,000 base amount from another
# currency into HKD -- confirm the intended rate and source currency.
threshold_base = 50000000
hkd_multiplier = 7.79
market = threshold_base * hkd_multiplier  ### in HKD

is_large_enough = df['enterpriseValue'] >= market
df_market = df.loc[is_large_enough].reset_index(drop=True)

In [9]:
### Rank each factor and combine the two ranks ###
# Series.rank() is ascending by default, so a larger factor value gets a larger
# rank number and the largest combined score marks the top-ranked company.
for factor in ('MF_ROC', 'MF_EY'):
    df_market['Ranking_' + factor] = df_market[factor].rank()
df_market['Ranking_MF'] = df_market['Ranking_MF_ROC'] + df_market['Ranking_MF_EY']

# Show the row(s) holding the highest combined score.
top_score = df_market['Ranking_MF'].max()
df_market.loc[df_market['Ranking_MF'].eq(top_score)]

Unnamed: 0,ticker,date_pulling,industry,sector,enterpriseValue,totalCashPerShare,profitMargins,trailingPE,Total Assets,Current Liabilities,EBIT,Operating Income,ttm_latest,MF_ROC,MF_EY,Ranking_MF_ROC,Ranking_MF_EY,Ranking_MF
551,9696.HK,2024-08-20,Other Industrial Metals & Mining,Basic Materials,40008220000.0,5.731,-0.04661,4.233955,74969070000.0,6659867000.0,36812430000.0,33895770000.0,2023-12-31,0.496211,0.84722,577.0,574.0,1151.0


In [10]:
# Number of top-ranked companies to display.
numstocks = 50

# Order by the combined score, best (largest) first, and renumber the rows.
df_sorted = (
    df_market
    .sort_values(by=['Ranking_MF'], ascending=False)
    .reset_index(drop=True)
)
df_sorted.head(numstocks)

Unnamed: 0,ticker,date_pulling,industry,sector,enterpriseValue,totalCashPerShare,profitMargins,trailingPE,Total Assets,Current Liabilities,EBIT,Operating Income,ttm_latest,MF_ROC,MF_EY,Ranking_MF_ROC,Ranking_MF_EY,Ranking_MF
0,9696.HK,2024-08-20,Other Industrial Metals & Mining,Basic Materials,40008220000.0,5.731,-0.04661,4.233955,74969070000.0,6659867000.0,36812430000.0,33895770000.0,2023-12-31,0.496211,0.84722,577.0,574.0,1151.0
1,2660.HK,2024-08-20,Electronic Gaming & Multimedia,Communication Services,912497200.0,1.929,0.35248,3.636364,2640490000.0,290124000.0,883677000.0,837314000.0,2023-12-31,0.356248,0.917607,566.0,575.0,1141.0
2,1958.HK,2024-08-20,Auto Manufacturers,Consumer Cyclical,15882650000.0,4.395,0.01326,4.756098,168723200000.0,76453460000.0,21495040000.0,23473600000.0,2023-12-31,0.254402,1.47794,544.0,580.0,1124.0
3,0085.HK,2024-08-20,Semiconductors,Technology,977658200.0,0.883,0.22736,3.147059,4326786000.0,1754651000.0,779259000.0,716956000.0,2023-12-31,0.27874,0.73334,552.0,571.0,1123.0
4,1184.HK,2024-08-20,Electronics & Computer Distribution,Technology,2051690000.0,2.668,0.01805,6.076923,7825902000.0,4872046000.0,802606000.0,914495000.0,2023-12-31,0.309594,0.445728,558.0,558.0,1116.0
5,9919.HK,2024-08-20,Advertising Agencies,Communication Services,501687100.0,0.412,0.11165,7.5,795401000.0,428772000.0,165195000.0,162009000.0,2023-12-31,0.441888,0.322928,576.0,536.0,1112.0
6,1126.HK,2024-08-20,Leisure,Consumer Cyclical,1712307000.0,2.056,0.15504,3.560976,4384667000.0,1126744000.0,844188000.0,809265000.0,2022-12-31,0.248399,0.472617,538.0,560.0,1098.0
7,1558.HK,2024-08-20,Drug Manufacturers—Specialty & Generic,Healthcare,9478315000.0,1.924,0.31656,3.898374,12744330000.0,4332220000.0,2354169000.0,2877598000.0,2023-12-31,0.342078,0.303598,564.0,528.0,1092.0
8,1003.HK,2024-08-20,Entertainment,Communication Services,1681311000.0,0.085,0.1194,13.25,2234243000.0,777126000.0,217008000.0,467160000.0,2023-12-31,0.320606,0.277855,561.0,520.0,1081.0
9,6610.HK,2024-08-20,Software—Application,Technology,578519600.0,0.186,0.21205,2.59375,1721274000.0,268142000.0,276499000.0,303227000.0,2023-12-31,0.208671,0.524143,515.0,564.0,1079.0


In [11]:
# Take the `numstocks` best-ranked companies and re-order that shortlist by
# cash per share, highest first.
df_sorted_2 = (
    df_sorted.iloc[:numstocks]
    .copy()
    .sort_values(by=['totalCashPerShare'], ascending=False)
    .reset_index(drop=True)
)
df_sorted_2

Unnamed: 0,ticker,date_pulling,industry,sector,enterpriseValue,totalCashPerShare,profitMargins,trailingPE,Total Assets,Current Liabilities,EBIT,Operating Income,ttm_latest,MF_ROC,MF_EY,Ranking_MF_ROC,Ranking_MF_EY,Ranking_MF
0,0921.HK,2024-08-20,"Furnishings, Fixtures & Appliances",Consumer Cyclical,15602530000.0,15.881,0.03573,7.710938,65946500000.0,44042090000.0,5828946000.0,4537223000.0,2023-12-31,0.207137,0.2908,514.0,525.0,1039.0
1,2877.HK,2024-08-20,Drug Manufacturers—Specialty & Generic,Healthcare,665903000.0,7.827,0.21466,5.906475,9573993000.0,2433718000.0,1242990000.0,975535000.0,2023-12-31,0.136624,1.464981,412.0,579.0,991.0
2,0098.HK,2024-08-20,Aluminum,Basic Materials,3601718000.0,6.752,0.04634,3.745192,12866230000.0,4794935000.0,1054379000.0,1291607000.0,2023-12-31,0.160025,0.358609,462.0,543.0,1005.0
3,9696.HK,2024-08-20,Other Industrial Metals & Mining,Basic Materials,40008220000.0,5.731,-0.04661,4.233955,74969070000.0,6659867000.0,36812430000.0,33895770000.0,2023-12-31,0.496211,0.84722,577.0,574.0,1151.0
4,1681.HK,2024-08-20,Drug Manufacturers—Specialty & Generic,Healthcare,1658169000.0,4.522,0.3029,5.168224,5507994000.0,1494996000.0,884967000.0,822630000.0,2023-12-31,0.204991,0.496108,512.0,561.0,1073.0
5,1958.HK,2024-08-20,Auto Manufacturers,Consumer Cyclical,15882650000.0,4.395,0.01326,4.756098,168723200000.0,76453460000.0,21495040000.0,23473600000.0,2023-12-31,0.254402,1.47794,544.0,580.0,1124.0
6,1651.HK,2024-08-20,Tools & Accessories,Industrials,2555287000.0,2.943,0.15385,7.080292,3632235000.0,806208000.0,703727000.0,681776000.0,2024-03-31,0.241249,0.26681,534.0,516.0,1050.0
7,0546.HK,2024-08-20,Specialty Chemicals,Basic Materials,13936010000.0,2.756,0.11226,3.251852,32560890000.0,14044560000.0,4015298000.0,3491442000.0,2023-12-31,0.18856,0.250534,500.0,513.0,1013.0
8,0327.HK,2024-08-20,Business Equipment & Supplies,Industrials,1731468000.0,2.729,0.17217,4.019048,9075329000.0,1488815000.0,1259340000.0,1205013000.0,2023-12-31,0.158836,0.695949,458.0,570.0,1028.0
9,1184.HK,2024-08-20,Electronics & Computer Distribution,Technology,2051690000.0,2.668,0.01805,6.076923,7825902000.0,4872046000.0,802606000.0,914495000.0,2023-12-31,0.309594,0.445728,558.0,558.0,1116.0
