<a href="https://colab.research.google.com/github/JerryChenz/InvestmentManagement/blob/master/stock_screener.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Screener inputs (Colab form fields). Both values are read by the filtering cell
# further down: any `market` other than HK/CN/US/JP, and the `sector` value "Any",
# keep every row whose Exchange / Sector is not an empty string.
# Filters by Listing Location
market = 'HK' #@param ["HK","CN", "US", "JP", "Any"]

# Filters by Business Sector -- df.Sector.unique()
sector = "Real Estate" #@param ["Real Estate", "Consumer Cyclical", "Industrials", "Energy", "Utilities", "Healthcare", "Basic Materials", "Financial Services", "Consumer Defensive", "Technology", "Communication Services", "Any"]

## Step 1: Set inputs

We load the sample dataset from my GitHub repository and display it with pandas.

In [None]:
import pandas as pd
import numpy as np
# Load the screener summary table (one row per ticker) from the public GitHub repository.
summary_url = 'https://raw.githubusercontent.com/JerryChenz/InvestmentManagementOpen/main/financial_models/Opportunities/Screener/screener_summary.csv'
df = pd.read_csv(summary_url)
# Preparing the data
df['Last_fy'] = pd.to_datetime(df['Last_fy'])
# Patch Fx_rate for the (price currency, reporting currency) pairs handled here.
# Fx_rate is later used as `capitalization_price / Fx_rate` to express the market
# capitalization in the reporting currency.
mop_hkd = (df["Price_currency"] == "HKD") & (df["Reporting_Currency"] == "MOP")
twd_hkd = (df["Price_currency"] == "HKD") & (df["Reporting_Currency"] == "TWD")
twd_usd = (df["Price_currency"] == "USD") & (df["Reporting_Currency"] == "TWD")
df.loc[mop_hkd, "Fx_rate"] = 0.98
# Fixed: this rate was previously written to the mop_hkd rows again, which overwrote
# 0.98 with 0.26 and left the twd_hkd mask unused.
df.loc[twd_hkd, "Fx_rate"] = 0.26
# NOTE(review): 7.81 looks like an HKD-per-USD quote; by analogy with the two rates
# above a TWD/USD rate would be expected to be far below 1 -- confirm before relying on it.
df.loc[twd_usd, "Fx_rate"] = 7.81
# exclude minor countries: rows that still have no Fx_rate are dropped
df = df[df['Fx_rate'].notna()]
# Remaining missing values become 0, so later ratios can divide by zero.
df = df.fillna(0)
# Keep only the columns used by the screener.
df = df[
        ['Ticker', 'Name', 'Sector','Exchange', 'Price', 'Price_currency', 'Shares', 'Reporting_Currency', 'Fx_rate',
         'Dividend', 'Buyback', 'Last_fy', 'TotalAssets', 'CurrentAssets', 'CurrentLiabilities',
         'CurrentDebtAndCapitalLeaseObligation', 'CurrentCapitalLeaseObligation',
         'LongTermDebtAndCapitalLeaseObligation', 'LongTermCapitalLeaseObligation',
         'TotalEquityGrossMinorityInterest', 'MinorityInterest', 'CashAndCashEquivalents',
         'OtherShortTermInvestments', 'InvestmentProperties', 'LongTermEquityInvestment',
         'InvestmentinFinancialAssets', 'NetPPE', 'TotalRevenue', 'Avg_sales_growth', 'CostOfRevenue',
         'GrossMargin', 'Avg_Gross_margin', 'SellingGeneralAndAdministration',
         'EBIT', 'EbitMargin','CFO', 'CFI', 'CFF', 'Avg_ebit_margin', 'Avg_ebit_growth', 'InterestExpense',
         'NetIncomeCommonStockholders', 'NetMargin', 'Avg_net_margin', 'Avg_NetIncome_growth',
         'Years_of_data']]
display(df[['Ticker', 'Name', 'Sector','Exchange', 'Price', 'Price_currency', 'Dividend', 'Buyback']])

Unnamed: 0,Ticker,Name,Sector,Exchange,Price,Price_currency,Dividend,Buyback
0,000036.SZ,CHINA UNION HOLD L,Real Estate,SHZ,4.030000,CNY,0.000000,0.000000
1,000045.SZ,SHN TEXTILE HLDGS,Consumer Cyclical,SHZ,10.990000,CNY,0.000000,0.000000
2,000088.SZ,SHN YAN TIAN PORT,Industrials,SHZ,5.410000,CNY,0.000000,0.000000
3,000096.SZ,SHENZHEN GUANGJU E,Energy,SHZ,8.370000,CNY,0.000000,0.000000
4,000099.SZ,CITIC OFFSHORE HEL,Industrials,SHZ,7.440000,CNY,0.000000,0.000000
...,...,...,...,...,...,...,...,...
2895,ZD,"Ziff Davis, Inc.",Communication Services,NMS,89.620003,USD,0.000000,1.659766
2896,ZG,"Zillow Group, Inc.",Communication Services,NMS,43.250000,USD,0.000000,5.194365
2897,ZIM,ZIM Integrated Shipping Service,Industrials,NYQ,18.610001,USD,4.468150,0.000000
2898,ZTO,ZTO Express (Cayman) Inc.,Industrials,NYQ,29.110001,USD,1.672118,4.705978


In [None]:
"""0. Definitions"""
# All quantities below are pandas Series indexed like `df`, so later cells can combine
# them with filtered frames by index label rather than by position.
# capitalization in reporting currency
capitalization_price = df['Price'] * df['Shares']
capitalization_report = capitalization_price / df['Fx_rate']
# dividend rate & buyback rate
dividend_rate = df['Dividend'] / df['Price']
buyback_rate = df['Buyback'] / df['Price']
# Capital Structure related
total_debt = df['CurrentDebtAndCapitalLeaseObligation'] + df['LongTermDebtAndCapitalLeaseObligation']
common_equity = df['TotalEquityGrossMinorityInterest'] - df['MinorityInterest']
# When both current assets and current liabilities are 0, fall back to common equity.
no_current_items = (df['CurrentAssets'] == 0) & (df['CurrentLiabilities'] == 0)
# Series.where keeps the index; np.where returned a bare ndarray that only lined up
# with the other Series positionally.
net_working_capital = (df['CurrentAssets'] - df['CurrentLiabilities']).where(~no_current_items, common_equity)
# Excess cash is cash capped at net working capital (the smaller of the two).
excess_cash = df['CashAndCashEquivalents'].where(
    net_working_capital >= df['CashAndCashEquivalents'], net_working_capital)
# Operating assets and liabilities
op_assets = df['TotalAssets'] - df['CashAndCashEquivalents']
op_liabilities = df['TotalAssets'] - df['TotalEquityGrossMinorityInterest'] - total_debt
net_op_assets = op_assets - op_liabilities
# Non-operating assets
current_financial_assets = df['OtherShortTermInvestments'] + df['InvestmentinFinancialAssets']

# 1. Stability Ratios
# liquidity_coverage_ratio
# NOTE: a 0 in CurrentLiabilities makes these ratios inf (or NaN for 0/0).
core_lcr = df['CashAndCashEquivalents'] / df['CurrentLiabilities']
lcr = (df['CashAndCashEquivalents'] + current_financial_assets) / df['CurrentLiabilities']
# leverage ratio
current_ratio = df['CurrentAssets'] / df['CurrentLiabilities']
debt_ratio = total_debt / common_equity
# Accrual anomaly - Scaled Total Accruals
# sta = (df['NetIncomeCommonStockholders'] - df['CFO']) / df['TotalAssets']
# Scaled Net Operating Assets
sona = net_op_assets / df['TotalAssets']
# Todo: Dr. Messod Beneish's PROBM model

# 2. Quality Ratios
ppe_multiple = df['NetPPE'] / df['TotalRevenue']
greenblatt_capital = df['NetPPE'] + net_working_capital - df['CashAndCashEquivalents']  # Not consider capital structure
greenblatt_roc = df['EBIT'] / greenblatt_capital

In [None]:
# Exchange codes that make up each selectable listing market.
market_exchanges = {
    'HK': ['HKG'],
    'CN': ['SHZ', 'SHH'],
    'US': ['NMS', 'NYQ'],
    'JP': ['JPX'],
}
if market in market_exchanges:
  exchange_condition = df['Exchange'].isin(market_exchanges[market])
else:
  # "Any" (or an unrecognised market): keep every row with a non-empty exchange.
  exchange_condition = (df['Exchange'] != '')

if sector == 'Any':
  sector_condition = (df['Sector'] != '')
else:
  sector_condition = (df['Sector'] == sector)

# Take an explicit copy: later cells add columns to this frame, and writing to a
# .loc slice is what triggers pandas' SettingWithCopyWarning.
# NOTE: `df` is overwritten here, so re-running this cell with a different market or
# sector requires re-running the loading cell first.
df = df.loc[exchange_condition & sector_condition].copy()
display(df[['Ticker', 'Name', 'Sector','Exchange', 'Price', 'Price_currency', 'Dividend', 'Buyback']])

Unnamed: 0,Ticker,Name,Sector,Exchange,Price,Price_currency,Dividend,Buyback
8,0004.HK,WHARF HOLDINGS,Real Estate,HKG,21.150000,HKD,0.000000,0.000000
72,0010.HK,HANG LUNG GROUP,Real Estate,HKG,14.740000,HKD,0.839941,0.000000
74,0012.HK,HENDERSON LAND,Real Estate,HKG,30.049999,HKD,1.800248,0.000000
95,0014.HK,HYSAN DEV,Real Estate,HKG,27.049999,HKD,1.457629,0.142160
96,0016.HK,SHK PPT,Real Estate,HKG,113.300003,HKD,4.949621,0.008972
...,...,...,...,...,...,...,...,...
2209,9928.HK,TIMES NEIGHBOR,Real Estate,HKG,1.200000,HKD,0.000000,0.000000
2220,9979.HK,GREENTOWN MGMT,Real Estate,HKG,6.800000,HKD,0.165438,0.000000
2221,9982.HK,CENTRALCHINA MT,Real Estate,HKG,0.860000,HKD,0.071836,0.002117
2222,9983.HK,CC NEW LIFE,Real Estate,HKG,3.620000,HKD,0.266540,0.006368


# Step 2: Pre-screening

We can screen using different sets of conditions, then merge them later.

In [None]:
# Common filtering conditions (boolean masks, combined in the next cell)
# 1. Reasonable leverage and good liquidity
common_1 = core_lcr.ge(0.6) & lcr.ge(0.8)
common_2 = current_ratio.ge(1) & debt_ratio.le(0.8)
common_3 = total_debt.div(df['CurrentAssets']).lt(1.5)
# 2. Avoid negative gross margin and value traps
#    - common_4: minimum capitalization
#    - common_5: larger companies need a positive average gross margin
#    - common_6: smaller companies need to pay a dividend or buy back shares
common_4 = capitalization_price.gt(1_000_000_000)
common_5 = capitalization_price.gt(6_000_000_000) & df['Avg_Gross_margin'].gt(0.01)
pays_out = dividend_rate.gt(0.01) | buyback_rate.gt(0.01)
common_6 = capitalization_price.le(6_000_000_000) & pays_out

In [None]:
# filtered by common conditions
# Start from an explicit copy: `common_df = df` only aliased the filtered frame, so the
# ratio columns below were written onto `df` itself and raised SettingWithCopyWarning.
common_df = df.copy()
# Column assignment aligns each ratio Series to common_df's index, so only the rows
# that are present in common_df receive values.
common_df['ROC'] = greenblatt_roc
common_df['lcr'] = lcr
common_df['core_lcr'] = core_lcr
common_df['Debt ratio'] = debt_ratio
common_df['Current ratio'] = current_ratio
common_df['Dividend rate'] = dividend_rate
common_df['Buyback rate'] = buyback_rate
common_df['PPE_multiple'] = ppe_multiple
# Every row must pass conditions 1-4 and at least one of 5 / 6.
common_df = common_df.loc[common_1 & common_2 & common_3 & common_4 & (common_5 | common_6)]
display(common_df.loc[:, ['Ticker', 'Name', 'Price', 'Price_currency', 'Dividend rate', 'Buyback rate', 'Avg_sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin', 'ROC', 'lcr' ,'core_lcr', 'Current ratio', 'Debt ratio']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  common_df['ROC'] = greenblatt_roc
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  common_df['lcr'] = lcr
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  common_df['core_lcr'] = core_lcr
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

Unnamed: 0,Ticker,Name,Price,Price_currency,Dividend rate,Buyback rate,Avg_sales_growth,Avg_Gross_margin,Avg_ebit_margin,ROC,lcr,core_lcr,Current ratio,Debt ratio
184,0026.HK,CHINA MOTOR BUS,72.449997,HKD,0.04372,0.0,-6.19,5.4325,84.69,1.018063,66.629605,34.139193,34.227015,0.0
258,0071.HK,MIRAMAR HOTEL,12.4,HKD,0.0,0.0,-31.1,53.036667,44.506667,-4.270619,7.584353,7.464791,7.972771,0.003201
314,0194.HK,LIU CHONG HING,7.4,HKD,0.058108,0.0,-15.66,43.176667,32.0,0.095299,1.465675,1.18543,2.620671,0.164369
341,0258.HK,TOMSON GROUP,1.7,HKD,0.032353,0.0,-7.34,32.563333,35.06,0.250407,0.886942,0.813433,2.127416,0.021907
477,0606.HK,SCE CM,2.1,HKD,0.022477,0.0,46.46,57.183333,32.976667,-0.816947,4.680639,4.680639,4.920236,0.001095
501,0683.HK,KERRY PPT,20.5,HKD,0.178686,0.001358,-6.95,45.126667,43.85,0.149003,0.913698,0.805098,2.888401,0.449716
580,0873.HK,SHIMAO SERVICES,3.23,HKD,0.0,0.008331,83.96,68.763333,21.736667,-1.587572,0.865164,0.863452,1.53142,0.368584
667,1113.HK,CK ASSET,50.349998,HKD,0.0,0.10799,-11.79,46.74,53.26,0.21729,1.318944,1.318944,4.297657,0.130649
702,1209.HK,CHINA RES MIXC,46.349998,HKD,0.0,0.0,23.22,75.303333,16.583333,-0.440087,1.55034,1.55034,1.957418,0.132519
791,1516.HK,SUNAC SERVICES,4.37,HKD,0.0,0.020187,67.24,71.803333,20.013333,5.591689,1.074305,0.928148,1.955115,0.014884


# Step 3: Screening

Filter the dataset using the above conditions

In [None]:
# 1st set of conditions: Stability
asset_1 = (core_lcr >= 0.8) & (lcr >= 0.9)
asset_2 = (dividend_rate > 0.01) | (buyback_rate > 0.01)
asset_3 = (current_ratio >= 1.5) & (debt_ratio <= 0.5)

# filtered by 1st set of conditions: Stability
# Filter first, then flag the resulting new frame. Previously `asset_df = common_df`
# was an alias, so `is_Asset = True` was written onto every row of common_df; the
# Stalwart screen (which also starts from common_df) then inherited the flag and
# every Stalwart row looked like an asset play as well.
asset_df = common_df.loc[asset_1 & asset_2 & asset_3].assign(is_Asset=True)
display(asset_df.loc[:, ['Ticker', 'Name', 'Price', 'Price_currency', 'Dividend rate', 'Buyback rate', 'ROC', 'Avg_sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin', 'lcr' ,'core_lcr', 'Current ratio', 'Debt ratio']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  asset_df['is_Asset'] = True


Unnamed: 0,Ticker,Name,Price,Price_currency,Dividend rate,Buyback rate,ROC,Avg_sales_growth,Avg_Gross_margin,Avg_ebit_margin,lcr,core_lcr,Current ratio,Debt ratio
184,0026.HK,CHINA MOTOR BUS,72.449997,HKD,0.04372,0.0,1.018063,-6.19,5.4325,84.69,66.629605,34.139193,34.227015,0.0
314,0194.HK,LIU CHONG HING,7.4,HKD,0.058108,0.0,0.095299,-15.66,43.176667,32.0,1.465675,1.18543,2.620671,0.164369
477,0606.HK,SCE CM,2.1,HKD,0.022477,0.0,-0.816947,46.46,57.183333,32.976667,4.680639,4.680639,4.920236,0.001095
501,0683.HK,KERRY PPT,20.5,HKD,0.178686,0.001358,0.149003,-6.95,45.126667,43.85,0.913698,0.805098,2.888401,0.449716
667,1113.HK,CK ASSET,50.349998,HKD,0.0,0.10799,0.21729,-11.79,46.74,53.26,1.318944,1.318944,4.297657,0.130649
791,1516.HK,SUNAC SERVICES,4.37,HKD,0.0,0.020187,5.591689,67.24,71.803333,20.013333,1.074305,0.928148,1.955115,0.014884
1444,3316.HK,BINJIANG SER,24.700001,HKD,0.032023,0.0,-1.099656,41.25,69.61,24.823333,1.285909,1.014911,1.685223,0.00118
1916,6626.HK,YUEXIU SERVICES,4.58,HKD,0.01294,0.0,-0.638072,47.28,67.78,28.166667,2.264989,2.247855,2.760039,0.019334
2178,9666.HK,JINKE SERVICES,14.58,HKD,0.034324,0.0,0.454038,61.0,70.686667,23.74,1.593463,1.573082,3.537193,0.032496
2220,9979.HK,GREENTOWN MGMT,6.8,HKD,0.024329,0.0,-3.336417,7.33,53.88,29.976667,0.935933,0.851814,1.678311,0.004729


In [None]:
# 2nd set of conditions: Stalwart
stalwart_1 = (df['Avg_Gross_margin'] > 10) & (df['Avg_ebit_margin'] > 15)
# NOTE(review): the margin thresholds above are in percent (10, 15) and the displayed
# Avg_sales_growth values also look like percentages, so -0.10 may have been meant
# as -10 -- confirm the intended unit.
stalwart_2 = df['Avg_sales_growth'] >= -0.10
stalwart_3 = greenblatt_roc > 0.03

# filtered by 2nd set of conditions: Stalwart
# Filter first, then flag the resulting new frame, so the `is_Stalwart` column is not
# written onto common_df (the previous `Stalwart_df = common_df` was only an alias and
# triggered SettingWithCopyWarning).
Stalwart_df = common_df.loc[stalwart_1 & stalwart_2 & stalwart_3].assign(is_Stalwart=True)
display(Stalwart_df.loc[:, ['Ticker', 'Name', 'Price', 'Price_currency', 'Dividend rate', 'Buyback rate', 'Avg_sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin', 'ROC', 'lcr' ,'core_lcr', 'Current ratio', 'Debt ratio']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Stalwart_df['is_Stalwart'] = True


Unnamed: 0,Ticker,Name,Price,Price_currency,Dividend rate,Buyback rate,Avg_sales_growth,Avg_Gross_margin,Avg_ebit_margin,ROC,lcr,core_lcr,Current ratio,Debt ratio
791,1516.HK,SUNAC SERVICES,4.37,HKD,0.0,0.020187,67.24,71.803333,20.013333,5.591689,1.074305,0.928148,1.955115,0.014884
2178,9666.HK,JINKE SERVICES,14.58,HKD,0.034324,0.0,61.0,70.686667,23.74,0.454038,1.593463,1.573082,3.537193,0.032496
2222,9983.HK,CC NEW LIFE,3.62,HKD,0.07363,0.001759,43.44,67.283333,23.756667,1.101823,1.086418,1.059568,2.453578,0.03902


In [None]:
# Combine the results of both screens. A ticker that passed both appears twice here
# and is collapsed into a single row by the groupby below.
result_set = pd.concat([Stalwart_df, asset_df])

# Price ratios
# Non-operating, non-cash assets: investment properties and long-term equity
# investments are counted at 75%, current financial assets in full.
nonop_noncash_assets = 0.75 * (df['InvestmentProperties'] + df['LongTermEquityInvestment']) + current_financial_assets
enterprise_value = capitalization_report + total_debt + df['MinorityInterest'] - excess_cash - nonop_noncash_assets
ebit_tev = df['EBIT'] / enterprise_value
result_set['EV'] = enterprise_value # may need to update the price
result_set['EBIT/EV'] = ebit_tev

# One row per ticker (first non-null value of each column), sorted by ascending EV
result_set = result_set.groupby("Ticker").first().sort_values('EV')

# Flag the tickers that passed both the Asset and the Stalwart screen.
# The flag is stored as the string 'True' or '' (the next cell filters on != '');
# np.where with explicit strings avoids depending on np.select promoting a bool
# choice and a '' default to a common string dtype.
passed_both = result_set['is_Asset'].eq(True) & result_set['is_Stalwart'].eq(True)
result_set['A_S'] = np.where(passed_both, 'True', '')

display(result_set[['Name', 'Price', 'Dividend rate', 'Buyback rate', 'EV', 'EBIT/EV', 'ROC', 'Avg_sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin', 'lcr' ,'core_lcr', 'Current ratio', 'Debt ratio', 'A_S']])

Unnamed: 0_level_0,Name,Price,Dividend rate,Buyback rate,EV,EBIT/EV,ROC,Avg_sales_growth,Avg_Gross_margin,Avg_ebit_margin,lcr,core_lcr,Current ratio,Debt ratio,A_S
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0683.HK,KERRY PPT,20.5,0.178686,0.001358,-28262710000.0,-0.238465,0.149003,-6.95,45.126667,43.85,0.913698,0.805098,2.888401,0.449716,
0194.HK,LIU CHONG HING,7.4,0.058108,0.0,-4814320000.0,-0.049311,0.095299,-15.66,43.176667,32.0,1.465675,1.18543,2.620671,0.164369,
0026.HK,CHINA MOTOR BUS,72.449997,0.04372,0.0,-4355718000.0,-0.013263,1.018063,-6.19,5.4325,84.69,66.629605,34.139193,34.227015,0.0,
9982.HK,CENTRALCHINA MT,0.86,0.08353,0.002462,399128700.0,3.21134,7.075249,12.46,0.0,97.28,4.388903,4.379948,5.737879,0.002025,
0606.HK,SCE CM,2.1,0.022477,0.0,1214817000.0,0.296351,-0.816947,46.46,57.183333,32.976667,4.680639,4.680639,4.920236,0.001095,
9983.HK,CC NEW LIFE,3.62,0.07363,0.001759,2103907000.0,0.426095,1.101823,43.44,67.283333,23.756667,1.086418,1.059568,2.453578,0.03902,True
6626.HK,YUEXIU SERVICES,4.58,0.01294,0.0,2818946000.0,0.180698,-0.638072,47.28,67.78,28.166667,2.264989,2.247855,2.760039,0.019334,
9666.HK,JINKE SERVICES,14.58,0.034324,0.0,4470261000.0,0.304697,0.454038,61.0,70.686667,23.74,1.593463,1.573082,3.537193,0.032496,True
3316.HK,BINJIANG SER,24.700001,0.032023,0.0,4814516000.0,0.081485,-1.099656,41.25,69.61,24.823333,1.285909,1.014911,1.685223,0.00118,
1516.HK,SUNAC SERVICES,4.37,0.0,0.020187,7052862000.0,0.237216,5.591689,67.24,71.803333,20.013333,1.074305,0.928148,1.955115,0.014884,True


In [None]:
# Show only the tickers whose 'A_S' flag is non-empty.
flagged_rows = result_set['A_S'].ne('')
display_set = result_set[flagged_rows]
display_columns = [
    'Name', 'Price', 'Dividend rate', 'Buyback rate', 'EV', 'EBIT/EV', 'ROC',
    'Avg_sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin',
    'lcr', 'core_lcr', 'Current ratio', 'Debt ratio',
]
display(display_set[display_columns])

Unnamed: 0_level_0,Name,Price,Dividend rate,Buyback rate,EV,EBIT/EV,ROC,Avg_sales_growth,Avg_Gross_margin,Avg_ebit_margin,lcr,core_lcr,Current ratio,Debt ratio
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
9983.HK,CC NEW LIFE,3.62,0.07363,0.001759,2103907000.0,0.426095,1.101823,43.44,67.283333,23.756667,1.086418,1.059568,2.453578,0.03902
9666.HK,JINKE SERVICES,14.58,0.034324,0.0,4470261000.0,0.304697,0.454038,61.0,70.686667,23.74,1.593463,1.573082,3.537193,0.032496
1516.HK,SUNAC SERVICES,4.37,0.0,0.020187,7052862000.0,0.237216,5.591689,67.24,71.803333,20.013333,1.074305,0.928148,1.955115,0.014884


In [None]:
# Export the screen result to csv file
export = 'No' #@param ["Yes","No"]
simplified = 'No' #@param ["Yes","No"]

if export == "Yes":
  # Imported here because the download helper is only needed when exporting.
  from google.colab import files

  base_name = "Screener_result.csv"
  if simplified == "Yes":
    # Simplified export: only the summary columns shown in the result table.
    csv_name = base_name
    export_frame = result_set[[
        'Name', 'Price', 'Dividend rate', 'Buyback rate', 'EV', 'EBIT/EV', 'ROC',
        'Avg_sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin',
        'lcr', 'core_lcr', 'Current ratio', 'Debt ratio', 'A_S']]
  else:
    # Raw export: every column of the result set, under a "Raw_" file name.
    csv_name = f"Raw_{base_name}"
    export_frame = result_set
  export_frame.to_csv(csv_name)
  files.download(csv_name)
print("***End of the Screener***")

***End of the Screener***
