<a href="https://colab.research.google.com/github/JerryChenz/Invest_Proc/blob/master/stock_screener.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Step 1: Set inputs

We load the sample dataset from the my github repository, and display the dataset in pandas.

In [1]:
import pandas as pd
import numpy as np
summary_url = 'https://raw.githubusercontent.com/JerryChenz/Invest_Proc_Open/main/financial_models/Opportunities/Screener/screener_summary_1.csv'
df = pd.read_csv(summary_url)
# prepare the data
df.rename(columns={'Unnamed: 0': 'ticker'}, inplace=True)
# exclude minor countries with no forex quote on yahoo
df = df[df['fxRate'] != 0]
# rename the first column
# display(df)

# Step 2: Pre-screening

We can screen using different sets of conditions, then merge them later.

In [2]:
# Filters by Listing Location
market = 'CN' #@param ["HK","CN", "US", "Any"]

# Filters by Business Sector -- df.Sector.unique()
sector = "Any" #@param ["Real Estate", "Consumer Cyclical", "Industrials", "Energy", "Utilities", "Healthcare", "Basic Materials", "Financial Services", "Consumer Defensive", "Technology", "Communication Services", "Any"]

In [3]:
"""
Available variables:
'shortName', 'sector', 'industry', 'market', 'price', 'priceCurrency', 
'sharesOutstanding', 'reportCurrency', 'fxRate', 
'lastFiscalYearEnd', 'mostRecentQuarter', 'lastDividend', 'lastBuyback',
'totalAssets', 'currentAssets', 'currentLiabilities',
'totalAssets_-1', 'currentAssets_-1', 'currentLiabilities_-1',
'cashAndCashEquivalents', 'otherShortTermInvestments',
'cashAndCashEquivalents_-1', 'otherShortTermInvestments_-1',
'currentDebtAndCapitalLease', 'currentCapitalLease',
'currentDebtAndCapitalLease_-1', 'currentCapitalLease_-1',
'longTermDebtAndCapitalLease', 'longTermCapitalLease',
'longTermDebtAndCapitalLease_-1', 'longTermCapitalLease_-1',
'totalEquityAndMinorityInterest', 'commonStockEquity',
'totalEquityAndMinorityInterest_-1', 'commonStockEquity_-1',
'investmentProperties', 'longTermEquityInvestment', 'longTermFinancialAssets',
'investmentProperties_-1', 'longTermEquityInvestment_-1', 'longTermFinancialAssets_-1',
'netPPE', 'totalRevenue', 'costOfRevenue', 'sellingGeneralAndAdministration',
'netPPE_-1', 'totalRevenue_-1', 'costOfRevenue_-1', 'sellingGeneralAndAdministration_-1',
'netIncomeCommonStockholders', 'interestPaidCfo', 'interestPaidCff',
'netIncomeCommonStockholders_-1', 'interestPaidCfo_-1', 'interestPaidCff_-1',
'cfo', 'cfi', 'cff', 'endCashPosition',
'cfo_-1', 'cfi_-1', 'cff_-1', 'endCashPosition_-1'
"""

"""0. Definitions"""
# capitalization in reporting currency
capitalization_price = df['price'] * df['sharesOutstanding']
capitalization_report = capitalization_price / df['fxRate']
# dividend rate & buyback rate
dividend_rate = df['lastDividend'] / df['price']
buyback_rate = df['lastBuyback'] / df['price']
# Capital Structure related
total_debt = df['currentDebtAndCapitalLease'] + df['longTermDebtAndCapitalLease']
noncommonInterest = df['totalEquityAndMinorityInterest'] - df['commonStockEquity']
# Operating assets and liabilities
op_assets = df['totalAssets'] - df['cashAndCashEquivalents']
op_liabilities = df['totalAssets'] - df['totalEquityAndMinorityInterest'] - total_debt
net_op_assets = op_assets - op_liabilities

"""1. Stability Ratios"""
# leverage ratio
current_ratio = df['currentAssets'] / df['currentLiabilities']
debt_ratio = total_debt / df['commonStockEquity']
# Accrual anomaly - Scaled Total Accruals
# sta = (df['NetIncomeCommonStockholders'] - df['CFO']) / df['TotalAssets']
# Scaled Net Operating Assets
sona = net_op_assets / df['totalAssets']
# Todo: Dr. Messod Beneish's PROBM model

"""2. Quality Ratios: """
ppe_multiple = df['netPPE'] / df['totalRevenue']
# Net working Capital
net_working_capital = np.where((df['currentAssets'] == 0) & (df['currentLiabilities'] == 0), df['commonStockEquity'], df['currentAssets'] - df['currentLiabilities'])
# Return measurements
grossProfit = df['totalRevenue'] - df['costOfRevenue']
grossProfit_1 = df['totalRevenue_-1'] - df['costOfRevenue_-1']
ebit = grossProfit - df['sellingGeneralAndAdministration']
ebit_1 = grossProfit_1 - df['sellingGeneralAndAdministration_-1']
# ROC
greenblatt_capital = df['netPPE'] + net_working_capital - df['cashAndCashEquivalents']  # Not consider capital structure
greenblatt_roc = ebit / greenblatt_capital

"""Liquidity test"""
# liquidity_coverage_ratio
discounted_st_investment = df['otherShortTermInvestments']* 0.9
core_liquid_assets = df['cashAndCashEquivalents'] + discounted_st_investment
core_lcr = core_liquid_assets / total_debt
lcr = (core_liquid_assets + df['longTermFinancialAssets']) / (df['currentLiabilities'] + df['longTermDebtAndCapitalLease'])

"""Price Ratios: """
# defining excess_cash
excess_cash = np.where(net_working_capital >= df['cashAndCashEquivalents'], df['cashAndCashEquivalents'], net_working_capital)
# Non-operating_noncash assets
nonop_noncash_assets = 0.9 * (df['investmentProperties'] + df['longTermEquityInvestment']) + discounted_st_investment 
# enterprise value
enterprise_value = capitalization_report + total_debt + noncommonInterest - excess_cash - nonop_noncash_assets
ebit_tev = ebit / enterprise_value

df['sales_growth'] = df['totalRevenue']/df['totalRevenue_-1']-1
df['Avg_Gross_margin'] = (grossProfit/df['totalRevenue'] + grossProfit_1/df['totalRevenue_-1'])/2
df['Avg_ebit_margin'] = (ebit/df['totalRevenue'] + ebit_1/df['totalRevenue_-1'])/2

df['EV'] = enterprise_value # may need to update the price
df['EBIT/EV'] = ebit_tev
df['ROC'] = greenblatt_roc
df['Debt ratio'] = debt_ratio
df['Current ratio'] = current_ratio
df['Dividend rate'] = dividend_rate
df['Buyback rate'] = buyback_rate
df['PPE_multiple'] = ppe_multiple
df['lcr'] = lcr
df['core_lcr'] = core_lcr

# display(df)

In [4]:
# filter by market
if market == 'HK':
  exchange_condition = df['market'] == 'hk_market'
elif market == 'CN':
  exchange_condition = df['market'] == 'cn_market'
elif market == 'US':
  exchange_condition = df['market'] == 'us_market'
else:
  exchange_condition = (df['market'] not in ['hk_market', 'cn_market', 'us_market'])

df = df.loc[exchange_condition]

display(df)

Unnamed: 0,ticker,shortName,sector,industry,market,price,priceCurrency,sharesOutstanding,reportCurrency,fxRate,...,EV,EBIT/EV,ROC,Debt ratio,Current ratio,Dividend rate,Buyback rate,PPE_multiple,lcr,core_lcr
0,000001.SZ,PING AN BANK,Financial Services,Banks—Regional,cn_market,14.04,CNY,1.940590e+10,CNY,1.000000,...,6.356038e+11,0.191053,-1.519121,1.846487,,0.041243,0.0,0.104050,0.671888,0.671888
1,000002.SZ,CHINA VANKE CO,Real Estate,Real Estate—Development,cn_market,17.44,CNY,9.724200e+09,CNY,1.000000,...,3.259036e+11,0.232491,0.319760,1.337640,1.268682,0.158336,0.0,0.092490,0.084974,0.368498
2,000006.SZ,SHN ZHENYE GROUP,Real Estate,Real Estate—Development,cn_market,5.94,CNY,1.350000e+09,CNY,1.000000,...,1.246037e+10,0.099378,0.154353,1.207039,2.201449,0.065755,0.0,0.027814,0.243405,0.473683
3,000008.SZ,CHINA HIGH-SPEED R,Industrials,Infrastructure Operations,cn_market,2.49,CNY,2.716380e+09,CNY,1.000000,...,9.275467e+09,0.045674,0.448313,0.818039,1.102672,0.028972,0.0,0.598389,0.179871,0.219227
4,000009.SZ,CHINA BAOAN GROUP,Industrials,Conglomerates,cn_market,12.70,CNY,2.579210e+09,CNY,1.000000,...,4.591379e+10,0.080059,0.327542,1.895944,1.448853,0.020589,0.0,0.553188,0.364856,0.631182
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5208,688789.SS,HANGZHOU HONGHUA DIGITAL TECHNO,Industrials,Specialty Industrial Machinery,cn_market,183.46,CNY,7.611560e+07,CNY,1.000000,...,1.330876e+10,0.025256,0.420373,0.011479,5.528243,0.000166,0.0,0.351590,2.717176,37.181340
5209,688798.SS,SHANGHAI AWINIC TECHNOLOGY CO L,Technology,Semiconductors,cn_market,113.00,CNY,1.660000e+08,CNY,1.000000,...,1.652902e+10,0.047479,0.393243,0.158697,3.382583,0.000380,0.0,0.295866,2.223062,4.684993
5211,688800.SS,SUZHOU RECODEAL INTERCONNECT SY,Industrials,Electrical Equipment & Parts,cn_market,102.17,CNY,1.131570e+08,CNY,1.000000,...,1.052718e+10,0.018122,0.255695,0.044652,2.592263,0.000178,0.0,0.348550,1.203131,13.271815
5212,688819.SS,TIANNENG BATTERY GROUP CO LTD,Consumer Cyclical,Auto Parts,cn_market,36.83,CNY,9.721000e+08,CNY,1.000000,...,3.363388e+10,0.138937,2.782742,0.414274,1.326139,0.020900,0.0,0.166236,0.660497,2.350877


In [5]:
# filter by sector
if sector == 'Any':
  sector_condition = df['sector'] != ''
else:
  if sector == "Real Estate":
    missing_stocks = df['ticker'].str.contains("0019.HK")
    not_service = ~df["shortName"].str.contains("SERVICES") & ~df["shortName"].str.contains("MOTOR") & ~df["shortName"].str.contains("SERVICE") & ~df["shortName"].str.contains("SER")
    sector_condition = ((df['sector'] == sector) | missing_stocks) & (not_service)
  else:
    sector_condition = df['sector'] == sector

df = df.loc[sector_condition]

# Unfiltered Data
display(df[['ticker', 'shortName', 'price', 'priceCurrency', 'Dividend rate', 'Buyback rate', 'sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin', 'ROC', 'lcr' ,'core_lcr', 'Current ratio', 'Debt ratio']])

Unnamed: 0,ticker,shortName,price,priceCurrency,Dividend rate,Buyback rate,sales_growth,Avg_Gross_margin,Avg_ebit_margin,ROC,lcr,core_lcr,Current ratio,Debt ratio
0,000001.SZ,PING AN BANK,14.04,CNY,0.041243,0.0,0.103502,1.000000,0.712901,-1.519121,0.671888,0.671888,,1.846487
1,000002.SZ,CHINA VANKE CO,17.44,CNY,0.158336,0.0,0.080375,0.255349,0.204932,0.319760,0.084974,0.368498,1.268682,1.337640
2,000006.SZ,SHN ZHENYE GROUP,5.94,CNY,0.065755,0.0,0.052419,0.450108,0.419646,0.154353,0.243405,0.473683,2.201449,1.207039
3,000008.SZ,CHINA HIGH-SPEED R,2.49,CNY,0.028972,0.0,0.155036,0.356869,0.200403,0.448313,0.179871,0.219227,1.102672,0.818039
4,000009.SZ,CHINA BAOAN GROUP,12.70,CNY,0.020589,0.0,0.649123,0.316996,0.212751,0.327542,0.364856,0.631182,1.448853,1.895944
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5208,688789.SS,HANGZHOU HONGHUA DIGITAL TECHNO,183.46,CNY,0.000166,0.0,0.317425,0.430488,0.361383,0.420373,2.717176,37.181340,5.528243,0.011479
5209,688798.SS,SHANGHAI AWINIC TECHNOLOGY CO L,113.00,CNY,0.000380,0.0,0.618599,0.364878,0.305664,0.393243,2.223062,4.684993,3.382583,0.158697
5211,688800.SS,SUZHOU RECODEAL INTERCONNECT SY,102.17,CNY,0.000178,0.0,0.477297,0.261015,0.225537,0.255695,1.203131,13.271815,2.592263,0.044652
5212,688819.SS,TIANNENG BATTERY GROUP CO LTD,36.83,CNY,0.020900,0.0,0.103029,0.178933,0.138344,2.782742,0.660497,2.350877,1.326139,0.414274


In [6]:
# common fitlering conditions: 
# 1. Reasonable leverage and Good Liqudity
common_1 = (core_lcr >= 0.2) & (lcr >= 0.6)
common_2 = (current_ratio >= 0.8) & (debt_ratio <= 0.8)
common_3 = (total_debt / df['currentAssets']) < 1.5
# 2. Avoid negative gross margin and value trap
common_4 = capitalization_price > 1000000000
common_5 = (capitalization_price > 6000000000) & (df['Avg_Gross_margin'] > 0.01)
common_6 = (capitalization_price <= 6000000000) & ((dividend_rate > 0.01) | (buyback_rate > 0.01))

# filtered by common conditions
common_df = df
common_df = common_df.loc[common_1 & common_2 & common_3 & common_4 & (common_5 | common_6)]
# display(common_df.loc[:, ['Ticker', 'Name', 'Price', 'Price_currency', 'Dividend rate', 'Buyback rate', 'Avg_sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin', 'ROC', 'lcr' ,'core_lcr', 'Current ratio', 'Debt ratio']])

#Step 3. Screening

Filter the dataset using the above conditions

In [7]:
# 1st set of conditions: Stability
asset_1 = (core_lcr >= 1) & (lcr >= 0.5)
# asset_2 = (dividend_rate > 0.01) | (buyback_rate > 0.01)
asset_3 = (current_ratio >= 1) & (debt_ratio <= 0.5)

# filtered by 1st set of conditions: Stability
asset_df = common_df
asset_df['is_Asset'] = True
asset_df = asset_df.loc[asset_1 & asset_3]
# display(asset_df.loc[:, ['Ticker', 'Name', 'Price', 'Price_currency', 'Dividend rate', 'Buyback rate', 'ROC', 'Avg_sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin', 'lcr' ,'core_lcr', 'Current ratio', 'Debt ratio']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  asset_df['is_Asset'] = True


In [8]:
# 2nd set of conditions: Stalwart
stalwart_1 = (df['Avg_Gross_margin'] > 10) & (df['Avg_ebit_margin'] > 15)
stalwart_2 = df['sales_growth'] >= -0.10
stalwart_3 = greenblatt_roc > 0.03

# fitlered by 2nd set of conditions: Stalwart
Stalwart_df = common_df
Stalwart_df['is_Stalwart'] = True
Stalwart_df = Stalwart_df.loc[stalwart_1 & stalwart_2 & stalwart_3]
# display(Stalwart_df.loc[:, ['Ticker', 'Name', 'Price', 'Price_currency', 'Dividend rate', 'Buyback rate', 'Avg_sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin', 'ROC', 'lcr' ,'core_lcr', 'Current ratio', 'Debt ratio']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Stalwart_df['is_Stalwart'] = True


In [9]:
# combine the results
result_set = pd.concat([Stalwart_df, asset_df])

# Sort the set
result_set = result_set.sort_values('EV')

# find the subset of Asset_play and Stalwart
is_asset_stalwart = [result_set['is_Asset'].eq(True) & result_set['is_Stalwart'].eq(True)]
result_set['A_S'] = np.select(is_asset_stalwart, [True], default='')

display(result_set[['ticker', 'shortName', 'sector', 'price', 'Dividend rate', 'Buyback rate', 'EV', 'EBIT/EV', 'ROC', 'sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin', 'lcr' ,'core_lcr', 'Current ratio', 'Debt ratio', 'A_S']])

Unnamed: 0,ticker,shortName,sector,price,Dividend rate,Buyback rate,EV,EBIT/EV,ROC,sales_growth,Avg_Gross_margin,Avg_ebit_margin,lcr,core_lcr,Current ratio,Debt ratio,A_S
4202,600938.SS,CNOOC LIMITED,Energy,16.04,0.426880,0.0,-1.024718e+11,-1.567866,0.314743,0.573098,0.629814,0.583901,0.913717,1.856857,1.893241,0.236530,
3934,600512.SS,TENGDA CONSTRUCTION GROUP CO.,Industrials,2.75,0.025026,0.0,-8.013553e+08,-1.512942,0.718075,0.173076,0.162683,0.145988,0.864124,1129.836867,1.901784,0.000501,
4181,600894.SS,GUANGZHOU GUANGRI STOCK CO LTD,Industrials,7.06,0.057203,0.0,-1.776079e+08,-3.688209,0.854642,0.146662,0.127233,0.095509,1.210001,35.755326,1.952635,0.012595,
4088,600751.SS,HNA TECHNOLOGY CO LTD,Technology,2.68,0.068574,0.0,7.126071e+08,13.848679,-3.156074,-0.490155,0.075689,0.057613,2.098801,190.190088,2.142719,0.005836,
3861,600373.SS,CHINESE UNIVERSE PUBLISHING&MED,Communication Services,9.95,0.064530,0.0,1.388753e+09,2.070548,-1.484452,0.036270,0.389276,0.288864,1.510645,4.818061,2.097458,0.209786,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3283,300760.SZ,SHENZHEN MINDRAY B,Healthcare,316.15,0.007929,0.0,3.694697e+11,0.032717,2.822726,0.201834,0.649865,0.471372,1.611192,62.483493,2.485588,0.008181,
4409,601888.SS,CHINA TOURISM GROUP DUTY FREE C,Consumer Cyclical,199.67,0.005056,0.0,3.727795e+11,0.049641,0.766980,0.286684,0.371609,0.253384,1.459664,7.766701,3.719121,0.062566,
4263,601088.SS,CHINA SHENHUA ENERGY COMPANY LT,Energy,28.06,0.084661,0.0,4.264328e+11,0.254629,0.505413,0.437073,0.367217,0.358277,1.284184,2.934052,2.399466,0.160019,
265,000858.SZ,WULIANGYE YIBIN CO,Consumer Defensive,204.65,0.012607,0.0,7.161429e+11,0.056579,1.580697,0.155056,0.747561,0.608518,3.206850,160.885140,4.878003,0.004594,


In [10]:
display_set = result_set.loc[result_set['A_S'] != '']
display(display_set[['ticker', 'shortName', 'sector', 'price', 'Dividend rate', 'Buyback rate', 'EV', 'EBIT/EV', 'ROC', 'sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin', 'lcr' ,'core_lcr', 'Current ratio', 'Debt ratio']])

Unnamed: 0,ticker,shortName,sector,price,Dividend rate,Buyback rate,EV,EBIT/EV,ROC,sales_growth,Avg_Gross_margin,Avg_ebit_margin,lcr,core_lcr,Current ratio,Debt ratio


In [11]:
# Export the screen result to csv file
export = 'Yes' #@param ["Yes","No"]
raw = 'No' #@param ["Yes","No"]
simplified = 'Yes' #@param ["Yes","No"]

if export == "Yes":
  from google.colab import files
  csv_name = "Screener_result.csv"
  if raw == "Yes":
    csv_name = "unfiltered_result.csv"
    if simplified == "Yes":
      df[['Ticker', 'Name', 'Sector', 'Price', 'Price_currency', 'Reporting_Currency', 'Dividend rate', 'Buyback rate', 'ROC', 'Avg_sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin', 'lcr' ,'core_lcr', 'Current ratio', 'Debt ratio']].to_csv(csv_name)
    else:
      csv_name = f"Raw_{csv_name}"
      df.to_csv(csv_name)
  else:
    if simplified == "Yes":
      result_set[['ticker', 'shortName', 'sector', 'price', 'Dividend rate', 'Buyback rate', 'EV', 'EBIT/EV', 'ROC', 'sales_growth', 'Avg_Gross_margin', 'Avg_ebit_margin', 'lcr' ,'core_lcr', 'Current ratio', 'Debt ratio', 'A_S']].to_csv(csv_name)
    else:
      csv_name = f"Raw_{csv_name}"
      result_set.to_csv(csv_name)

  # export
  files.download(csv_name)

print("***End of the Screener***")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

***End of the Screener***
