In [1]:
from bs4 import BeautifulSoup
from random import randrange
from tabulate import tabulate

import datetime
import json
import numpy as np
import pandas as pd
import requests
import requests.packages.urllib3
import time

In [6]:
def request_headler():
    """Build the HTTP headers sent with every page request.

    Returns:
        dict: A new mapping with the ``User-Agent``, ``Accept``,
        ``Accept-Encoding`` and ``Accept-Language`` headers.
    """
    return {
        "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.4 Safari/605.1.15',
        "Accept": "application/xml, text/xml, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate",
        # The value must hold only the language list; repeating the header
        # name inside the value produces a malformed Accept-Language header.
        "Accept-Language": "en,en-US;q=0.8,zh-TW;q=0.6,zh;q=0.4,zh-CN;q=0.2",
    }


def tableDataText(table):
    """Extract the cell text of an HTML table as a list of rows.

    Args:
        table: A parsed ``<table>`` element exposing ``find_all`` (for
            example a BeautifulSoup tag).

    Returns:
        list[list[str]]: One inner list per ``<tr>``. When the first row
        contains ``<th>`` cells, their stripped text becomes the first
        entry (header row); every other row contributes the stripped text
        of its ``<td>`` cells. A table without any ``<tr>`` yields ``[]``.
    """
    def rowgetDataText(tr, coltag='td'):  # td (data) or th (header)
        return [cell.get_text(strip=True) for cell in tr.find_all(coltag)]

    trs = table.find_all('tr')
    if not trs:
        # Nothing to parse; indexing trs[0] below would raise IndexError.
        return []

    rows = []
    headerow = rowgetDataText(trs[0], 'th')
    if headerow:  # if there is a header row include it first
        rows.append(headerow)
        trs = trs[1:]
    rows.extend(rowgetDataText(tr, 'td') for tr in trs)
    return rows


def get_stock_name(earnings_summary_html):
    """Return the first whitespace-separated token of the page's <title> text.

    Args:
        earnings_summary_html: Parsed page exposing ``find`` (e.g. a
            BeautifulSoup document).

    Returns:
        str: The leading word of the title text.
    """
    title_words = earnings_summary_html.find('title').text.split()
    return title_words[0]


def get_hsi_index_list(hsi_index_tc_html):
    """Collect stock codes from the ``<a class="a14 cls">`` links of a page.

    Each link's text is cut at the first ``.`` and the leading part is kept.

    Args:
        hsi_index_tc_html: Parsed page exposing ``find_all``.

    Returns:
        list[str]: One entry per matching link, in document order.
    """
    anchors = hsi_index_tc_html.find_all('a', {'class': 'a14 cls'})
    return [anchor.text.split('.')[0] for anchor in anchors]


def get_hsi_index_list_from_kf(hsi_index_tc_html):
    """Collect the text of every ``<a class="list">`` link of a page.

    Args:
        hsi_index_tc_html: Parsed page exposing ``find_all``.

    Returns:
        list[str]: The link texts, unmodified, in document order.
    """
    hsi_link_list = hsi_index_tc_html.find_all('a', {'class': 'list'})
    return [link.text for link in hsi_link_list]


def to_float(string):
    """Convert a scraped cell value to ``float``, defaulting to ``0.0``.

    Args:
        string: The value to convert, normally the text of a table cell.
            Thousands separators (``"200,931"``) are removed before parsing.

    Returns:
        float: The parsed number, or ``0.0`` when the value is missing
        (``None``), empty, or not numeric (for example ``"-"`` or ``"N/A"``).
    """
    if isinstance(string, str):
        # float() rejects grouped digits, which would silently become 0.0
        string = string.replace(',', '')

    try:
        return float(string)
    except (TypeError, ValueError):
        # TypeError covers None and other non-numeric objects
        return 0.00
        
        
def get_the_earning_summary_dataframe(earnings_summary_html):
    """Turn the ``cnhk-list`` table of an earnings summary page into a DataFrame.

    Rows with one cell or fewer are discarded. The first remaining row
    supplies the column names and the rest become the data rows. A column
    named ``"Trend"`` is removed when present.

    Args:
        earnings_summary_html: Parsed page exposing ``find``.

    Returns:
        pandas.DataFrame: The table contents as strings.
    """
    summary_table = earnings_summary_html.find('table', id="cnhk-list")

    # keep only the rows that carry more than one cell
    table_rows = [cells for cells in tableDataText(summary_table) if len(cells) > 1]

    # first kept row is the header, everything after it is data
    summary = pd.DataFrame(table_rows[1:], columns=table_rows[0])

    if "Trend" in summary.columns:
        summary = summary.drop(columns=["Trend"])

    return summary


def add_new_rows_to_dataframe(dataframe):
    """Append derived market price, price-to-book and yield-growth rows.

    Column 0 is treated as the row label; every following column is one
    reporting period. For each period the function reads, by row position,
    dividend per share (row 4), yield % (row 6) and NAV (row 8) and derives:

    * market price  = dividend per share / (yield % / 100), rounded to 2
      decimals, or 0.0 when the yield is not positive;
    * price to book = round(market price / NAV, 4) * 100, or 0.0 when the
      NAV is not positive;
    * yield growth  = round((yield - previous yield) / previous yield, 4)
      * 100, or 0 for the first period or a non-positive previous yield.

    Args:
        dataframe: Earnings summary frame; it is modified in place.

    Returns:
        The same ``dataframe`` object with three rows appended.
    """
    # positional row indices of the source fields
    divdend_per_share_index = 4
    yield_percentage_index = 6
    nav_index = 8

    # each list starts with the row label, followed by one value per period
    market_price_list = ['Market Price ($)']
    price_to_book_list = ['Price to Book (%)']
    yield_growth_list = ['Yield Growth (%)']

    previous_yield_percentage = None  # None marks "no earlier period yet"

    # Walk every period column rather than assuming exactly five of them, so
    # the new rows always have as many entries as the frame has columns.
    for column_index in range(1, dataframe.shape[1]):
        # iloc[row, col] is purely positional; iloc[row][col] would index the
        # string-labelled row Series with an integer key, which pandas
        # deprecates.
        divdend_per_share = to_float(dataframe.iloc[divdend_per_share_index, column_index])
        yield_percentage = to_float(dataframe.iloc[yield_percentage_index, column_index])
        nav = to_float(dataframe.iloc[nav_index, column_index])

        # calculate the market price and price to book
        if yield_percentage > 0:
            market_price = round(divdend_per_share / (yield_percentage / 100), 2)
        else:
            market_price = 0.00

        if nav > 0:
            price_to_book = round(market_price / nav, 4) * 100
        else:
            price_to_book = 0.00

        # calculate the yield growth against the previous period
        if previous_yield_percentage is None:
            yield_growth = 0
        elif previous_yield_percentage > 0:
            yield_growth = round(
                (yield_percentage - previous_yield_percentage) / previous_yield_percentage, 4) * 100
        else:
            yield_growth = 0.00

        previous_yield_percentage = yield_percentage

        market_price_list.append(market_price)
        price_to_book_list.append(price_to_book)
        yield_growth_list.append(yield_growth)

    # append the new rows after the current last index label
    dataframe.loc[dataframe.index.max() + 1] = market_price_list
    dataframe.loc[dataframe.index.max() + 1] = price_to_book_list
    dataframe.loc[dataframe.index.max() + 1] = yield_growth_list

    return dataframe


def calculate(dataframe, checking_index, expected_value):
    """Score the share of period columns whose value exceeds a threshold.

    Columns 1 to 5 (column 0 is the row label) of the row at position
    ``checking_index`` are examined; fewer are used when the frame has
    fewer columns.

    Args:
        dataframe: Frame whose cells are numbers or numeric strings.
        checking_index: Positional index of the row to check.
        expected_value: A cell passes when it is strictly greater than this.

    Returns:
        int: Percentage (0-100, rounded) of checked cells that passed.
        Cells that cannot be converted to ``float`` count as failures.
        Returns 0 when there is no period column to check.
    """
    checking_pass = 0
    checking_fail = 0

    # never read past the last column of a narrower frame
    last_column = min(6, dataframe.shape[1])

    for column_index in range(1, last_column):
        try:
            # iloc[row, col] stays positional; iloc[row][col] would use an
            # integer key on a string-labelled Series (deprecated in pandas)
            value = float(dataframe.iloc[checking_index, column_index])
        except (TypeError, ValueError):
            # missing (None) or non-numeric cell
            checking_fail += 1
            continue

        if value > expected_value:
            checking_pass += 1
        else:
            checking_fail += 1

    checked = checking_pass + checking_fail
    if checked == 0:
        return 0

    return round(float(checking_pass / checked) * 100)


def calculate_by_years(dataframe, checking_index, expected_value):
    """Score a row against a threshold, weighting later periods more heavily.

    Columns 1 to 5 (column 0 is the row label) of the row at position
    ``checking_index`` are examined; fewer are used when the frame has
    fewer columns. The n-th period column carries a weight of n, so the
    latest period contributes the most to the score.

    Args:
        dataframe: Frame whose cells are numbers or numeric strings.
        checking_index: Positional index of the row to check.
        expected_value: A cell passes when it is strictly greater than this.

    Returns:
        int: Weighted percentage (0-100, rounded) of passing cells. Cells
        that cannot be converted to ``float`` count as failures. Returns 0
        when there is no period column to check.

    Side effects:
        Prints the accumulated pass and fail weights.
    """
    checking_pass = 0
    checking_fail = 0

    # never read past the last column of a narrower frame
    last_column = min(6, dataframe.shape[1])

    # weight grows with the period so that the latest year scores the most
    for base_score, column_index in enumerate(range(1, last_column), start=1):
        try:
            # iloc[row, col] stays positional; iloc[row][col] would use an
            # integer key on a string-labelled Series (deprecated in pandas)
            value = float(dataframe.iloc[checking_index, column_index])
        except (TypeError, ValueError):
            # missing (None) or non-numeric cell
            checking_fail += base_score
            continue

        if value > expected_value:
            checking_pass += base_score
        else:
            checking_fail += base_score

    print('calculate_by_years:{}, {}'.format(checking_pass, checking_fail))

    total_weight = checking_pass + checking_fail
    if total_weight == 0:
        return 0

    return round(float(checking_pass / total_weight) * 100)


def rule_1(dataframe):
    """Rule 1: score EPS (Earnings Per Share) growth above 0%.

    Scores the row at position 3 with ``calculate_by_years`` against a
    threshold of 0.
    """
    return calculate_by_years(dataframe, checking_index=3, expected_value=0)


def rule_2a(dataframe):
    """Rule 2a: score dividend yield above 5%.

    Scores the row at position 6 with ``calculate_by_years`` against a
    threshold of 5.
    """
    return calculate_by_years(dataframe, checking_index=6, expected_value=5)


def rule_2b(dataframe):
    """Rule 2b: score dividend yield growth above 0%.

    Scores the row at position 13 with ``calculate_by_years`` against a
    threshold of 0.
    """
    # NOTE(review): position 13 is presumably the 'Yield Growth (%)' row
    # appended by add_new_rows_to_dataframe - confirm against the frame layout.
    return calculate_by_years(dataframe, checking_index=13, expected_value=0)


def calculate_score(stock, stock_name, score_dict, dataframe):
    """Score one stock against all rules and record the result.

    The total score is the rounded average of the three rule scores. The
    scores are appended to the lists held in ``score_dict`` and a text
    report plus the frame itself are printed.

    Args:
        stock: Stock code.
        stock_name: Display name of the stock.
        score_dict: Mapping of column name to list; mutated in place. It
            must contain the keys ``stock``, ``stock_name``, ``total_score``,
            ``eps_score``, ``yield_score`` and ``yield_growth_score``.
        dataframe: Earnings summary frame passed to each rule.

    Returns:
        pandas.DataFrame: A frame built from the updated ``score_dict``.
    """
    # evaluate the rules: EPS growth (>0%), yield (>5%), yield growth (>0%)
    eps_score = rule_1(dataframe)
    yield_score = rule_2a(dataframe)
    yield_growth_score = rule_2b(dataframe)

    total_score = round(np.average([eps_score, yield_score, yield_growth_score]), 0)

    # record this stock's scores, one value per score_dict column
    recorded = (
        ('stock', stock),
        ('stock_name', stock_name),
        ('total_score', total_score),
        ('eps_score', eps_score),
        ('yield_score', yield_score),
        ('yield_growth_score', yield_growth_score),
    )
    for key, value in recorded:
        score_dict[key].append(value)

    report_template = (
        '{} {} \n'
        'Total Score (over 100):{} \n'
        '====================\n'
        'Rule 1:Earnings Per Share Score:{} \n'
        'Rule 2a:Dividend Yield:{} \n'
        'Rule 2b:Dividend Yield Growth:{}'
    )
    print(report_template.format(stock, stock_name, total_score,
                                 eps_score, yield_score, yield_growth_score))
    print(dataframe)

    return pd.DataFrame(score_dict)
    

def dump_to_json(dataframe):
    """Print a DataFrame as pretty-printed JSON records (4-space indent).

    Args:
        dataframe: The frame to serialise, one JSON object per row.
    """
    records = json.loads(dataframe.to_json(orient="records"))
    pretty = json.dumps(records, indent=4)
    print(pretty)

In [7]:
# When True, skip downloading the index constituents and use a short
# hard-coded stock list instead.
debug_mode = True

aastock_link_en = 'http://www.aastocks.com/en/stocks/analysis/company-fundamental/earnings-summary'
aastock_link_tc = 'http://www.aastocks.com/tc/stocks/analysis/company-fundamental/earnings-summary'
asstock_hsi_link = 'http://www.aastocks.com/tc/stocks/market/index/hk-index-con.aspx?index=HSI'
kf_hsi_link = 'http://fund.kf-studio.net/hsi_stock.php'

# Seconds to wait for a single HTTP response. Without a timeout a stalled
# server would block the notebook forever.
REQUEST_TIMEOUT = 10

headers = request_headler()
session = requests.Session()


def fetch_html(url):
    """GET ``url`` through the shared session and parse the body with lxml.

    Raises:
        requests.HTTPError: When the server answers with a 4xx/5xx status,
            instead of handing an error page to the parser.
        requests.Timeout: When no response arrives within REQUEST_TIMEOUT.
    """
    # NOTE(review): verify=False disables TLS certificate checks if a
    # redirect lands on https - confirm this is intended.
    response = session.get(url, headers=headers, allow_redirects=True,
                           verify=False, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')


# score dict dataframe: one list per output column
column_name = ['stock', 'stock_name', 'total_score', 'eps_score', 'yield_score', 'yield_growth_score']
score_dict = {name: [] for name in column_name}

# earnings summary dataframes keyed by stock code
stock_tc_earnings_summary = {}
stock_en_earnings_summary = {}

# start

# get the hsi list from website
if not debug_mode:
    hsi_index_tc_html = fetch_html(kf_hsi_link)
    stock_list = get_hsi_index_list_from_kf(hsi_index_tc_html)
    print(stock_list)
else:
    print('Debug Mode')
    stock_list = ['03988', '03328']

for stock in stock_list:
    # en version: source of the figures used for scoring
    earnings_summary_en_html = fetch_html('{}?symbol={}'.format(aastock_link_en, stock))

    time.sleep(1)

    # tc version: source of the stock name and the table that is displayed
    earnings_summary_tc_html = fetch_html('{}?symbol={}'.format(aastock_link_tc, stock))

    stock_name = get_stock_name(earnings_summary_tc_html)
    dataframe_tc = get_the_earning_summary_dataframe(earnings_summary_tc_html)

    # get the dataframe from earning summary page
    dataframe_en = get_the_earning_summary_dataframe(earnings_summary_en_html)

    # use the earning summary page to calculate the score
    dataframe_en = add_new_rows_to_dataframe(dataframe_en)
    calculate_score(stock, stock_name, score_dict, dataframe_en)

    # update the earning summary dataframe
    stock_tc_earnings_summary[stock] = dataframe_tc
    stock_en_earnings_summary[stock] = dataframe_en

    print(tabulate(dataframe_tc, headers='keys', tablefmt='psql'))

    # sleep to avoid sending too many requests in a short time
    time.sleep(2)

Debug Mode
calculate_by_years:12, 3
calculate_by_years:13, 2
calculate_by_years:12, 3
03988 中國銀行 
Total Score (over 100):82.0 
Rule 1:Earnings Per Share Score:80 
Rule 2a:Dividend Yield:87 
Rule 2b:Dividend Yield Growth:80
             Closing Date  2015/12  2016/12  2017/12  2018/12  2019/12
0         Net Profit (Mn)  200,931  182,887  206,699  205,046  209,425
1   Net Profit Growth (%)    -5.06    -8.98    13.02    -0.80     2.14
2      Earnings Per Share   0.6586   0.6001   0.6714   0.6718   0.6817
3          EPS Growth (%)   -13.48    -8.89    11.88     0.06     1.47
4      Dividend Per Share   0.2058   0.1867   0.2110   0.2095   0.2134
5                  PE (X)     5.86     7.03     5.50     4.36     3.65
6               Yield (%)     5.33     4.42     5.72     7.15     8.57
7     Dividend Payout (%)    31.25    31.11    31.43    31.19    31.31
8                     NAV   5.2134   5.3288   6.0926   6.2385   7.0291
9                Currency      HKD      HKD      HKD      HKD      

In [137]:
# Rank the collected scores, highest total first, and display the table.
score_df = pd.DataFrame(score_dict).sort_values(by=['total_score'], ascending=False)
score_df

Unnamed: 0,stock,stock_name,total_score,eps_score,yield_score,yield_growth_score
49,3988,中國銀行,82.0,80,87,80
48,3328,交通銀行,82.0,80,87,80
16,267,中信股份,78.0,80,60,93
40,2007,碧桂園,78.0,93,60,80
29,1038,長江基建集團,78.0,80,60,93
46,2388,中銀香港,71.0,80,60,73
27,939,建設銀行,71.0,53,80,80
26,883,中國海洋石油,69.0,60,67,80
0,1,長和,69.0,87,60,60
5,11,恆生銀行,67.0,87,33,80


In [125]:
# For every scored stock print its score line followed by the Chinese
# earnings summary table stored under the same stock code.
for index, score_row in score_df.iterrows():
    stock_code = score_row['stock']
    print('Score:{} {}({})'.format(score_row['total_score'], stock_code, score_row['stock_name']))
    print(tabulate(stock_tc_earnings_summary[stock_code], headers='keys', tablefmt='psql'))

Score:67.0 03988(中國銀行)
+----+------------------+-----------+-----------+-----------+-----------+-----------+--------+
|    | 截止日期         | 2015/12   | 2016/12   | 2017/12   | 2018/12   | 2019/12   | 走勢   |
|----+------------------+-----------+-----------+-----------+-----------+-----------+--------|
|  0 | 盈利(百萬)       | 200,931   | 182,887   | 206,699   | 205,046   | 209,425   |        |
|  1 | 盈利增長(%)      | -5.06     | -8.98     | 13.02     | -0.80     | 2.14      |        |
|  2 | 每股盈利         | 0.6586    | 0.6001    | 0.6714    | 0.6718    | 0.6817    |        |
|  3 | 每股盈利增長(%)  | -13.48    | -8.89     | 11.88     | 0.06      | 1.47      |        |
|  4 | 每股派息         | 0.2058    | 0.1867    | 0.2110    | 0.2095    | 0.2134    |        |
|  5 | 市盈率(倍)       | 5.86      | 7.03      | 5.50      | 4.36      | 3.65      |        |
|  6 | 周息率(%)        | 5.33      | 4.42      | 5.72      | 7.15      | 8.57      |        |
|  7 | 派息比率(%)      | 31.25     | 31.11     | 31.43     | 31.1

In [144]:
temp = score_df.copy()

# Display-only relabelling: rename() returns a new frame that becomes the
# cell output, while temp itself keeps the original column names.
column_labels = {
    'stock': 'Stock',
    'stock_name': 'Stock Name',
    'total_score': 'Score',
    'eps_score': '盈利增長',
    'yield_score': '派息',
    'yield_growth_score': '派息增長',
}
temp.rename(columns=column_labels)

Unnamed: 0,Stock,Stock Name,Score,盈利增長,派息,派息增長
49,3988,中國銀行,82.0,80,87,80
48,3328,交通銀行,82.0,80,87,80
16,267,中信股份,78.0,80,60,93
40,2007,碧桂園,78.0,93,60,80
29,1038,長江基建集團,78.0,80,60,93
46,2388,中銀香港,71.0,80,60,73
27,939,建設銀行,71.0,53,80,80
26,883,中國海洋石油,69.0,60,67,80
0,1,長和,69.0,87,60,60
5,11,恆生銀行,67.0,87,33,80
