## Yahoo Finance

In [3]:
import yfinance as yf
import pandas as pd
from bs4 import BeautifulSoup
import re
from selenium import webdriver
import chromedriver_binary
import string
import datetime
from forex_python.converter import CurrencyRates
# Render floats with three decimal places whenever pandas displays them.
pd.options.display.float_format = '{:.3f}'.format

### Functions for screener results and getting data

In [4]:
def get_rate(from_currency, to_currency):
    """Look up the exchange rate between two currencies via forex_python.

    Both currency codes are upper-cased before the lookup, so ``'usd'`` and
    ``'USD'`` are treated the same.

    Args:
        from_currency: Code of the currency to convert from (e.g. ``'JPY'``).
        to_currency: Code of the currency to convert to (e.g. ``'USD'``).

    Returns:
        Whatever ``CurrencyRates().get_rate`` returns for the pair.
    """
    converter = CurrencyRates()
    source_code, target_code = from_currency.upper(), to_currency.upper()
    return converter.get_rate(source_code, target_code)

def convert(value, rate):
    """Scale a monetary ``value`` by the exchange ``rate``.

    Args:
        value: Amount in the source currency, or ``None`` / ``''`` when no
            figure is available.
        rate: Multiplier that turns the source currency into the target one.

    Returns:
        ``value * rate``, or ``''`` (the missing-value marker used throughout
        this notebook) when ``value`` is ``None`` or an empty string.
    """
    # Only absent values count as missing. A genuine 0 (e.g. zero debt) must
    # stay 0 instead of being blanked out like an unavailable figure.
    if value is None or value == '':
        return ''
    return value * rate

In [5]:
# Keys copied from a ticker's ``info`` mapping, in the order they appear in
# each output record.
_INFO_FIELDS = (
    'symbol',
    'shortName',
    'country',
    'averageVolume',
    'currentPrice',
    'currency',
    'sector',
    'industry',
    'profitMargins',
    'operatingMargins',
    'operatingCashflow',
    'returnOnAssets',
    'returnOnEquity',
    'trailingEps',
    'totalRevenue',
    'revenuePerShare',
    'revenueGrowth',
    'grossProfits',
    'netIncomeToCommon',
    'totalCash',
    'totalCashPerShare',
    'totalDebt',
    'bookValue',
    'marketCap',
    'enterpriseValue',
    'trailingPE',
    'earningsQuarterlyGrowth',
    'forwardPE',
    'pegRatio',
    'priceToSalesTrailing12Months',
    'priceToBook',
    'enterpriseToRevenue',
    'beta',
    'fiftyTwoWeekHigh',
    'fiftyTwoWeekLow',
    'fiftyDayAverage',
    'twoHundredDayAverage',
    'trailingAnnualDividendYield',
    'fiveYearAvgDividendYield',
)

# Subset of _INFO_FIELDS whose values are passed through ``convert`` with the
# exchange rate; every other field is copied unchanged.
_CONVERTED_FIELDS = frozenset((
    'operatingCashflow',
    'trailingEps',
    'totalRevenue',
    'revenuePerShare',
    'grossProfits',
    'netIncomeToCommon',
    'totalCash',
    'totalCashPerShare',
    'totalDebt',
    'marketCap',
    'enterpriseValue',
    'fiftyTwoWeekHigh',
    'fiftyTwoWeekLow',
    'fiftyDayAverage',
    'twoHundredDayAverage',
))


def set_dictionary(companies_info, ticker_info, symbol, curr_rate):
    """Store one company's selected fields in ``companies_info[symbol]``.

    For every name in ``_INFO_FIELDS`` the value is taken from ``ticker_info``;
    names absent from ``ticker_info`` are stored as ``''``. Fields listed in
    ``_CONVERTED_FIELDS`` are passed through ``convert(value, curr_rate)``.

    NOTE(review): 'currentPrice' and 'bookValue' are stored unconverted while
    the other price-like fields are converted — confirm this is intended.

    Args:
        companies_info: Dict that is updated in place and also returned.
        ticker_info: Mapping of field name to value for a single ticker.
        symbol: Key under which the record is stored.
        curr_rate: Exchange-rate multiplier handed to ``convert``.

    Returns:
        The same ``companies_info`` dict, now containing the new record.
    """
    record = {}
    for field in _INFO_FIELDS:
        if field not in ticker_info:
            record[field] = ''
        elif field in _CONVERTED_FIELDS:
            record[field] = convert(ticker_info[field], curr_rate)
        else:
            record[field] = ticker_info[field]

    companies_info[symbol] = record
    return companies_info

In [6]:
"""
Function for scraping screener results. It takes a single parameter, url,
so that it can be reused for the screeners of different countries.
"""
def get_companies(url):
    """Scrape the ticker symbols listed on a screener results page.

    Opens ``url`` in a Chrome WebDriver session, parses the rendered page body
    and collects the link text found in the first cell of each row of the
    first ``<table>``, skipping the first row (presumably the header).

    Args:
        url: Address of the screener results page.

    Returns:
        list[str]: Link texts in page order; rows without a link are skipped.

    Raises:
        AttributeError: If the rendered page contains no ``<table>``.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        html = driver.execute_script('return document.body.innerHTML;')
    finally:
        # Always shut the browser down; otherwise every call leaves a Chrome
        # process (and its driver) running.
        driver.quit()

    soup = BeautifulSoup(html, 'html.parser')
    rows = soup.find('table').find_all('tr')[1:]
    companies_results = []
    for row in rows:
        first_cell = row.find('td')
        link = first_cell.find('a') if first_cell is not None else None
        if link is None:
            # A row without a linked first cell carries no symbol; skip it
            # rather than failing the whole scrape.
            continue
        companies_results.append(link.text)
    return companies_results


"""
Function for getting company financial information for the scraped tickers.
The currency and country parameters are accepted but are not used inside this function;
stocks with a different currency or country are excluded afterwards by filter_info.
"""
def get_companies_info(companies_results, currency, curr_rate, country):
    """Fetch yfinance info for each ticker and collect the selected fields.

    Each symbol is printed (space separated) as it is processed, then its
    record is added through ``set_dictionary``.

    Args:
        companies_results: Iterable of ticker symbols to look up.
        currency: Not used by this function; kept so existing calls still work.
        curr_rate: Exchange-rate multiplier forwarded to ``set_dictionary``.
        country: Not used by this function; kept so existing calls still work.

    Returns:
        dict: Records keyed by symbol, as built by ``set_dictionary``.
    """
    companies_info = dict()
    for ticker in companies_results:
        # Read ``.info`` once per ticker; it may involve a remote request.
        ticker_info = yf.Ticker(ticker).info
        # Fall back to the requested ticker when the payload has no 'symbol',
        # instead of aborting the whole run with a KeyError.
        symbol = ticker_info.get('symbol', ticker)
        print(symbol, end=' ')
        companies_info = set_dictionary(companies_info, ticker_info, symbol, curr_rate)
    return companies_info
            
def set_dataframe(companies_info):
    """Build a pandas DataFrame from the collected company records.

    Args:
        companies_info: Any input accepted by ``pd.DataFrame``; in this
            notebook it is a list of per-company dicts.

    Returns:
        pd.DataFrame: Frame constructed directly from ``companies_info``.
    """
    frame = pd.DataFrame(data=companies_info)
    return frame

def save_to_csv(dataframe, filename):
    """Write ``dataframe`` to ``filename`` as a comma-separated file.

    The file is encoded as ``utf-8-sig`` and the index is not written.

    Args:
        dataframe: pandas DataFrame to export.
        filename: Destination path handed to ``DataFrame.to_csv``.
    """
    dataframe.to_csv(
        path_or_buf=filename,
        index=False,
        encoding='utf-8-sig',
        sep=',',
    )

In [7]:
# filter
def filter_info(given_currency, given_country, country_info):
    """Keep the records that match both a country and a currency.

    Args:
        given_currency: Value each kept record must have under ``'currency'``.
        given_country: Value each kept record must have under ``'country'``.
        country_info: Iterable of dict records with a ``'country'`` key;
            ``'currency'`` is only read for records whose country matches.

    Returns:
        list: Matching records, in their original order.
    """
    return [
        item
        for item in country_info
        if item['country'] == given_country and item['currency'] == given_currency
    ]

# Asian Countries

## Taiwan

In [68]:
# Scrape the Taiwan financial-services screener (this opens a Chrome session).
# NOTE(review): the following cell overwrites taiwan_results with a hard-coded list, so
# this scrape looks redundant; the other country sections keep these two lines commented out.
taiwan_url = 'https://finance.yahoo.com/screener/unsaved/3decab44-9275-48c0-ac94-fdd64f784504?dependentField=sector&dependentValues=Financial%20Services&offset=0&count=100'
taiwan_results = get_companies(taiwan_url)

In [70]:
# Hard-coded Taiwan tickers (24 symbols). This assignment replaces whatever the
# scrape in the previous cell returned.
taiwan_results = ['2886.TW',
 '2884.TW',
 '2885.TW',
 '5880.TW',
 '2892.TW',
 '2880.TW',
 '2887.TW',
 '5876.TW',
 '2801.TW',
 '2887F.TW',
 '2890.TW',
 '2887E.TW',
 '2834.TW',
 '2812.TW',
 '2809.TW',
 '2838.TW',
 '2845.TW',
 '2849.TW',
 '2838A.TW',
 '2897.TW',
 '2897A.TW',
 '2836.TW',
 '5863.TWO',
 '2836A.TW']

In [72]:
# TWD -> USD multiplier, fixed by hand instead of fetched.
# NOTE(review): presumably the live lookup below was unavailable for TWD — confirm,
# and refresh this constant before relying on the converted figures.
# c = CurrencyRates()
# taiwan_rate = c.get_rate('TWD', 'USD')
taiwan_rate = 0.036

In [73]:
# Fetch the selected fields for every Taiwan ticker; monetary fields are scaled by taiwan_rate.
taiwan_companies_info = get_companies_info(taiwan_results, 'TWD', taiwan_rate, 'Taiwan')

2886.TW 2884.TW 2885.TW 5880.TW 2892.TW 2880.TW 2887.TW 5876.TW 2801.TW 2887F.TW 2890.TW 2887E.TW 2834.TW 2812.TW 2809.TW 2838.TW 2845.TW 2849.TW 2838A.TW 2897.TW 2897A.TW 2836.TW 5863.TWO 2836A.TW 

In [74]:
# Keep only records whose country is 'Taiwan' and currency is 'TWD', then show the
# record count before and after filtering.
taiwan_companies_info2 = filter_info('TWD', 'Taiwan', taiwan_companies_info.values())
len(taiwan_companies_info), len(taiwan_companies_info2)

(24, 24)

In [77]:
# One DataFrame row per filtered Taiwan record.
taiwan_companies_df = set_dataframe(taiwan_companies_info2)

In [78]:
# Persist the Taiwan frame; it is read back in the final "save all" section.
save_to_csv(taiwan_companies_df, 'taiwan.csv')

## China

In [14]:
# china_url = 'https://finance.yahoo.com/screener/unsaved/d0302f81-1ed9-41ec-8ebb-56c296f44ce5?dependentField=sector&dependentValues=Financial%20Services&offset=0&count=100'
# china_results = get_companies(china_url)

In [1]:
# Hard-coded China tickers (41 symbols); presumably a saved result of the
# commented-out screener scrape above — confirm before refreshing the data.
china_results = ['601398.SS',
 '600036.SS',
 '601939.SS',
 '601288.SS',
 '601988.SS',
 '601658.SS',
 '601166.SS',
 '000001.SZ',
 '601328.SS',
 '600000.SS',
 '002142.SZ',
 '601998.SS',
 '601818.SS',
 '600016.SS',
 '601229.SS',
 '601169.SS',
 '601009.SS',
 '600926.SS',
 '600919.SS',
 '600015.SS',
 '601825.SS',
 '601916.SS',
 '601838.SS',
 '601077.SS',
 '601577.SS',
 '601997.SS',
 '002966.SZ',
 '002936.SZ',
 '601187.SS',
 '002958.SZ',
 '600928.SS',
 '002948.SZ',
 '601963.SS',
 '601128.SS',
 '601860.SS',
 '600908.SS',
 '002839.SZ',
 '603323.SS',
 '002807.SZ',
 '601528.SS',
 '601665.SS']

In [8]:
# Live CNY -> USD rate from forex_python (the same lookup the get_rate helper wraps).
c = CurrencyRates()
china_rate = c.get_rate('CNY', 'USD')

In [9]:
# Fetch the selected fields for every China ticker; monetary fields are scaled by china_rate.
china_companies_info = get_companies_info(china_results, 'CNY', china_rate, 'China')

601398.SS 600036.SS 601939.SS 601288.SS 601988.SS 601658.SS 601166.SS 000001.SZ 601328.SS 600000.SS 002142.SZ 601998.SS 601818.SS 600016.SS 601229.SS 601169.SS 601009.SS 600926.SS 600919.SS 600015.SS 601825.SS 601916.SS 601838.SS 601077.SS 601577.SS 601997.SS 002966.SZ 002936.SZ 601187.SS 002958.SZ 600928.SS 002948.SZ 601963.SS 601128.SS 601860.SS 600908.SS 002839.SZ 603323.SS 002807.SZ 601528.SS 601665.SS 

In [13]:
# Keep only records whose country is 'China' and currency is 'CNY', then show the
# record count before and after filtering.
china_companies_info2 = filter_info('CNY', 'China', china_companies_info.values())
len(china_companies_info), len(china_companies_info2)

(41, 38)

In [14]:
# One DataFrame row per filtered China record.
china_companies_df = set_dataframe(china_companies_info2)

In [15]:
# Persist the China frame; it is read back in the final "save all" section.
save_to_csv(china_companies_df, 'china.csv')

## Singapore

In [32]:
# singapore_url = 'https://finance.yahoo.com/screener/unsaved/3ebd553c-f625-4ab6-a6e5-753092cbede1?dependentField=sector&dependentValues=Financial%20Services'
# singapore_results = get_companies(singapore_url)

In [16]:
# Hard-coded Singapore tickers (3 symbols); presumably a saved result of the
# commented-out screener scrape above — confirm before refreshing the data.
singapore_results = ['D05.SI', 'O39.SI', 'U11.SI']

In [17]:
# Live SGD -> USD rate from forex_python (the same lookup the get_rate helper wraps).
c = CurrencyRates()
singapore_rate = c.get_rate('SGD', 'USD')

In [18]:
# Fetch the selected fields for every Singapore ticker; monetary fields are scaled by singapore_rate.
singapore_companies_info = get_companies_info(singapore_results, 'SGD', singapore_rate, 'Singapore')

D05.SI O39.SI U11.SI 

In [19]:
# Keep only records whose country is 'Singapore' and currency is 'SGD', then show the
# record count before and after filtering.
singapore_companies_info2 = filter_info('SGD', 'Singapore', singapore_companies_info.values())
len(singapore_companies_info), len(singapore_companies_info2)

(3, 3)

In [20]:
# One DataFrame row per filtered Singapore record.
singapore_companies_df = set_dataframe(singapore_companies_info2)

In [21]:
# Persist the Singapore frame; it is read back in the final "save all" section.
save_to_csv(singapore_companies_df, 'singapore.csv')

## South Korea

In [12]:
# south_korea_url = 'https://finance.yahoo.com/screener/unsaved/b5e25f9a-cd0a-46ba-b81d-2009b47fed72?dependentField=sector&dependentValues=Financial%20Services'
# south_korea_results = get_companies(south_korea_url)

In [22]:
# Hard-coded South Korea tickers (11 symbols); presumably a saved result of the
# commented-out screener scrape above — confirm before refreshing the data.
south_korea_results = ['323410.KS','105560.KS','055550.KS','086790.KS','316140.KS','024110.KS','138930.KS','175330.KS','139130.KS','010050.KS','006220.KS']

In [23]:
# Live KRW -> USD rate from forex_python (the same lookup the get_rate helper wraps).
c = CurrencyRates()
korea_rate = c.get_rate('KRW', 'USD')

In [24]:
# Fetch the selected fields for every South Korea ticker; monetary fields are scaled by korea_rate.
south_korea_companies_info = get_companies_info(south_korea_results, 'KRW', korea_rate, 'South Korea')

323410.KS 105560.KS 055550.KS 086790.KS 316140.KS 024110.KS 138930.KS 175330.KS 139130.KS 010050.KS 006220.KS 

In [29]:
# Keep only records whose country is 'South Korea' and currency is 'KRW', then show the
# record count before and after filtering.
south_korea_companies_info2 = filter_info('KRW', 'South Korea', south_korea_companies_info.values())
len(south_korea_companies_info), len(south_korea_companies_info2)

(11, 10)

In [30]:
# One DataFrame row per filtered South Korea record.
south_korea_companies_df = set_dataframe(south_korea_companies_info2)

In [31]:
# Persist the South Korea frame; it is read back in the final "save all" section.
save_to_csv(south_korea_companies_df, 'south_korea.csv')

## Japan

In [146]:
# japan_url = 'https://finance.yahoo.com/screener/unsaved/ec4095f6-4b0b-4485-a1b5-0084d48018bb?dependentField=sector&dependentValues=Financial%20Services&offset=0&count=100'
# japan_results = get_companies(japan_url)

In [36]:
# Hard-coded Japan tickers (88 symbols); presumably a saved result of the
# commented-out screener scrape above — confirm before refreshing the data.
japan_results = ['8306.T',
 '8316.T',
 '8411.T',
 '6178.T',
 '7182.T',
 '8421.T',
 '8309.T',
 '8308.T',
 '7186.T',
 '8331.T',
 '8355.T',
 '8369.T',
 '8303.T',
 '8354.T',
 '8304.T',
 '8410.T',
 '7167.T',
 '7337.T',
 '8359.T',
 '8385.T',
 '7180.T',
 '8382.T',
 '8418.T',
 '8334.T',
 '7327.T',
 '8377.T',
 '8370.T',
 '7189.T',
 '8524.T',
 '8341.T',
 '8388.T',
 '8366.T',
 '8358.T',
 '8381.T',
 '8356.T',
 '8368.T',
 '8361.T',
 '8397.T',
 '8367.T',
 '8336.T',
 '8363.T',
 '8544.T',
 '8346.T',
 '8600.T',
 '8522.T',
 '7173.T',
 '8386.T',
 '8342.T',
 '8527.T',
 '8362.T',
 '7322.T',
 '8393.T',
 '8399.T',
 '8387.T',
 '8530.T',
 '8345.T',
 '8541.T',
 '8392.T',
 '8344.T',
 '8301.T',
 '8360.T',
 '8343.T',
 '8395.T',
 '8713.T',
 '8558.T',
 '7184.T',
 '8364.T',
 '8550.T',
 '8337.T',
 '8350.T',
 '7161.T',
 '8338.T',
 '8537.T',
 '8551.T',
 '8365.T',
 '8542.T',
 '8383.T',
 '8521.T',
 '8349.T',
 '8398.F',
 '8563.T',
 '8416.T',
 '8562.T',
 '8540.F',
 '8554.F',
 '8560.F',
 '7150.T',
 '8559.F']

In [37]:
# Live JPY -> USD rate from forex_python (the same lookup the get_rate helper wraps).
c = CurrencyRates()
japan_rate = c.get_rate('JPY', 'USD')

In [38]:
# Fetch the selected fields for every Japan ticker; monetary fields are scaled by japan_rate.
japan_companies_info = get_companies_info(japan_results, 'JPY', japan_rate, 'Japan')

8306.T 8316.T 8411.T 6178.T 7182.T 8421.T 8309.T 8308.T 7186.T 8331.T 8355.T 8369.T 8303.T 8354.T 8304.T 8410.T 7167.T 7337.T 8359.T 8385.T 7180.T 8382.T 8418.T 8334.T 7327.T 8377.T 8370.T 7189.T 8524.T 8341.T 8388.T 8366.T 8358.T 8381.T 8356.T 8368.T 8361.T 8397.T 8367.T 8336.T 8363.T 8544.T 8346.T 8600.T 8522.T 7173.T 8386.T 8342.T 8527.T 8362.T 7322.T 8393.T 8399.T 8387.T 8530.T 8345.T 8541.T 8392.T 8344.T 8301.T 8360.T 8343.T 8395.T 8713.T 8558.T 7184.T 8364.T 8550.T 8337.T 8350.T 7161.T 8338.T 8537.T 8551.T 8365.T 8542.T 8383.T 8521.T 8349.T 8398.F 8563.T 8416.T 8562.T 8540.F 8554.F 8560.F 7150.T 8559.F 

In [40]:
# Keep only records whose country is 'Japan' and currency is 'JPY', then show the
# record count before and after filtering.
japan_companies_info2 = filter_info('JPY', 'Japan', japan_companies_info.values())
len(japan_companies_info), len(japan_companies_info2)

(88, 88)

In [41]:
# One DataFrame row per filtered Japan record.
japan_companies_df = set_dataframe(japan_companies_info2)

In [42]:
# Persist the Japan frame; it is read back in the final "save all" section.
save_to_csv(japan_companies_df, 'japan.csv')

## Hong Kong

In [28]:
# hk_url = 'https://finance.yahoo.com/screener/unsaved/c63e1aab-fbb9-44d9-ac16-c06a9d13f12e?dependentField=sector&dependentValues=Financial%20Services&offset=0&count=100'
# hk_results = get_companies(hk_url)

In [43]:
# Hard-coded Hong Kong tickers (37 symbols); presumably a saved result of the
# commented-out screener scrape above — confirm before refreshing the data.
hk_results = ['1398.HK',
 '3968.HK',
 '0939.HK',
 '1288.HK',
 '3988.HK',
 '0005.HK',
 '1658.HK',
 '3328.HK',
 '0011.HK',
 '2388.HK',
 '0998.HK',
 '6818.HK',
 '1988.HK',
 '2888.HK',
 '2016.HK',
 '2066.HK',
 '9668.HK',
 '3618.HK',
 '6199.HK',
 '0023.HK',
 '3698.HK',
 '1551.HK',
 '6196.HK',
 '6190.HK',
 '3866.HK',
 '1216.HK',
 '1578.HK',
 '1916.HK',
 '9677.HK',
 '2139.HK',
 '1963.HK',
 '2356.HK',
 '6138.HK',
 '2558.HK',
 '0440.HK',
 '1983.HK',
 '0626.HK']

In [45]:
# Live HKD -> USD rate from forex_python (the same lookup the get_rate helper wraps).
c = CurrencyRates()
hk_rate = c.get_rate('HKD', 'USD')

In [47]:
# Fetch the selected fields for every Hong Kong ticker; monetary fields are scaled by hk_rate.
hk_companies_info = get_companies_info(hk_results, 'HKD', hk_rate, 'Hong Kong')

1398.HK 3968.HK 0939.HK 1288.HK 3988.HK 0005.HK 1658.HK 3328.HK 0011.HK 2388.HK 0998.HK 6818.HK 1988.HK 2888.HK 2016.HK 2066.HK 9668.HK 3618.HK 6199.HK 0023.HK 3698.HK 1551.HK 6196.HK 6190.HK 3866.HK 1216.HK 1578.HK 1916.HK 9677.HK 2139.HK 1963.HK 2356.HK 6138.HK 2558.HK 0440.HK 1983.HK 0626.HK 

In [48]:
# Keep only records whose country is 'Hong Kong' and currency is 'HKD', then show the
# record count before and after filtering.
# NOTE(review): the recorded output keeps only 6 of 37 records — presumably most of these
# listings report a different country or currency; verify that dropping them is intended.
hk_companies_info2 = filter_info('HKD', 'Hong Kong', hk_companies_info.values())
len(hk_companies_info), len(hk_companies_info2)

(37, 6)

In [49]:
# One DataFrame row per filtered Hong Kong record.
hk_companies_df = set_dataframe(hk_companies_info2)

In [50]:
# Persist the Hong Kong frame; it is read back in the final "save all" section.
save_to_csv(hk_companies_df, 'hong_kong.csv')

## Qatar

In [7]:
# qatar_url = 'https://finance.yahoo.com/screener/unsaved/d4779638-a59f-4361-9f10-2f0db9513a90?dependentField=sector&dependentValues=Financial%20Services'
# qatar_results = get_companies(qatar_url)

In [51]:
# Hard-coded Qatar tickers (9 symbols); presumably a saved result of the
# commented-out screener scrape above — confirm before refreshing the data.
qatar_results = ['QNBK.QA',
 'QIBK.QA',
 'MARK.QA',
 'CBQK.QA',
 'QIIK.QA',
 'ABQK.QA',
 'DHBK.QA',
 'KCBK.QA',
 'QFBQ.QA']

In [52]:
# QAR -> USD multiplier, fixed by hand instead of fetched.
# NOTE(review): `c` is created but not used in this cell because the lookup is commented
# out — presumably the live lookup was unavailable for QAR; confirm the 0.28 constant.
c = CurrencyRates()
# qatar_rate = c.get_rate('QAR', 'USD')
qatar_rate = 0.28

In [53]:
# Fetch the selected fields for every Qatar ticker; monetary fields are scaled by qatar_rate.
qatar_companies_info = get_companies_info(qatar_results, 'QAR', qatar_rate, 'Qatar')

QNBK.QA QIBK.QA MARK.QA CBQK.QA QIIK.QA ABQK.QA DHBK.QA KCBK.QA QFBQ.QA 

In [54]:
# Keep only records whose country is 'Qatar' and currency is 'QAR', then show the
# record count before and after filtering.
qatar_companies_info2 = filter_info('QAR', 'Qatar', qatar_companies_info.values())
len(qatar_companies_info), len(qatar_companies_info2)

(9, 9)

In [55]:
# One DataFrame row per filtered Qatar record.
qatar_companies_df = set_dataframe(qatar_companies_info2)

In [56]:
# Persist the Qatar frame; it is read back in the final "save all" section.
save_to_csv(qatar_companies_df, 'qatar.csv')

# Save all datasets to a single CSV

In [80]:
# Reload each per-country CSV written by the sections above.
twd = pd.read_csv('taiwan.csv')
cny = pd.read_csv('china.csv')
sgd = pd.read_csv('singapore.csv')
jpy = pd.read_csv('japan.csv')
krw = pd.read_csv('south_korea.csv')
hk = pd.read_csv('hong_kong.csv')
qar = pd.read_csv('qatar.csv')

In [81]:
# Sanity check: requested tickers, fetched symbols (dict keys), filtered records and
# rows of the combined frame.
banks_tickers = taiwan_results + china_results + singapore_results + hk_results + south_korea_results + japan_results + qatar_results
banks_info = list(taiwan_companies_info) + list(china_companies_info) + list(singapore_companies_info) + list(hk_companies_info) + list(south_korea_companies_info) + list(japan_companies_info) + list(qatar_companies_info)
banks_filtered = taiwan_companies_info2 + china_companies_info2 + singapore_companies_info2 + hk_companies_info2 + south_korea_companies_info2 + japan_companies_info2 + qatar_companies_info2
# Row order in banks_df follows [twd, cny, sgd, jpy, krw, hk, qar], which differs from the
# order used for the three lists above; only the lengths are compared here.
banks_df = pd.concat([twd,cny,sgd,jpy,krw,hk,qar], ignore_index=True)

len(banks_tickers), len(banks_info), len(banks_filtered), len(banks_df)

(213, 213, 178, 178)

In [82]:
# Write the combined frame with the same helper as the per-country files, so it uses
# the same encoding (utf-8-sig) and omits the index column; a bare to_csv() call would
# add an unnamed index column that none of the per-country CSVs have.
save_to_csv(banks_df, 'banks_asia.csv')