## Yahoo Finance

In [7]:
import yfinance as yf
import pandas as pd
from bs4 import BeautifulSoup
import re
from selenium import webdriver
import chromedriver_binary
import string
import datetime
from forex_python.converter import CurrencyRates
pd.options.display.float_format = '{:.3f}'.format

### Functions for screener results and getting data

In [29]:
def get_rate(from_currency, to_currency):
    """Look up the exchange rate between two currencies.

    Both currency codes are upper-cased before being passed to
    ``CurrencyRates.get_rate``, so callers may use any letter case.
    """
    converter = CurrencyRates()
    source_code = from_currency.upper()
    target_code = to_currency.upper()
    return converter.get_rate(source_code, target_code)

def convert(value, rate):
    """Scale a value by an exchange rate.

    Args:
        value: Numeric amount to convert, or ``None`` / ``''`` when the
            figure is missing.
        rate: Multiplier applied to ``value``.

    Returns:
        ``value * rate``, or ``''`` when ``value`` is missing.
    """
    # Test for "missing" explicitly: a plain truthiness check would also
    # discard a genuine zero (e.g. a company reporting 0 debt) as if absent.
    if value is None or value == '':
        return ''
    return value * rate

In [30]:
# Fields copied from a ticker's info mapping, in the order they appear in
# each stored record (and therefore in the resulting DataFrame columns).
_TICKER_FIELDS = (
    'symbol',
    'shortName',
    'country',
    'averageVolume',
    'currentPrice',
    'currency',
    'sector',
    'industry',
    'profitMargins',
    'operatingMargins',
    'operatingCashflow',
    'returnOnAssets',
    'returnOnEquity',
    'trailingEps',
    'totalRevenue',
    'revenuePerShare',
    'revenueGrowth',
    'grossProfits',
    'netIncomeToCommon',
    'totalCash',
    'totalCashPerShare',
    'totalDebt',
    'bookValue',
    'marketCap',
    'enterpriseValue',
    'trailingPE',
    'earningsQuarterlyGrowth',
    'forwardPE',
    'pegRatio',
    'priceToSalesTrailing12Months',
    'priceToBook',
    'enterpriseToRevenue',
    'beta',
    'fiftyTwoWeekHigh',
    'fiftyTwoWeekLow',
    'fiftyDayAverage',
    'twoHundredDayAverage',
    'trailingAnnualDividendYield',
    'fiveYearAvgDividendYield',
)

# Subset of _TICKER_FIELDS that is multiplied by the exchange rate.
# NOTE(review): 'currentPrice' and 'bookValue' are stored unconverted while
# the other price-like fields below are converted -- confirm this is intended.
_CONVERTED_FIELDS = frozenset({
    'operatingCashflow',
    'trailingEps',
    'totalRevenue',
    'revenuePerShare',
    'grossProfits',
    'netIncomeToCommon',
    'totalCash',
    'totalCashPerShare',
    'totalDebt',
    'marketCap',
    'enterpriseValue',
    'fiftyTwoWeekHigh',
    'fiftyTwoWeekLow',
    'fiftyDayAverage',
    'twoHundredDayAverage',
})


def set_dictionary(companies_info, ticker_info, symbol, curr_rate):
    """Store one company's selected fields in ``companies_info``.

    Args:
        companies_info: Dict of records keyed by symbol; updated in place.
        ticker_info: Mapping of field name to value for a single ticker.
        symbol: Key under which the new record is stored.
        curr_rate: Exchange rate handed to ``convert`` for the fields listed
            in ``_CONVERTED_FIELDS``.

    Returns:
        The same ``companies_info`` dict, now containing ``symbol``.
        Fields absent from ``ticker_info`` are stored as ``''``.
    """
    record = {}
    for field in _TICKER_FIELDS:
        if field not in ticker_info:
            record[field] = ''
        elif field in _CONVERTED_FIELDS:
            record[field] = convert(ticker_info[field], curr_rate)
        else:
            record[field] = ticker_info[field]

    companies_info[symbol] = record
    return companies_info

In [31]:
def get_companies(url):
    """Scrape the symbols listed on a screener results page.

    The function takes only the page URL, so it can be reused for the
    screeners of different countries.

    Args:
        url: Address of the screener results page to load in Chrome.

    Returns:
        A list with the link text found in the first cell of each table
        row after the first one. Rows without such a link are skipped.

    Raises:
        ValueError: If the loaded page contains no ``<table>`` element.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        html = driver.execute_script('return document.body.innerHTML;')
    finally:
        # Always shut the browser down, even when loading fails; otherwise
        # every call leaves a Chrome process running.
        driver.quit()

    table = BeautifulSoup(html, 'html.parser').find('table')
    if table is None:
        raise ValueError('No results table found at {}'.format(url))

    companies_results = []
    # The first row is skipped (presumably the table header).
    for row in table.find_all('tr')[1:]:
        cell = row.find('td')
        link = cell.find('a') if cell is not None else None
        if link is not None:
            companies_results.append(link.text)
    return companies_results


def get_companies_info(companies_results, currency, curr_rate, country):
    """Fetch Yahoo Finance info for each ticker and collect it by symbol.

    Args:
        companies_results: Ticker strings to look up with ``yf.Ticker``.
        currency: Not used by this function; kept so existing calls keep
            working. Filtering by currency is done by ``filter_info``.
        curr_rate: Exchange rate forwarded to ``set_dictionary``.
        country: Not used by this function; kept so existing calls keep
            working. Filtering by country is done by ``filter_info``.

    Returns:
        Dict mapping each symbol to the record built by ``set_dictionary``.
        Each symbol is also printed as it is processed.
    """
    companies_info = {}
    for ticker in companies_results:
        # Read the info mapping once per ticker and reuse it below.
        info = yf.Ticker(ticker).info
        # Fall back to the requested ticker when the response carries no
        # 'symbol' entry, instead of aborting the whole run with a KeyError.
        symbol = info.get('symbol', ticker)
        print(symbol, end=' ')
        companies_info = set_dictionary(companies_info, info, symbol, curr_rate)
    return companies_info


def set_dataframe(companies_info):
    """Build a pandas DataFrame from the collected company records."""
    frame = pd.DataFrame(data=companies_info)
    return frame

def save_to_csv(dataframe, filename):
    """Write ``dataframe`` to ``filename`` as a CSV file.

    The file is comma-separated, encoded as UTF-8 with a byte-order mark
    (``utf-8-sig``), and written without the index column.
    """
    csv_options = {'sep': ',', 'encoding': 'utf-8-sig', 'index': False}
    dataframe.to_csv(filename, **csv_options)

In [32]:
# filter
def filter_info(given_currency, given_country, country_info):
    """Keep only the records matching both a country and a currency.

    Args:
        given_currency: Value each kept record must have under 'currency'.
        given_country: Value each kept record must have under 'country'.
        country_info: Iterable of record dicts with 'country' and
            'currency' keys.

    Returns:
        A new list with the matching records, in their original order.
    """
    return [
        item for item in country_info
        if item['country'] == given_country
        and item['currency'] == given_currency
    ]

## Canada

In [109]:
# Saved Yahoo Finance screener used for the Canada section; the query string
# restricts it to the Financial Services sector, 100 rows from offset 0.
canada_url = 'https://finance.yahoo.com/screener/unsaved/69b64d3d-7416-4ed4-93d6-b4ad835df2a0?dependentField=sector&dependentValues=Financial%20Services&offset=0&count=100'
# Scrape the symbols listed on that screener page.
canada_results = get_companies(canada_url)

In [33]:
# Hard-coded ticker list; rebinding this name replaces any scraped result.
canada_results = ['BMO.TO','BNS.TO','CM.TO','CWB.TO','LB.TO','NA.TO','RY.TO','TD.TO','VB.TO']

In [34]:
# CAD -> USD exchange rate, later passed to get_companies_info.
c = CurrencyRates()
canada_rate = c.get_rate('CAD', 'USD')

In [14]:
# Fetch the info for every Canadian ticker; each symbol is printed as it is processed.
canada_companies_info = get_companies_info(canada_results, 'CAD', canada_rate, 'Canada')

BMO.TO BNS.TO CM.TO CWB.TO LB.TO NA.TO RY.TO TD.TO VB.TO 

In [15]:
# Keep only the records whose country is 'Canada' and whose currency is 'CAD'.
canada_companies_info2 = filter_info('CAD', 'Canada', canada_companies_info.values())
# Record counts before and after filtering.
len(canada_companies_info), len(canada_companies_info2)

(9, 9)

In [35]:
# Build a DataFrame from the filtered Canadian records.
canada_companies_df = set_dataframe(canada_companies_info2)

In [36]:
# Write the Canadian data to canada.csv.
save_to_csv(canada_companies_df, 'canada.csv')

## Australia

In [None]:
# Saved Yahoo Finance screener used for the Australia section; the query string
# restricts it to the Financial Services sector, 100 rows from offset 0.
australia_url = 'https://finance.yahoo.com/screener/unsaved/2677be71-2801-464f-803a-9028e167fac1?dependentField=sector&dependentValues=Financial%20Services&offset=0&count=100'
# Scrape the symbols listed on that screener page.
australia_results = get_companies(australia_url)

In [18]:
# Hard-coded ticker list; rebinding this name replaces any scraped result.
australia_results = ['CBA.AX','WBC.AX','ANZ.AX','NAB.AX','BOQ.AX','VUK.AX','BEN.AX','MYS.AX','BBC.AX']

In [19]:
# AUD -> USD exchange rate, later passed to get_companies_info.
c = CurrencyRates()
australia_rate = c.get_rate('AUD', 'USD')

In [20]:
# Fetch the info for every Australian ticker; each symbol is printed as it is processed.
australia_companies_info = get_companies_info(australia_results, 'AUD', australia_rate, 'Australia')

CBA.AX WBC.AX ANZ.AX NAB.AX BOQ.AX VUK.AX BEN.AX MYS.AX BBC.AX 

In [21]:
# Keep only the records whose country is 'Australia' and whose currency is 'AUD'.
australia_companies_info2 = filter_info('AUD', 'Australia', australia_companies_info.values())
# Record counts before and after filtering.
len(australia_companies_info), len(australia_companies_info2)

(9, 8)

In [22]:
# Build a DataFrame from the filtered Australian records.
australia_companies_df = set_dataframe(australia_companies_info2)

In [23]:
# Write the Australian data to australia.csv.
save_to_csv(australia_companies_df, 'australia.csv')

## Chile

In [17]:
# chile_url = 'https://finance.yahoo.com/screener/unsaved/c83cf383-7cd6-4e23-8fe9-834ab79c8581?dependentField=sector&dependentValues=Financial%20Services'
# chile_results = get_companies(chile_url)

In [25]:
# Hard-coded list of tickers for the Chile section (all carry the '.SN' suffix).
chile_results = ['CHILE.SN',
 'BSANTANDER.SN',
 'BCI.SN',
 'ITAUCORP.SN',
 'JPM.SN',
 'SECURITY.SN',
 'BAC.SN',
 'C.SN',
 'USB.SN']

In [26]:
# The live CLP -> USD lookup is disabled and a fixed rate is used instead, so
# `c` is not used in this cell.
# NOTE(review): presumably CLP is unavailable from the rate service -- confirm,
# and refresh the hard-coded value when re-running.
c = CurrencyRates()
# chile_rate = c.get_rate('CLP', 'USD')
chile_rate = 0.0013

In [27]:
# Fetch the info for every Chilean ticker; each symbol is printed as it is processed.
chile_companies_info = get_companies_info(chile_results, 'CLP', chile_rate, 'Chile')

CHILE.SN BSANTANDER.SN BCI.SN ITAUCORP.SN JPM.SN SECURITY.SN BAC.SN C.SN USB.SN 

In [37]:
# Keep only the records whose country is 'Chile' and whose currency is 'CLP'.
chile_companies_info2 = filter_info('CLP', 'Chile', chile_companies_info.values())
# Record counts before and after filtering.
len(chile_companies_info), len(chile_companies_info2)

(9, 5)

In [38]:
# Build a DataFrame from the filtered Chilean records.
chile_companies_df = set_dataframe(chile_companies_info2)

In [39]:
# Write the Chilean data to chile.csv.
save_to_csv(chile_companies_df, 'chile.csv')

# Save all datasets to a CSV

In [43]:
# Reload the per-country CSV files written by the sections above.
cad = pd.read_csv('canada.csv')
aud = pd.read_csv('australia.csv')
clp = pd.read_csv('chile.csv')

In [49]:
# All requested tickers across the three countries.
banks_tickers = canada_results + australia_results + chile_results
# list() over each info dict yields its keys, i.e. the symbols that were fetched.
banks_info = list(canada_companies_info) + list(australia_companies_info) + list(chile_companies_info)
# Records that survived the country/currency filter.
banks_filtered = canada_companies_info2 + australia_companies_info2 + chile_companies_info2

# Stack the three reloaded CSVs into one frame with a fresh 0..n-1 index.
banks_df = pd.concat([cad,aud,clp], ignore_index=True)

# Sanity check: counts at each stage of the pipeline.
len(banks_tickers), len(banks_info), len(banks_filtered), len(banks_df)

(27, 27, 22, 22)

In [6]:
# Write the combined frame to banks_others.csv without the index column.
# Requires banks_df to exist, so the cell that builds it must be run first.
banks_df.to_csv('banks_others.csv', index=False)

NameError: name 'banks_df' is not defined

# Combine all data into a single CSV

In [5]:
# Load the regional CSV files. Only banks_others.csv is written in this
# notebook; the other three must already exist in the working directory.
europe = pd.read_csv('banks_europe.csv')
asia = pd.read_csv('banks_asia.csv')
usa = pd.read_csv('banks_usa.csv')
others = pd.read_csv('banks_others.csv')

# Stack the four regions into one frame with a fresh 0..n-1 index.
all_banks_developed_df = pd.concat([europe, asia, usa, others], ignore_index=True)

# Write the combined data set without the index column.
all_banks_developed_df.to_csv('all_banks_developed.csv', index=False)