# ASX Companies Database

In [1]:
# dependencies
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from sqlalchemy import create_engine

### Table 1: ASX 200 Company

### Extract data from web scraping, source: https://www.marketindex.com.au/asx200

In [2]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)




In [3]:
# visit web
url = 'https://www.marketindex.com.au/asx200'
browser.visit(url)

In [4]:
# HTML object
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

In [17]:
# find and create list of asx 200 company code
stock_code = soup.find_all('tr', class_="star-stock-sibling")

stock_code_list = []
for x in stock_code:
    stock_code_list.append(x["data-quoteapi-id"])
    

In [12]:
# find and create list of one year performance of asx 200 company
performance = soup.find_all('td', class_="sticky-column")

performance_list = []

for x in performance:
    performance_list.append(x["$cur.yearChangeSignCSS"])

# data-quoteapi="$cur.yearPctChange (fullySigned); $cur.yearChangeSignCSS"

KeyError: '$cur.yearChangeSignCSS'

In [19]:
# end splinter
browser.quit()

In [18]:
# create dataframe
asx200 = pd.DataFrame(list(zip(stock_code_list, performance_list)),columns =['company_code', 'percent_change_one_year'])

asx200

Unnamed: 0,company_code,percent_change_one_year


### Transform data

### Table 2: ASX Company

### Extract data from CSV file, source: https://www.listcorp.com/asx/

In [39]:
# create dataframe from CSV file
asx_company = "companies-list.csv"
asx_company_df = pd.read_csv(asx_company)
len(asx_company_df)

2219

In [40]:
asx_company_df.head()

Unnamed: 0,Code,Company,Link,Market Cap,Last trade,Change,%Change,Sector
0,ASX:BHP,BHP Group Limited (ASX:BHP),https://www.listcorp.com/asx/bhp/bhp-group-lim...,189938000000,38.32,0.8,2.13,Materials
1,ASX:CBA,Commonwealth Bank (ASX:CBA),https://www.listcorp.com/asx/cba/commonwealth-...,168282000000,99.89,0.99,1.0,Financials
2,ASX:CSL,CSL Limited (ASX:CSL),https://www.listcorp.com/asx/csl/csl-limited,140576000000,291.4,-0.44,-0.15,Health Care
3,ASX:NAB,National Australia Bank (ASX:NAB),https://www.listcorp.com/asx/nab/nab,95792600000,30.33,0.2,0.66,Financials
4,ASX:WBC,Westpac Banking Corp (ASX:WBC),https://www.listcorp.com/asx/wbc/westpac,74959100000,21.43,0.02,0.09,Financials


### Transform data

In [45]:
# remove column
transformed_asx_company_df = asx_company_df[['Code', 'Company', 'Market Cap', 'Last trade', 'Change', '%Change', 'Sector']].copy()

# change column name
transformed_asx_company_df.columns = ['company_code', 'company_name', 'market_cap', 'price_28jul22', 'change_28jul22', 'percent_change_28jul22', 'sector']

# transformed_asx_company_df.shape[0]

# drop duplicates
transformed_asx_company_df = transformed_asx_company_df.drop_duplicates(subset=["company_code"])

# drop na
transformed_asx_company_df = transformed_asx_company_df.dropna(subset=["sector"])

print(len(transformed_asx_company_df))

# check data type
transformed_asx_company_df.dtypes

2219


company_code               object
company_name               object
market_cap                  int64
price_28jul22             float64
change_28jul22            float64
percent_change_28jul22    float64
sector                     object
dtype: object

In [48]:
# remame rows in column "company_code"
transformed_asx_company_df['company_code'] = transformed_asx_company_df['company_code'].map(lambda x: x.lstrip('ASX'))
transformed_asx_company_df.head()


Unnamed: 0,company_code,company_name,market_cap,price_28jul22,change_28jul22,percent_change_28jul22,sector
0,:BHP,BHP Group Limited (ASX:BHP),189938000000,38.32,0.8,2.13,Materials
1,:CBA,Commonwealth Bank (ASX:CBA),168282000000,99.89,0.99,1.0,Financials
2,:CSL,CSL Limited (ASX:CSL),140576000000,291.4,-0.44,-0.15,Health Care
3,:NAB,National Australia Bank (ASX:NAB),95792600000,30.33,0.2,0.66,Financials
4,:WBC,Westpac Banking Corp (ASX:WBC),74959100000,21.43,0.02,0.09,Financials


In [49]:
# remame rows in column "company_code"
transformed_asx_company_df['company_code'] = transformed_asx_company_df['company_code'].map(lambda x: x.lstrip(':'))
transformed_asx_company_df.head()


Unnamed: 0,company_code,company_name,market_cap,price_28jul22,change_28jul22,percent_change_28jul22,sector
0,BHP,BHP Group Limited (ASX:BHP),189938000000,38.32,0.8,2.13,Materials
1,CBA,Commonwealth Bank (ASX:CBA),168282000000,99.89,0.99,1.0,Financials
2,CSL,CSL Limited (ASX:CSL),140576000000,291.4,-0.44,-0.15,Health Care
3,NAB,National Australia Bank (ASX:NAB),95792600000,30.33,0.2,0.66,Financials
4,WBC,Westpac Banking Corp (ASX:WBC),74959100000,21.43,0.02,0.09,Financials


### Load dataframe to database 

In [50]:
# create connection
connection = "postgres:David$1986@localhost:5432/asx_db"
engine = create_engine(f'postgresql://{connection}')

In [51]:
# Confirm tables
engine.table_names()

  


['asx_companies']

In [52]:
# load dataframe
transformed_asx_company_df.to_sql(name="asx_companies", con=engine, if_exists='append', index=False)