# Bursa Malaysia Scraper

- Author: Kelvin You
- Date: 2025-01-19
- Description: This script scrapes the names and stock codes of all listed companies from the Bursa Malaysia website.

In [8]:
import numpy as np, pandas as pd
import os
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from time import time, sleep
from tqdm import tqdm

### Get all listed company names and stock codes from Bursa Malaysia

In [5]:
# Start a Chrome session that identifies itself with a desktop user-agent
options = webdriver.ChromeOptions()
options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0 Safari/537.36')
driver = webdriver.Chrome(options=options)
driver.maximize_window()

# Load the Main Market list-of-companies page
driver.get('http://www.bursamalaysia.com/market/listed-companies/list-of-companies/main-market/')

# Shared explicit wait, reused by the cells below; block here until the
# listing table element is present in the DOM
wait = WebDriverWait(driver, timeout=20)
table = wait.until(
    EC.presence_of_element_located((By.ID, 'DataTables_Table_0'))
)

# Accumulators filled by the scraping loop: stock codes and company names
codes = []
names = []

def get_last_page_number():
    """Return the highest page number shown in the pagination bar.

    Uses the module-level ``wait`` to block until the element with class
    ``pagination`` is visible, then collects every element with class
    ``paginate_button`` and reads its label.

    Returns:
        int | None: The largest purely numeric button label, or ``None``
        when no button carries a numeric label.
    """
    # Wait for the pagination to be visible
    wait.until(EC.visibility_of_element_located((By.CLASS_NAME, 'pagination')))

    # Wait for the page items to be loaded
    page_items = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'paginate_button'))
    )

    # Read each label exactly once; non-numeric buttons (e.g. "Next") are
    # skipped. Taking the maximum avoids depending on the buttons' DOM order.
    labels = [item.text.strip() for item in page_items]
    page_numbers = [int(label) for label in labels if label.isdigit()]
    return max(page_numbers, default=None)

# Upper bound on how many 2-second polls to spend waiting for the table to
# move to the next page after "Next" has been clicked.
MAX_PAGE_ADVANCE_POLLS = 10


def _get_current_page():
    """Return the page number of the pagination item marked ``active``."""
    pagination = driver.find_element(By.CLASS_NAME, 'pagination')
    current_page_link = pagination.find_elements(By.CLASS_NAME, 'active')[0]
    return int(current_page_link.text)


# The whole scrape runs under try/finally so the browser is closed even when
# an unexpected error (e.g. a timeout while reading the pagination) occurs.
try:
    # Get the last page number (None when no numeric page button was found)
    last_page_number = get_last_page_number()
    print(f"Last page number is: {last_page_number}")

    while True:
        try:
            # Wait for rows to load
            tbody = wait.until(EC.presence_of_element_located((By.TAG_NAME, 'tbody')))
            rows = tbody.find_elements(By.TAG_NAME, 'tr')

            # Iterate over each row and extract data
            for row in rows:
                try:
                    # The stock code is the value after the last '=' in the
                    # link's href; the link text is the company name
                    stock_link = row.find_element(By.CLASS_NAME, 'company-announcement-link')
                    stock_code = stock_link.get_attribute('href').split('=')[-1]
                    name = stock_link.text.strip()

                    codes.append(stock_code)
                    names.append(name)
                except Exception as e:
                    # Best effort: report the bad row and keep going
                    print(f"Error processing row: {e}")

            # Stop once the active page is the last one. This is checked
            # before touching the "Next" button so the last page never has
            # to wait for a button that will not be used.
            current_page = _get_current_page()
            if current_page == last_page_number:
                print("Reached the last page.")
                break  # Exit loop if it's the last page

            # Wait for the "Next" button to be clickable
            next_button = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, 'Next')))

            # Scroll to the "Next" button to ensure it's in view
            driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
            sleep(1)  # Small delay to ensure visibility

            # Trigger a click using JavaScript
            driver.execute_script("arguments[0].click();", next_button)

            # Wait for the next page to load: poll until the active page
            # number changes. If it never does, stop instead of scraping the
            # same page again (this also ends the loop when the last page
            # number could not be determined).
            for _ in range(MAX_PAGE_ADVANCE_POLLS):
                sleep(2)
                if _get_current_page() != current_page:
                    break
            else:
                print("Page did not advance after clicking Next; stopping.")
                break

        except Exception as e:
            print(f"Error during scraping: {e}")
            break
finally:
    # Close the browser
    driver.quit()

# Print results
print("Stock Codes:", codes)
print("Company Names:", names)


Last page number is: 28
Reached the last page.
Stock Codes: ['5250', '5326', '7214', '7167', '7086', '5198', '7131', '1481', '5281', '7191', '7146', '6599', '5139', '5185', '7315', '7078', '5238', '7219', '2658', '7609', '5116', '5269', '5115', '2674', '2488', '1163', '5127', '5293', '5307', '5120', '1015', '7031', '6351', '7083', '4758', '6556', '5082', '5568', '5088', '7090', '5015', '6432', '7181', '7007', '7218', '7722', '7129', '4057', '7020', '7162', '7054', '6399', '8176', '7048', '5130', '5302', '5025', '5182', '8885', '5204', '7579', '6888', '5106', '7120', '2305', '5021', '5329', '7005', '5258', '7251', '1899', '6602', '5190', '3239', '3395', '5196', '4219', '5248', '9814', '7668', '6173', '5932', '7195', '6998', '5032', '5069', '0168', '9288', '7036', '8133', '6297', '5100', '9938', '4162', '7221', '7188', '5210', '1818', '2828', '7174', '2852', '7128', '5105', '5311', '5099', '5180', '0163', '5257', '2836', '7076', '7035', '6947', '5195', '8052', '8982', '7209', '5273', '71

In [6]:
# Assemble the scraped lists into a two-column table, one row per company
stock_code = pd.DataFrame(dict(code=codes, name=names))

# Manual clean-up steps kept for reference (currently disabled):
# stock_code.loc[370,'code'] = '5235SS'
# stock_code.drop(index=371,inplace=True)
# stock_code.reset_index(drop=True)

# Report (rows, columns) as a quick sanity check
print(stock_code.shape)

(811, 2)


Unnamed: 0,code,name
0,5250,7-ELEVEN MALAYSIA HOLDINGS BERHAD


In [9]:
# Create the output folder if needed; exist_ok=True avoids the separate
# existence check (and the race between checking and creating)
output_directory = 'data'
os.makedirs(output_directory, exist_ok=True)

# Save the CSV file (without the DataFrame index column)
file_path = os.path.join(output_directory, '1_stock_code.csv')
stock_code.to_csv(file_path, index=False)