In [1]:
from bs4 import BeautifulSoup # This module helps in web scrapping.
import requests  # This module helps us to download a web page

import pandas as pd # This module help the dataframe handling
import os # Interacting with system

import time # Calculate duration of code execution

In [2]:
# Name of the root folder that the per-constituency result CSVs go under.
# Only the name is defined here; this cell does not create anything on disk.
output_folder_electionresult = "election_results"

# CSV that lists the parliament seats to process, loaded into a DataFrame.
constituency_file = "election_analysis/electoral_district.csv"
constituency_data = pd.read_csv(filepath_or_buffer=constituency_file)

In [None]:
# Scrape the most recent general-election result tables for every constituency
# listed in `constituency_data` and save each table as its own CSV file under
# `output_folder_electionresult/<Parliament Code>/`.

# --- Tunables (values match the previously hard-coded numbers) ---------------
LATEST_GENERAL_ELECTION = 15    # number given to the first matching table on a page
ELECTIONS_TO_SCRAPE = 2         # stop after this many matching tables (GE15 and GE14)
MAX_CANDIDATE_ROWS = 4          # candidate rows kept per table
REQUEST_TIMEOUT_SECONDS = 30    # without a timeout a stalled connection hangs forever
RESULT_COLUMNS = ["Party", "Candidate", "Vote Count", "Percentage of Vote"]


def _extract_candidate_rows(table):
    """Return the candidate rows of one result table as lists of strings.

    Each ``<tr>`` is reduced to the stripped text of its ``<td>`` cells with
    thousands separators (``,``) removed. Reading stops at the first row that
    contains a cell equal to ``'Total valid votes'``. Only cells 1-4 of each
    row are kept; they are later labelled with ``RESULT_COLUMNS``.

    The first collected row is discarded (presumably the header row, which has
    no ``<td>`` cells - confirm against the page markup) and at most
    ``MAX_CANDIDATE_ROWS`` rows after it are returned.
    NOTE(review): contests with more candidates than ``MAX_CANDIDATE_ROWS`` are
    truncated; raise the constant if every candidate is wanted.
    """
    collected = []
    for tr in table.find_all("tr"):
        cells = [td.text.strip().replace(',', '') for td in tr.find_all('td')]

        if 'Total valid votes' in cells:
            break

        collected.append(cells[1:5])

    return collected[1:1 + MAX_CANDIDATE_ROWS]


def _parse_election_tables(html):
    """Build ``{general_election_number: DataFrame}`` from one constituency page.

    Tables are recognised by a ``<caption>`` containing ``"Malaysian general"``.
    The first match is numbered ``LATEST_GENERAL_ELECTION``, the next one is
    numbered one lower, and so on, until ``ELECTIONS_TO_SCRAPE`` tables have
    been taken. This assumes the page lists elections newest-first - TODO confirm.
    The dict keeps insertion order, i.e. the order the tables were found in.
    """
    soup = BeautifulSoup(html, 'html.parser')
    frames = {}
    general_election = LATEST_GENERAL_ELECTION

    for table in soup.find_all("table"):
        caption = table.find("caption")
        if not (caption and "Malaysian general" in caption.text):
            continue

        frame = pd.DataFrame(_extract_candidate_rows(table), columns=RESULT_COLUMNS)
        frame.insert(0, 'General Election', general_election)
        frames[general_election] = frame

        general_election -= 1
        if len(frames) >= ELECTIONS_TO_SCRAPE:
            break

    return frames


# perf_counter is monotonic, so the measured duration cannot be skewed by
# system clock adjustments during a long scrape.
start_time = time.perf_counter()
failed_constituencies = []

print("Scrapping in Progress...")

# One session for the whole run so connections to the same host are reused.
with requests.Session() as session:
    # Distinct loop names: nothing below rebinds the outer loop's variables.
    for _, constituency in constituency_data.iterrows():
        constituency_url = constituency["URL"]
        constituency_code = constituency["Parliament Code"]
        constituency_name = constituency["Federal Constituency"]
        constituency_label = constituency_code.upper().zfill(3)

        # Create the output folder for this constituency
        constituency_folder_electionresult = os.path.join(output_folder_electionresult, constituency_code)
        os.makedirs(constituency_folder_electionresult, exist_ok=True)

        # Fetch the page; report and skip on network errors or HTTP error
        # statuses instead of silently parsing an error page.
        try:
            response = session.get(constituency_url, timeout=REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()
        except requests.RequestException as error:
            failed_constituencies.append(constituency_label)
            print(f"{constituency_label} {constituency_name}: download failed ({error})")
            print()
            continue

        election_table = _parse_election_tables(response.content)
        table_index = list(election_table)

        # Print and save the dataframes for the constituency
        print(f"{constituency_label}", constituency_name)
        for general_election in table_index:
            csv_path = os.path.join(
                constituency_folder_electionresult,
                f"ge{general_election}_{constituency_code}_result.csv",
            )
            election_table[general_election].to_csv(csv_path, index=False)
            print(f"{constituency_label}-GE{general_election} csv data completed")
        print()

print("Scrapping completed!")
if failed_constituencies:
    print("Skipped (download failed):", ", ".join(failed_constituencies))

# Elapsed time, split into whole minutes and remaining seconds
end_time = time.perf_counter()
duration = end_time - start_time
minutes, seconds = divmod(duration, 60)

print(f"Code execution time: {minutes:.0f} minutes {seconds:.2f} seconds")

Scrapping in Progress...
P001 Padang Besar
P001-GE15 csv data completed
P001-GE14 csv data completed

P002 Kangar
P002-GE15 csv data completed
P002-GE14 csv data completed

P003 Arau
P003-GE15 csv data completed
P003-GE14 csv data completed

P004 Langkawi
P004-GE15 csv data completed
P004-GE14 csv data completed

P005 Jerlun
P005-GE15 csv data completed
P005-GE14 csv data completed

P006 Kubang Pasu
P006-GE15 csv data completed
P006-GE14 csv data completed

P007 Padang Terap
P007-GE15 csv data completed
P007-GE14 csv data completed

P008 Pokok Sena
P008-GE15 csv data completed
P008-GE14 csv data completed

P009 Alor Setar
P009-GE15 csv data completed
P009-GE14 csv data completed

P010 Kuala Kedah
P010-GE15 csv data completed
P010-GE14 csv data completed

P011 Pendang
P011-GE15 csv data completed
P011-GE14 csv data completed

P012 Jerai
P012-GE15 csv data completed
P012-GE14 csv data completed

P013 Sik
P013-GE15 csv data completed
P013-GE14 csv data completed

P014 Merbok
P014-GE15 csv