In [1]:
import os
from urllib.parse import urljoin

import pandas as pd
import requests  # this module helps us to download a web page
from bs4 import BeautifulSoup # this module helps in web scrapping.


In [2]:
# Source page: Wikipedia's list of Malaysian electoral districts.
url = "https://en.wikipedia.org/wiki/List_of_Malaysian_electoral_districts"

# Create the output folder if it is missing; exist_ok=True keeps re-runs from failing.
os.makedirs(name="election_analysis", exist_ok=True)

In [3]:
# Download the page HTML.
# - timeout: without one, requests waits indefinitely on a stalled connection,
#   which would hang the notebook on "Run All".
# - raise_for_status: fail here on a 4xx/5xx response instead of silently
#   parsing an error page as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()
ge_data = response.text

In [4]:
# Parse the downloaded HTML with Python's built-in "html.parser" backend.
ge_data_soup = BeautifulSoup(markup=ge_data, features="html.parser")

In [5]:
# Gather every <table> element found in the parsed document.
tables = ge_data_soup.find_all(name="table")

In [6]:
# Extract every federal constituency listed in the page's tables and save the
# result (name, Wikipedia URL, parliament code) as a CSV file.

# Base used to resolve the hrefs found in the table cells.
WIKIPEDIA_BASE_URL = "https://en.wikipedia.org"

# Keep only the tables whose text mentions "polling district"; the text is
# lowercased so the match is case-insensitive.
target_tables = [
    table for table in tables if "polling district" in table.get_text().lower()
]

# One [name, url, code] record per federal constituency.
data = []

for target_table in target_tables:
    # The first <tr> is skipped as the header row.
    for row in target_table.find_all("tr")[1:]:
        cells = row.find_all("td")
        if not cells:
            continue

        first_cell = cells[0]
        federal_constituency_link = first_cell.find("a")
        # Rows without a link in the first cell are not constituency rows.
        if federal_constituency_link is None:
            continue

        # An anchor without an href would have raised KeyError with
        # link["href"]; such rows are skipped instead of aborting the scrape.
        href = federal_constituency_link.get("href")
        if not href:
            continue

        # The first whitespace-separated token of the cell is the code.
        # Guard against a cell with no text (previously an IndexError).
        cell_tokens = first_cell.get_text().split()
        if not cell_tokens:
            continue

        # Normalise the code: drop dots and lowercase it
        # (a token such as "P.001" becomes "p001").
        parliament_code = cell_tokens[0].replace(".", "").lower()

        # Exclude state constituencies (codes starting with "n").
        if parliament_code.startswith("n"):
            continue

        federal_constituency_name = federal_constituency_link.text.strip()
        # urljoin resolves site-relative hrefs against the base and leaves
        # already-absolute hrefs intact (plain concatenation would mangle them).
        federal_constituency_url = urljoin(WIKIPEDIA_BASE_URL, href)

        data.append([federal_constituency_name, federal_constituency_url, parliament_code])

# NOTE: an earlier version also applied .replace(".", "_") to the code column.
# Every "." is already removed during normalisation above, so that step could
# never change anything; it was dropped and the stored code format is unchanged.
df = pd.DataFrame(data, columns=["Federal Constituency", "URL", "Parliament Code"])

df.to_csv("election_analysis/electoral_district.csv", index=False)