In [1]:
from bs4 import BeautifulSoup as bs
import requests
import numpy as np
import pandas as pd

pd.set_option('display.max_rows', None)

In [2]:
#Importing data

State_seats = pd.read_csv("State_Seats.csv") #Source: https://en.wikipedia.org/wiki/Draft:Results_of_the_2022_Malaysia_general_election_by_parliamentary_constituency
State_seats["Registered_Voters"] = ""
State_seats["Winning_Candidate"] = ""
State_seats["Winning_Coalition"] = ""
State_seats["Winning_Party"] = ""
State_seats["Seat_Equivalent_Mean"] = ""
State_seats["Seat_Equivalent_Median"] = ""

In [3]:
#Get number of registered voters and winning candidate, party and coalition from TheStar

def get_data(f):
    url = f"https://election.thestar.com.my/{f.State}.html"
    
    html = requests.get(url).content
    soup = bs(html, 'html.parser')
    seat = soup.find(id={"s"+f.Parliament_ID})
    
    #Get registered voters
    votes = seat.findChildren("strong", recursive=True)
    f.Registered_Voters = int(votes[0].getText().replace(',', ''))
    
    #Get the winning candidate
    winner = seat.findChildren(class_="name-candidate", recursive=True)
    f.Winning_Candidate = winner[0].getText().replace(',', '')
    
    return f

State_seats = State_seats.apply(get_data, axis=1).sort_values(by='Registered_Voters')

In [4]:
#Split candidate name, party and coalition into separate columns

Temp = State_seats.Winning_Candidate.str.split("(", expand=True)
Temp.loc[Temp[2].notnull(), 1] = Temp[2]
Temp[1] = Temp[1].str.strip("()")
State_seats.Winning_Candidate = Temp[0]

State_seats["Winning_Coalition"] = Temp[1].str.split("-", expand=True)[0]
State_seats["Winning_Party"] = Temp[1].str.split("-", expand=True)[1]
State_seats.loc[State_seats["Winning_Party"].isnull(), ["Winning_Party", "Winning_Coalition"]] = State_seats.loc[State_seats["Winning_Party"].isnull(), ["Winning_Coalition", "Winning_Party"]].values

In [5]:
#Summarising registered voters' data

print("Min: ", State_seats.Registered_Voters.min())
print("Max: ", State_seats.Registered_Voters.max())
print("Mean: ", State_seats.Registered_Voters.mean().round(2))
print("Median: ", State_seats.Registered_Voters.median())

Min:  28290
Max:  303430
Mean:  95202.58
Median:  87588.0


In [6]:
#Seats grouped by coalitions above and below mean and median number of registered voters

Grouped = pd.DataFrame()
Grouped["Above_Mean"] = State_seats.loc[State_seats["Registered_Voters"]>State_seats.Registered_Voters.mean()].groupby("Winning_Coalition").size().sort_values(ascending=False)
Grouped["Above_Median"] = State_seats.loc[State_seats["Registered_Voters"]>State_seats.Registered_Voters.median()].groupby("Winning_Coalition").size().sort_values(ascending=False)
Grouped["Below_Mean"] = State_seats.loc[State_seats["Registered_Voters"]<State_seats.Registered_Voters.mean()].groupby("Winning_Coalition").size().sort_values(ascending=False)
Grouped["Below_Median"] = State_seats.loc[State_seats["Registered_Voters"]<State_seats.Registered_Voters.median()].groupby("Winning_Coalition").size().sort_values(ascending=False)
Grouped

Unnamed: 0_level_0,Above_Mean,Above_Median,Below_Mean,Below_Median
Winning_Coalition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PH,56,64,25,17
PN,29,40,44,33
BN,3,3,27,27
GPS,2,2,21,21


In [7]:
#Normalizing seats for registered voters mean and median

State_seats["Seat_Equivalent_Mean"] = State_seats["Registered_Voters"].apply(lambda x: (x/State_seats.Registered_Voters.mean()).round(2))
State_seats["Seat_Equivalent_Median"] = State_seats["Registered_Voters"].apply(lambda x: (x/State_seats.Registered_Voters.median()).round(2))
State_seats

Unnamed: 0,State,Parliament_ID,Parliament_Name,Registered_Voters,Winning_Candidate,Winning_Coalition,Winning_Party,Seat_Equivalent_Mean,Seat_Equivalent_Median
205,sarawak,P207,Igan,28290,Ahmad Johnie Zawawi,GPS,PBB,0.3,0.32
201,sarawak,P203,Lubok antu,28995,Roy Angau Gingkoi,GPS,PRS,0.3,0.33
208,sarawak,P210,Kanowit,30988,Aaron Ago Dagang,GPS,PRS,0.33,0.35
198,sarawak,P200,Batang sadong,32640,Rodiyah Sapiee,GPS,PBB,0.34,0.37
204,sarawak,P206,Tanjong manis,32948,Yusuf Wahab,GPS,PBB,0.35,0.38
220,sarawak,P222,Lawas,33655,Henry Sum Agong,GPS,PBB,0.35,0.38
207,sarawak,P209,Julau,34850,Larry Soon @ Larry Sng Wei Shien,,PBM,0.37,0.4
53,perak,P55,Lenggong,36950,Shamsul Anuar Nasarah,BN,UMNO,0.39,0.42
59,perak,P61,Padang rengas,38686,Azahari Hasan,PN,BERSATU,0.41,0.44
174,sabah,P176,Kimanis,40763,Mohamad Alamin,BN,UMNO,0.43,0.47


In [8]:
#Total number of adjusted seats

print("Equivalent seats (mean): ", State_seats["Seat_Equivalent_Mean"].sum().round(0))
print("Equivalent seats (median): ", State_seats["Seat_Equivalent_Median"].sum().round(0))

Equivalent seats (mean):  221.0
Equivalent seats (median):  240.0


In [9]:
#Group adjusted seats by coalition.

Summary = State_seats.groupby("Winning_Coalition")[["Seat_Equivalent_Mean", "Seat_Equivalent_Median"]].sum().sort_values(by="Seat_Equivalent_Median", ascending=False)
Summary["% (Mean)"] = (Summary["Seat_Equivalent_Mean"]/Summary["Seat_Equivalent_Mean"].sum()).round(3)*100
Summary["% (Median)"] = (Summary["Seat_Equivalent_Median"]/Summary["Seat_Equivalent_Median"].sum()).round(3)*100
Summary

Unnamed: 0_level_0,Seat_Equivalent_Mean,Seat_Equivalent_Median,% (Mean),% (Median)
Winning_Coalition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PH,107.36,116.72,49.9,49.9
PN,69.27,75.28,32.2,32.2
BN,21.29,23.18,9.9,9.9
GPS,13.12,14.27,6.1,6.1
GRS,4.11,4.46,1.9,1.9


In [10]:
#Group adjusted seats by party

Summary_Party = State_seats.groupby("Winning_Party")[["Seat_Equivalent_Mean", "Seat_Equivalent_Median"]].sum().sort_values(by="Seat_Equivalent_Median", ascending=False)
Summary_Party["% (Mean)"] = (Summary_Party["Seat_Equivalent_Mean"]/Summary_Party["Seat_Equivalent_Mean"].sum()).round(3)*100
Summary_Party["% (Median)"] = (Summary_Party["Seat_Equivalent_Median"]/Summary_Party["Seat_Equivalent_Median"].sum()).round(3)*100
Summary_Party

Unnamed: 0_level_0,Seat_Equivalent_Mean,Seat_Equivalent_Median,% (Mean),% (Median)
Winning_Party,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PAS,51.29,55.75,23.2,23.2
DAP,49.62,53.97,22.5,22.5
PKR,43.59,47.37,19.7,19.7
UMNO,18.68,20.32,8.5,8.5
BERSATU,17.98,19.53,8.1,8.1
AMANAH,12.46,13.55,5.6,5.6
PBB,7.93,8.64,3.6,3.6
WARISAN,2.65,2.87,1.2,1.2
SABAH BERSATU,2.27,2.46,1.0,1.0
PRS,2.1,2.27,1.0,0.9
