In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# The base URL of the website
BASE_URL = "https://election.adaderana.lk/presidential-election-2024/#_"

# Make an HTTP request to the base page
response = requests.get(BASE_URL)
soup = BeautifulSoup(response.text, 'html.parser')

# Array of districts in Sri Lanka
Districts = [
    'Colombo', 'Gampaha', 'Kalutara',  # Western Province
    'Galle', 'Matara', 'Hambantota',    # Southern Province
    'Mahanuwara', 'Matale', 'NuwaraEliya',  # Central Province
    'Digamadulla', 'Trincomalee', 'Batticaloa',  # Eastern Province
    'Anuradhapura', 'Polonnaruwa',  # North Central Province
    'Kegalle', 'Ratnapura',  # Sabaragamuwa Province
    'Kurunegala', 'Puttalam',  # Wayamba Province
    'Badulla', 'Moneragala',  # Uva Province
    'Jaffna', 'Vanni'  # Northern Province
]

# Create an empty list to store all district DataFrames
all_data = []

for District in Districts:
    # Find the district link by its href
    district_link = soup.find('a', href=lambda href: href and f'dist_id={District}' in href)
    
    # Define the district name
    district_name = District

    if district_link:
        # Extract the href (link) from the tag
        district_href = district_link['href']
        
        # Construct the full URL if the href is relative
        full_url = requests.compat.urljoin(BASE_URL, district_href)
        
        # Now make a request to the new URL to scrape that page
        district_page = requests.get(full_url)

        # Parse the new page with BeautifulSoup
        district_soup = BeautifulSoup(district_page.text, 'html.parser')

        # Find the division links on the district page (hrefs with 'div_id')
        division_links = district_soup.find_all('a', href=lambda href: href and 'div_id' in href)
        
        # Dictionary to store candidate names, parties, and cumulative vote counts
        candidate_data = {}
        candidate_names = []  # List to store candidate names once after scraping the first division
        candidate_parties = []  # List to store candidate parties once after scraping the first division
        candidate_count = 0 
        
        # Scrape each division
        for i, division_link in enumerate(division_links):
            division_href = division_link['href']
            division_url = requests.compat.urljoin(full_url, division_href)

            # Make a request to the division page
            division_page = requests.get(division_url)
            division_soup = BeautifulSoup(division_page.text, 'html.parser')

            # Extract the polling division name
            division_name = division_soup.find('h2').get_text(strip=True)

            # Extract the results for the candidates
            results = division_soup.find_all('div', class_='dis_ele_result_block')
            
            # Check if results exist
            if results:
                for result in results:
                    # Extract party and candidate name
                    party = result.find('div', class_='ele_party').find('span').get_text(strip=True)
                    candidate = result.find('div', class_='ele_party').get_text(strip=True).replace(party, '').strip()

                    # Extract vote count
                    vote_count = int(result.find('div', class_='ele_value ml-auto').find('span').get_text(strip=True).replace(',', ''))

                    # If it's the first division, store candidate names and parties, and their votes
                    if i == 0:
                        candidate_names.append(candidate)
                        candidate_parties.append(party)
                        candidate_data[candidate] = {'Party': party, 'Total Votes': vote_count}
                        candidate_count += 1

                        # If the candidate count reaches 38, stop scraping further for this division
                        if candidate_count >= 38:
                            break
                    else:
                        # For subsequent divisions, check if the candidate and party match the stored data
                        if candidate in candidate_names and candidate_data[candidate]['Party'] == party:
                            candidate_data[candidate]['Total Votes'] += vote_count

        # Determine the province based on the district
        province = ''
        if district_name in ('Colombo', 'Gampaha', 'Kalutara'):
            province = 'Western Province'
        elif district_name in ('Galle', 'Matara', 'Hambantota'):
            province = 'Southern Province'
        elif district_name in ('Mahanuwara', 'Matale', 'NuwaraEliya'):
            province = 'Central Province'
        elif district_name in ('Digamadulla', 'Trincomalee', 'Batticaloa'):
            province = 'Eastern Province'
        elif district_name in ('Anuradhapura', 'Polonnaruwa'):
            province = 'North Central Province'
        elif district_name in ('Kegalle', 'Ratnapura'):
            province = 'Sabaragamuwa Province'
        elif district_name in ('Kurunegala', 'Puttalam'):
            province = 'Wayamba Province'
        elif district_name in ('Badulla', 'Moneragala'):
            province = 'Uva Province'
        elif district_name in ('Jaffna', 'Vanni'):
            province = 'Northern Province'

        # Convert the data into a DataFrame for the final results
        data = {
            'Province': [],
            'District': [],
            'Candidate': [],
            'Party': [],
            'Total Votes': []
        }

        # Populate the DataFrame with the candidate data
        for candidate, info in candidate_data.items():
            data['Province'].append(province) 
            data['District'].append(district_name) 
            data['Candidate'].append(candidate)
            data['Party'].append(info['Party'])
            data['Total Votes'].append(info['Total Votes'] / 2)

        df = pd.DataFrame(data)

        # Sorting by total votes
        df_sorted = df.sort_values(by='Total Votes', ascending=False)
        df_sorted['Year'] = 2024
        df_sorted["Total Precentage"] = (df_sorted['Total Votes']/df_sorted['Total Votes'].sum()) * 100
        
        # Append the sorted DataFrame to the list of all district data
        all_data.append(df_sorted)

    else:
        print(f"District link not found for {District}.")

# Concatenate all district data into one DataFrame
final_df = pd.concat(all_data, ignore_index=True)

# Save the combined data to one CSV file
final_df.to_csv('SriLanka_ElectionResults_2024.csv', index=False)

print("All districts' election results saved in one file.")

All districts' election results saved in one file.


In [127]:
df = pd.read_csv("SriLanka_ElectionResults_2024.csv")

In [128]:
df.head()

Unnamed: 0,Province,District,Candidate,Party,Total Votes,Year,Total Precentage
0,Western Province,Colombo,ANURA KUMARA DISSANAYAKE,NPP,629963.0,2024,47.209811
1,Western Province,Colombo,SAJITH PREMADASA,SJB,342108.0,2024,25.637782
2,Western Province,Colombo,RANIL WICKREMESINGHE,IND16,281436.0,2024,21.090985
3,Western Province,Colombo,NAMAL RAJAPAKSA,SLPP,30432.0,2024,2.280593
4,Western Province,Colombo,DILITH JAYAWEERA,SLCP,26356.0,2024,1.975135


In [129]:
df["Total Precentage"]

0      47.209811
1      25.637782
2      21.090985
3       2.280593
4       1.975135
         ...    
831     0.034519
832     0.033599
833     0.033139
834     0.032218
835     0.018871
Name: Total Precentage, Length: 836, dtype: float64

In [130]:
import numpy as np

df['Considering Votes'] = np.where(df['Total Precentage'] >= 5, df['Total Votes'], 0)

In [131]:
df.head()


Unnamed: 0,Province,District,Candidate,Party,Total Votes,Year,Total Precentage,Considering Votes
0,Western Province,Colombo,ANURA KUMARA DISSANAYAKE,NPP,629963.0,2024,47.209811,629963.0
1,Western Province,Colombo,SAJITH PREMADASA,SJB,342108.0,2024,25.637782,342108.0
2,Western Province,Colombo,RANIL WICKREMESINGHE,IND16,281436.0,2024,21.090985,281436.0
3,Western Province,Colombo,NAMAL RAJAPAKSA,SLPP,30432.0,2024,2.280593,0.0
4,Western Province,Colombo,DILITH JAYAWEERA,SLCP,26356.0,2024,1.975135,0.0


In [132]:
# Ensure that the TotalVotesByDistrict column exists
df['TotalVotesByDistrict'] = df.groupby('District')['Considering Votes'].transform('sum')

# Calculate Seating Precentage for each candidate
df['Seating Precentage'] = (df['Considering Votes'] / df['TotalVotesByDistrict']) * 100

In [133]:
df.head()

Unnamed: 0,Province,District,Candidate,Party,Total Votes,Year,Total Precentage,Considering Votes,TotalVotesByDistrict,Seating Precentage
0,Western Province,Colombo,ANURA KUMARA DISSANAYAKE,NPP,629963.0,2024,47.209811,629963.0,1253507.0,50.256042
1,Western Province,Colombo,SAJITH PREMADASA,SJB,342108.0,2024,25.637782,342108.0,1253507.0,27.292069
2,Western Province,Colombo,RANIL WICKREMESINGHE,IND16,281436.0,2024,21.090985,281436.0,1253507.0,22.451889
3,Western Province,Colombo,NAMAL RAJAPAKSA,SLPP,30432.0,2024,2.280593,0.0,1253507.0,0.0
4,Western Province,Colombo,DILITH JAYAWEERA,SLCP,26356.0,2024,1.975135,0.0,1253507.0,0.0


In [134]:
# Create a dictionary for the allocated seats per district
seats_dict = {
    'Colombo': 18,
    'Gampaha': 19,
    'Kalutara': 11,
    'Kandy': 12,
    'Matale': 5,
    'Nuwara Eliya': 8,
    'Galle': 9,
    'Matara': 7,
    'Hambantota': 7,
    'Jaffna': 6,
    'Vanni': 6,
    'Batticaloa': 5,
    'Digamadulla': 7,
    'Trincomalee': 4,
    'Kurunegala': 15,
    'Puttalam': 8,
    'Anuradhapura': 9,
    'Polonnaruwa': 5,
    'Badulla': 9,
    'Monaragala': 6,
    'Ratnapura': 11,
    'Kegalle': 9
}

# Assuming your dataframe has a 'District' column
df['Allocated Seats'] = df['District'].map(seats_dict)

In [135]:
df["Bonus"] = 1
df.head()

Unnamed: 0,Province,District,Candidate,Party,Total Votes,Year,Total Precentage,Considering Votes,TotalVotesByDistrict,Seating Precentage,Allocated Seats,Bonus
0,Western Province,Colombo,ANURA KUMARA DISSANAYAKE,NPP,629963.0,2024,47.209811,629963.0,1253507.0,50.256042,18.0,1
1,Western Province,Colombo,SAJITH PREMADASA,SJB,342108.0,2024,25.637782,342108.0,1253507.0,27.292069,18.0,1
2,Western Province,Colombo,RANIL WICKREMESINGHE,IND16,281436.0,2024,21.090985,281436.0,1253507.0,22.451889,18.0,1
3,Western Province,Colombo,NAMAL RAJAPAKSA,SLPP,30432.0,2024,2.280593,0.0,1253507.0,0.0,18.0,1
4,Western Province,Colombo,DILITH JAYAWEERA,SLCP,26356.0,2024,1.975135,0.0,1253507.0,0.0,18.0,1


In [136]:
# Create a new column for the whole number part
df['Seating Whole'] = df['Seating Precentage'].astype(int)

# Create a new column for the decimal part, keeping it as 0.xx
df['Seating Decimal'] = df['Seating Precentage'] - df['Seating Whole']

# Format the Seating Precentage column with a percentage sign
df['Seating Precentage Display'] = df['Seating Precentage'].apply(lambda x: f"{x:.2f}%")

# Example for formatting the whole and decimal parts with percentage signs
df['Seating Whole Display'] = df['Seating Whole'].apply(lambda x: f"{x}%")
df['Seating Decimal Display'] = df['Seating Decimal'].apply(lambda x: f"{x:.2f}%")

df['Seating Whole Numeric'] = df['Seating Precentage Display'].str.rstrip('%').astype('float') / 100
df['Seats Won'] = (df['Allocated Seats'] - 1) * df['Seating Whole Numeric']
df.head()

Unnamed: 0,Province,District,Candidate,Party,Total Votes,Year,Total Precentage,Considering Votes,TotalVotesByDistrict,Seating Precentage,Allocated Seats,Bonus,Seating Whole,Seating Decimal,Seating Precentage Display,Seating Whole Display,Seating Decimal Display,Seating Whole Numeric,Seats Won
0,Western Province,Colombo,ANURA KUMARA DISSANAYAKE,NPP,629963.0,2024,47.209811,629963.0,1253507.0,50.256042,18.0,1,50,0.256042,50.26%,50%,0.26%,0.5026,8.5442
1,Western Province,Colombo,SAJITH PREMADASA,SJB,342108.0,2024,25.637782,342108.0,1253507.0,27.292069,18.0,1,27,0.292069,27.29%,27%,0.29%,0.2729,4.6393
2,Western Province,Colombo,RANIL WICKREMESINGHE,IND16,281436.0,2024,21.090985,281436.0,1253507.0,22.451889,18.0,1,22,0.451889,22.45%,22%,0.45%,0.2245,3.8165
3,Western Province,Colombo,NAMAL RAJAPAKSA,SLPP,30432.0,2024,2.280593,0.0,1253507.0,0.0,18.0,1,0,0.0,0.00%,0%,0.00%,0.0,0.0
4,Western Province,Colombo,DILITH JAYAWEERA,SLCP,26356.0,2024,1.975135,0.0,1253507.0,0.0,18.0,1,0,0.0,0.00%,0%,0.00%,0.0,0.0


In [137]:
# Correct way to drop columns
df = df.drop('Seating Whole', axis=1)
df = df.drop('Seating Decimal', axis=1)
df = df.drop('Seating Whole Display', axis=1)
df = df.drop('Seating Decimal Display', axis=1)
df = df.drop('Seating Precentage', axis=1)
df.head()

Unnamed: 0,Province,District,Candidate,Party,Total Votes,Year,Total Precentage,Considering Votes,TotalVotesByDistrict,Allocated Seats,Bonus,Seating Precentage Display,Seating Whole Numeric,Seats Won
0,Western Province,Colombo,ANURA KUMARA DISSANAYAKE,NPP,629963.0,2024,47.209811,629963.0,1253507.0,18.0,1,50.26%,0.5026,8.5442
1,Western Province,Colombo,SAJITH PREMADASA,SJB,342108.0,2024,25.637782,342108.0,1253507.0,18.0,1,27.29%,0.2729,4.6393
2,Western Province,Colombo,RANIL WICKREMESINGHE,IND16,281436.0,2024,21.090985,281436.0,1253507.0,18.0,1,22.45%,0.2245,3.8165
3,Western Province,Colombo,NAMAL RAJAPAKSA,SLPP,30432.0,2024,2.280593,0.0,1253507.0,18.0,1,0.00%,0.0,0.0
4,Western Province,Colombo,DILITH JAYAWEERA,SLCP,26356.0,2024,1.975135,0.0,1253507.0,18.0,1,0.00%,0.0,0.0


In [138]:
# Fill NaN values with 0 before converting to int
df["Seats Won Whole"] = df["Seats Won"].fillna(0).astype(int)
df["Seats Won Decimal"] = df["Seats Won"] - df["Seats Won Whole"]
df.head()

Unnamed: 0,Province,District,Candidate,Party,Total Votes,Year,Total Precentage,Considering Votes,TotalVotesByDistrict,Allocated Seats,Bonus,Seating Precentage Display,Seating Whole Numeric,Seats Won,Seats Won Whole,Seats Won Decimal
0,Western Province,Colombo,ANURA KUMARA DISSANAYAKE,NPP,629963.0,2024,47.209811,629963.0,1253507.0,18.0,1,50.26%,0.5026,8.5442,8,0.5442
1,Western Province,Colombo,SAJITH PREMADASA,SJB,342108.0,2024,25.637782,342108.0,1253507.0,18.0,1,27.29%,0.2729,4.6393,4,0.6393
2,Western Province,Colombo,RANIL WICKREMESINGHE,IND16,281436.0,2024,21.090985,281436.0,1253507.0,18.0,1,22.45%,0.2245,3.8165,3,0.8165
3,Western Province,Colombo,NAMAL RAJAPAKSA,SLPP,30432.0,2024,2.280593,0.0,1253507.0,18.0,1,0.00%,0.0,0.0,0,0.0
4,Western Province,Colombo,DILITH JAYAWEERA,SLCP,26356.0,2024,1.975135,0.0,1253507.0,18.0,1,0.00%,0.0,0.0,0,0.0


In [140]:
def allocate_remaining_seats(group):
    allocated_seats = group["Allocated Seats"].fillna(0).iloc[0]  # Use the first value since it's the same for all rows in the group
    total_seats_won_whole = group["Seats Won Whole"].sum()
    
    # Calculate remaining seats to allocate
    remaining_seats = int(allocated_seats - 1 - total_seats_won_whole)  # Subtract 1 to exclude the bonus seat
    
    # If there are remaining seats, allocate based on decimal values
    if remaining_seats > 0:
        # Sort by the decimal values in descending order
        group = group.sort_values(by="Seats Won Decimal", ascending=False)
        
        # Allocate remaining seats to candidates with highest decimal values
        group["Remaining Seats"] = 0  # Initialize column for remaining seats
        for i in range(remaining_seats):
            group.iloc[i, group.columns.get_loc("Remaining Seats")] += 1

    return group

# Apply the function to each district
df["Remaining Seats"] = 0  # Initialize the column first to avoid NaN errors
df = df.groupby("District", group_keys=False).apply(allocate_remaining_seats)

# Calculate final total seats won
df["Final Seats Won"] = df["Seats Won Whole"] + df["Remaining Seats"]

# Display the updated dataframe
df.head()

Unnamed: 0,Province,District,Candidate,Party,Total Votes,Year,Total Precentage,Considering Votes,TotalVotesByDistrict,Allocated Seats,Bonus,Seating Precentage Display,Seating Whole Numeric,Seats Won,Seats Won Whole,Seats Won Decimal,Remaining Seats,Final Seats Won
457,North Central Province,Anuradhapura,SAJITH PREMADASA,SJB,202289.0,2024,33.508977,202289.0,570385.0,9.0,1,35.47%,0.3547,2.8376,2,0.8376,1,3
458,North Central Province,Anuradhapura,RANIL WICKREMESINGHE,IND16,82152.0,2024,13.608399,82152.0,570385.0,9.0,1,14.40%,0.144,1.152,1,0.152,0,1
456,North Central Province,Anuradhapura,ANURA KUMARA DISSANAYAKE,NPP,285944.0,2024,47.366346,285944.0,570385.0,9.0,1,50.13%,0.5013,4.0104,4,0.0104,0,4
492,North Central Province,Anuradhapura,SARATH MANAMENDRA,NSU,69.0,2024,0.01143,0.0,570385.0,9.0,1,0.00%,0.0,0.0,0,0.0,0,0
483,North Central Province,Anuradhapura,KEERTHIE WICKRAMARATNE,AJP,168.0,2024,0.027829,0.0,570385.0,9.0,1,0.00%,0.0,0.0,0,0.0,0,0


In [141]:
df.head()

Unnamed: 0,Province,District,Candidate,Party,Total Votes,Year,Total Precentage,Considering Votes,TotalVotesByDistrict,Allocated Seats,Bonus,Seating Precentage Display,Seating Whole Numeric,Seats Won,Seats Won Whole,Seats Won Decimal,Remaining Seats,Final Seats Won
457,North Central Province,Anuradhapura,SAJITH PREMADASA,SJB,202289.0,2024,33.508977,202289.0,570385.0,9.0,1,35.47%,0.3547,2.8376,2,0.8376,1,3
458,North Central Province,Anuradhapura,RANIL WICKREMESINGHE,IND16,82152.0,2024,13.608399,82152.0,570385.0,9.0,1,14.40%,0.144,1.152,1,0.152,0,1
456,North Central Province,Anuradhapura,ANURA KUMARA DISSANAYAKE,NPP,285944.0,2024,47.366346,285944.0,570385.0,9.0,1,50.13%,0.5013,4.0104,4,0.0104,0,4
492,North Central Province,Anuradhapura,SARATH MANAMENDRA,NSU,69.0,2024,0.01143,0.0,570385.0,9.0,1,0.00%,0.0,0.0,0,0.0,0,0
483,North Central Province,Anuradhapura,KEERTHIE WICKRAMARATNE,AJP,168.0,2024,0.027829,0.0,570385.0,9.0,1,0.00%,0.0,0.0,0,0.0,0,0


In [143]:
def add_bonus_seat(group):
    # Find the row with the highest 'Final Seats Won'
    max_seat_index = group["Final Seats Won"].idxmax()
    
    # Add 1 to the highest 'Final Seats Won' for that candidate
    group.loc[max_seat_index, "Final Seats Won"] += 1
    
    return group

# Apply the function to each district
df = df.groupby("District", group_keys=False).apply(add_bonus_seat)

# Display the updated dataframe with the bonus seats added
df.head()

Unnamed: 0,Province,District,Candidate,Party,Total Votes,Year,Total Precentage,Considering Votes,TotalVotesByDistrict,Allocated Seats,Bonus,Seating Precentage Display,Seating Whole Numeric,Seats Won,Seats Won Whole,Seats Won Decimal,Remaining Seats,Final Seats Won
457,North Central Province,Anuradhapura,SAJITH PREMADASA,SJB,202289.0,2024,33.508977,202289.0,570385.0,9.0,1,35.47%,0.3547,2.8376,2,0.8376,1,3
458,North Central Province,Anuradhapura,RANIL WICKREMESINGHE,IND16,82152.0,2024,13.608399,82152.0,570385.0,9.0,1,14.40%,0.144,1.152,1,0.152,0,1
456,North Central Province,Anuradhapura,ANURA KUMARA DISSANAYAKE,NPP,285944.0,2024,47.366346,285944.0,570385.0,9.0,1,50.13%,0.5013,4.0104,4,0.0104,0,5
492,North Central Province,Anuradhapura,SARATH MANAMENDRA,NSU,69.0,2024,0.01143,0.0,570385.0,9.0,1,0.00%,0.0,0.0,0,0.0,0,0
483,North Central Province,Anuradhapura,KEERTHIE WICKRAMARATNE,AJP,168.0,2024,0.027829,0.0,570385.0,9.0,1,0.00%,0.0,0.0,0,0.0,0,0


In [144]:
df.to_csv('Final Seats.csv', index=False)