In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# The base URL of the website
BASE_URL = "https://election.adaderana.lk/presidential-election-2024/#_"

# Make an HTTP request to the base page
response = requests.get(BASE_URL)
soup = BeautifulSoup(response.text, 'html.parser')

# Array of districts in Sri Lanka
Districts = [
    'Colombo', 'Gampaha', 'Kalutara',  # Western Province
    'Galle', 'Matara', 'Hambantota',    # Southern Province
    'Mahanuwara', 'Matale', 'NuwaraEliya',  # Central Province
    'Digamadulla', 'Trincomalee', 'Batticaloa',  # Eastern Province
    'Anuradhapura', 'Polonnaruwa',  # North Central Province
    'Kegalle', 'Ratnapura',  # Sabaragamuwa Province
    'Kurunegala', 'Puttalam',  # Wayamba Province
    'Badulla', 'Moneragala',  # Uva Province
    'Jaffna', 'Vanni'  # Northern Province
]

for District in Districts:
    # Find the district link by its href
    district_link = soup.find('a', href=lambda href: href and f'dist_id={District}' in href)
    
    # Define the district name
    district_name = District

    if district_link:
        # Extract the href (link) from the tag
        district_href = district_link['href']
        
        # Construct the full URL if the href is relative
        full_url = requests.compat.urljoin(BASE_URL, district_href)
        
        # Now make a request to the new URL to scrape that page
        district_page = requests.get(full_url)

        # Parse the new page with BeautifulSoup
        district_soup = BeautifulSoup(district_page.text, 'html.parser')

        # Find the division links on the district page (hrefs with 'div_id')
        division_links = district_soup.find_all('a', href=lambda href: href and 'div_id' in href)
        
        # Dictionary to store candidate names, parties, and cumulative vote counts
        candidate_data = {}
        candidate_count = 0  # Counter to track the number of candidates scraped
        
        # Scrape each division
        for i, division_link in enumerate(division_links):
            division_href = division_link['href']
            division_url = requests.compat.urljoin(full_url, division_href)

            # Make a request to the division page
            division_page = requests.get(division_url)
            division_soup = BeautifulSoup(division_page.text, 'html.parser')

            # Extract the polling division name
            division_name = division_soup.find('h2').get_text(strip=True)

            # Extract the results for the candidates
            results = division_soup.find_all('div', class_='dis_ele_result_block')
            
            # Check if results exist
            if results:
                for result in results:
                    # Extract party and candidate name
                    party = result.find('div', class_='ele_party').find('span').get_text(strip=True)
                    candidate = result.find('div', class_='ele_party').get_text(strip=True).replace(party, '').strip()

                    # Extract vote count
                    vote_count = int(result.find('div', class_='ele_value ml-auto').find('span').get_text(strip=True).replace(',', ''))

                    # Store candidate names and parties, and their votes
                    if candidate not in candidate_data:
                        candidate_data[candidate] = {'Party': party, 'Total Votes': vote_count}
                        candidate_count += 1
                    else:
                        candidate_data[candidate]['Total Votes'] += vote_count
                    
                    # Stop scraping once we have data for 38 candidates
                    if candidate_count == 38:
                        break
                if candidate_count == 38:
                    break
            if candidate_count == 38:
                break

        # Determine the province based on the district
        province = ''
        if district_name in ('Colombo', 'Gampaha', 'Kalutara'):
            province = 'Western Province'
        elif district_name in ('Galle', 'Matara', 'Hambantota'):
            province = 'Southern Province'
        elif district_name in ('Mahanuwara', 'Matale', 'NuwaraEliya'):
            province = 'Central Province'
        elif district_name in ('Digamadulla', 'Trincomalee', 'Batticaloa'):
            province = 'Eastern Province'
        elif district_name in ('Anuradhapura', 'Polonnaruwa'):
            province = 'North Central Province'
        elif district_name in ('Kegalle', 'Ratnapura'):
            province = 'Sabaragamuwa Province'
        elif district_name in ('Kurunegala', 'Puttalam'):
            province = 'Wayamba Province'
        elif district_name in ('Badulla', 'Moneragala'):
            province = 'Uva Province'
        elif district_name in ('Jaffna', 'Vanni'):
            province = 'Northern Province'

        # Convert the data into a DataFrame for the final results
        data = {
            'Province': [],
            'District': [],
            'Candidate': [],
            'Party': [],
            'Total Votes': []
        }

        # Populate the DataFrame with the candidate data
        for candidate, info in candidate_data.items():
            data['Province'].append(province) 
            data['District'].append(district_name) 
            data['Candidate'].append(candidate)
            data['Party'].append(info['Party'])
            data['Total Votes'].append(info['Total Votes'] / 2)

        df = pd.DataFrame(data)

        # Sorting by total votes
        df_sorted = df.sort_values(by='Total Votes', ascending=True)

        # Final DataFrame containing only the 38 candidates with their total votes
        print(df_sorted)

        # Save to CSV file
        df_sorted.to_csv(f'{District}.csv', index=False)

    else:
        print(f"District link not found for {District}.")

            Province District                                      Candidate  \
37  Western Province  Colombo                        MYLVAGANAM THILAKARAJAH   
36  Western Province  Colombo                              SARATH MANAMENDRA   
35  Western Province  Colombo  PALLIMULLA KAPUGAMAGE ANURA SIDNEY JAYARATHNA   
34  Western Province  Colombo                               JANAKA RATNAYAKE   
33  Western Province  Colombo                    ANURUDDHA ROSHAN RANASINGHE   
32  Western Province  Colombo                                A.S.P. LIYANAGE   
31  Western Province  Colombo                                 K.A. KULARATNA   
30  Western Province  Colombo                           SARATH BANDARANAYAKE   
29  Western Province  Colombo                                LALITH DE SILVA   
28  Western Province  Colombo                         ABUBAKAR MOHAMED INFAS   
27  Western Province  Colombo                                 MAHINDA DEWAGE   
26  Western Province  Colombo           

In [7]:
df.head()

Unnamed: 0,Province,District,Candidate,Party,Total Votes
0,Northern Province,Vanni,SAJITH PREMADASA,SJB,14245.5
1,Northern Province,Vanni,RANIL WICKREMESINGHE,IND16,8590.5
2,Northern Province,Vanni,ARIYANETHIRAN PAKKIYASELVAM,IND9,5378.5
3,Northern Province,Vanni,ANURA KUMARA DISSANAYAKE,NPP,2138.0
4,Northern Province,Vanni,K.K. PIYADASA,IND4,401.0
