In [19]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL to scrape
url = "https://results.eci.gov.in/PcResultGenJune2024/index.htm"

# Make a request to the website
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the div with class "rslt-table table-responsive"
    result_div = soup.find('div', class_='rslt-table table-responsive')

    if result_div:
        # Find the table inside the div
        table = result_div.find('table')

        if table:
            # Extract all rows from the table
            rows = table.find_all('tr')

            # List to store the data from the "Leading" column
            leading_column_data = []

            # Loop through the rows and extract data from the "Leading" column
            for row in rows:
                columns = row.find_all('td')
                if len(columns) > 0:
                    # Assuming the "Leading" column is labeled as such, find the appropriate index
                    header_row = table.find('tr')
                    headers = [th.text.strip() for th in header_row.find_all('th')]
                    
                    if "Leading" in headers:
                        leading_index = headers.index("Leading")
                        leading_column = columns[leading_index]
                        leading_column_data.append(str(leading_column))

            # Join the leading_column_data list into a single string
            leading_column_html = ''.join(leading_column_data)

            # Parse the combined HTML string
            soup = BeautifulSoup(leading_column_html, 'html.parser')
            href_links = [a['href'] for a in soup.find_all('a')]


In [20]:
def extract_party_name(text):
  parts = text.split("(")
  if len(parts) > 1:
    party_name = parts[1].strip()
    party_name = party_name.split(")")[0].strip()
    return party_name
  else:
    return None

In [21]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

dataframes = []
for link in href_links:
    url = "https://results.eci.gov.in/PcResultGenJune2024/" + link

    response = requests.get(url)

    # Check if the request was successful
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        party = soup.find('div', class_="page-title").find('h2').find('span')
        result_div = soup.find('div', class_="table-responsive")
        if result_div:
            table = result_div.find('table')    
            headers = []
            for th in table.find_all('th'):
                headers.append(th.text.strip())
            rows = []
            for tr in table.find_all('tr')[1:]:  # Skip the header row
                cells = tr.find_all('td')
                row = [cell.text.strip() for cell in cells]
                rows.append(row)
            df = pd.DataFrame(rows, columns=headers)
            df["party"] = extract_party_name(party.text)
            dataframes.append(df)

big_df = pd.concat(dataframes, ignore_index=True)


In [22]:
big_df.sample(6)


Unnamed: 0,S.No,Parliament Constituency,Leading Candidate,Total Votes,Margin,party
226,227,East Delhi(3),HARSH MALHOTRA,527020,70614,Bharatiya Janata Party
220,221,Chevella(10),KONDA VISHWESHWAR REDDY,674831,155754,Bharatiya Janata Party
431,2,Sheohar(4),LOVELY ANAND,362377,28054,Janata Dal
92,93,UJJAIN(22),ANIL FIROJIYA,757933,351175,Bharatiya Janata Party
4,5,Arunachal East(2),TAPIR GAO,140715,31282,Bharatiya Janata Party
330,1,Kairana(2),IQRA CHOUDHARY,487602,70278,Samajwadi Party


In [23]:
big_df['Margin'] = pd.to_numeric(big_df['Margin'], errors='coerce')
sorted_df = big_df.sort_values(by='Margin', ascending=True)
sorted_df = sorted_df.reset_index(drop=True)
sorted_df.to_csv("Final_data.csv")
