## Import the necessary libraries

In [24]:
# Import BeautifulSoup from the bs4 library for parsing HTML and XML documents
from bs4 import BeautifulSoup

# Import the requests library for making HTTP requests to fetch web pages
import requests

# Import the pandas library for data manipulation and analysis
import pandas as pd

## Generate a list of URLs and corresponding state names from the input URL


In [25]:
def url_generation_party(u, timeout=30):
    """
    Builds the party-wise result URLs and state names from the options found in the given URL.

    The page at ``u`` is downloaded and every ``<option>`` element after the first
    is turned into one URL (base + option value + ".htm") and one state name
    (the option's stripped text). The first option is skipped entirely; it is
    presumably the dropdown's placeholder entry, so its attributes are never read.

    Args:
        u (str): The URL to fetch the HTML content from.
        timeout (float): Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook. Defaults to 30.

    Returns:
        - urls (list): A list of generated URLs.
        - states (list): A list of state names corresponding to the URLs
          (same length and order as ``urls``).

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        KeyError: If an option after the first has no ``value`` attribute.
    """
    # Base URL for generating party-wise result links
    url_base = "https://results.eci.gov.in/PcResultGenJune2024/partywiseresult-"

    # Fetch the page; fail loudly on an HTTP error instead of parsing an error page
    page = requests.get(u, timeout=timeout)
    page.raise_for_status()

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(page.text, 'html')

    # Keep every 'option' tag except the first one. Both result lists are built
    # from this single slice so they are always aligned index-for-index.
    entries = soup.find_all('option')[1:]

    # Generate URLs by appending each option value to the base URL
    urls = [url_base + tag['value'] + ".htm" for tag in entries]

    # The visible text of each option, without surrounding whitespace
    states = [tag.text.strip() for tag in entries]
    return urls, states

## Generate a list of URLs and corresponding constituency names based on the options found in the given URL

In [26]:
def url_generation_cand(u, timeout=30):
    """
    Builds the constituency-wise result URLs and constituency names from the options found in the given URL.

    The page at ``u`` is downloaded and every ``<option>`` element after the first
    is turned into one URL (base + option value + ".htm") and one constituency
    name (the option's stripped text). The first option is skipped entirely; it
    is presumably the dropdown's placeholder entry, so its attributes are never read.

    Args:
        u (str): The URL to fetch the HTML content from.
        timeout (float): Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook. Defaults to 30.

    Returns:
        - urls (list): A list of generated URLs for each constituency.
        - constituents (list): A list of constituency names corresponding to the
          URLs (same length and order as ``urls``).

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        KeyError: If an option after the first has no ``value`` attribute.
    """
    # Base URL for generating constituency-wise result links
    url_base = "https://results.eci.gov.in/PcResultGenJune2024/Constituencywise"

    # Fetch the page; fail loudly on an HTTP error instead of parsing an error page
    page = requests.get(u, timeout=timeout)
    page.raise_for_status()

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(page.text, 'html')

    # Keep every 'option' tag except the first one. Both result lists are built
    # from this single slice so they are always aligned index-for-index.
    entries = soup.find_all('option')[1:]

    # Generate URLs by appending each option value to the base URL
    urls = [url_base + tag['value'] + ".htm" for tag in entries]

    # The visible text of each option, without surrounding whitespace
    constituents = [tag.text.strip() for tag in entries]
    return urls, constituents

In [27]:
# Per-row state and constituency names. A later cell attaches these to ``df``
# as columns, so each list must receive exactly one entry per scraped row.
state = []
const = []

# Generate state-level URLs and corresponding state names
urls_state, states_list = url_generation_party("https://results.eci.gov.in/PcResultGenJune2024/index.htm")

# Column labels for the scraped candidate table
headers = ['Candidate', 'Party', 'EVM Votes', 'Postal Votes']

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; growing a DataFrame one ``.loc`` row at a time is quadratic.
rows = []

# Walk each state URL together with its name (the two lists are index-aligned),
# which avoids looking the name up again with ``list.index`` for every row.
for state_url, state_name in zip(urls_state, states_list):
    # Generate constituency-level URLs and corresponding constituency names for this state
    urls_cons, cons = url_generation_cand(state_url)

    for cons_url, cons_name in zip(urls_cons, cons):
        # Fetch the constituency page; raise on an HTTP error so a failed page
        # is not silently recorded as "no candidates"
        page_t = requests.get(cons_url, timeout=30)
        page_t.raise_for_status()

        # Parse the HTML content using BeautifulSoup
        soup_t = BeautifulSoup(page_t.text, 'html')

        # Find all 'tr' tags (table rows) in the parsed HTML
        raw_rows = soup_t.find_all('tr')

        # The first and last rows are skipped (presumably the column header and
        # a totals footer -- confirm against the page layout).
        for row in raw_rows[1:-1]:
            cells = row.find_all('td')

            # Drop the first cell and the last two cells; the remaining ones
            # are expected to line up with ``headers``.
            values = [cell.text.strip() for cell in cells[1:-2]]
            if len(values) != len(headers):
                raise ValueError(
                    f"Expected {len(headers)} values per row but got {len(values)} on {cons_url}: {values!r}"
                )

            rows.append(values)

            # Record the constituency and state this row belongs to
            const.append(cons_name)
            state.append(state_name)

# Build the DataFrame in a single step from the collected rows
df = pd.DataFrame(rows, columns=headers)

In [28]:
# Attach the per-row state and constituency labels gathered while scraping,
# then print the resulting table
df = df.assign(state=state, pc_name=const)
print(df)

               Candidate                                        Party  \
0        BISHNU PADA RAY                       Bharatiya Janata Party   
1     KULDEEP RAI SHARMA                     Indian National Congress   
2             MANOJ PAUL          Andaman Nicobar Democratic Congress   
3             D AYYAPPAN          Communist Party of India  (Marxist)   
4        V.K. ABDUL AZIZ                                  Independent   
...                  ...                                          ...   
8897     SK. SAPIYAR ALI                                  Independent   
8898         NIKHIL BERA  Socialist Unity Centre Of India (COMMUNIST)   
8899   AMAL KUMAR BARMAN                                  Independent   
8900    RAMPRASAD GHORAI                          Indian Unity Centre   
8901                NOTA                            None of the Above   

     EVM Votes Postal Votes                      state  \
0       102182          254  Andaman & Nicobar Islands   
1      

In [31]:
# Bare expression on the last line of the cell: the notebook shows the DataFrame as this cell's output
df

Unnamed: 0,Candidate,Party,EVM Votes,Postal Votes,state,pc_name
0,BISHNU PADA RAY,Bharatiya Janata Party,102182,254,Andaman & Nicobar Islands,Andaman & Nicobar Islands - 1
1,KULDEEP RAI SHARMA,Indian National Congress,77829,211,Andaman & Nicobar Islands,Andaman & Nicobar Islands - 1
2,MANOJ PAUL,Andaman Nicobar Democratic Congress,8236,18,Andaman & Nicobar Islands,Andaman & Nicobar Islands - 1
3,D AYYAPPAN,Communist Party of India (Marxist),6009,8,Andaman & Nicobar Islands,Andaman & Nicobar Islands - 1
4,V.K. ABDUL AZIZ,Independent,2195,8,Andaman & Nicobar Islands,Andaman & Nicobar Islands - 1
...,...,...,...,...,...,...
8897,SK. SAPIYAR ALI,Independent,2924,-,West Bengal,Uluberia - 26
8898,NIKHIL BERA,Socialist Unity Centre Of India (COMMUNIST),2095,12,West Bengal,Uluberia - 26
8899,AMAL KUMAR BARMAN,Independent,1997,3,West Bengal,Uluberia - 26
8900,RAMPRASAD GHORAI,Indian Unity Centre,1568,4,West Bengal,Uluberia - 26


## Export to CSV

In [29]:
# Write the results table to disk as CSV; index=False keeps the row index out of the file
csv_path = 'results2024.csv'
df.to_csv(csv_path, index=False)
