In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pds

In [2]:
def webScrape(links, timeout=30):
    """Collect the distinct ``href`` values of every anchor tag on the given pages.

    Each page is downloaded, parsed with the built-in ``html.parser`` and
    scanned for ``<a>`` tags that carry an ``href`` attribute. The hrefs found
    on a page are printed, followed by the running total of distinct hrefs
    seen so far.

    Parameters
    ----------
    links : iterable of str
        URLs of the pages to scan.
    timeout : float or tuple, optional
        Passed straight to ``requests.get`` so that an unresponsive server
        raises instead of blocking the notebook forever. Defaults to 30.

    Returns
    -------
    set of str
        Every distinct ``href`` value found, exactly as written in the HTML
        (relative hrefs are NOT resolved against the page URL).
    """
    unique_urls = set()
    for link in links:
        response = requests.get(link, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Anchors without an href (e.g. named anchors) are ignored.
        page_urls = {
            anchor['href']
            for anchor in soup.find_all('a')
            if 'href' in anchor.attrs
        }
        unique_urls.update(page_urls)

        # A separate loop variable is used here so the page URL held in
        # `link` is not overwritten while reporting.
        for page_url in page_urls:
            print(page_url)
        print(f"Total Links: {len(unique_urls)}")
    return unique_urls

In [3]:
# Seed pages to crawl. Note that `url` is a one-element list despite its singular name.
url = ['https://results.eci.gov.in/']
# `urls` is the set of distinct hrefs returned by webScrape for the seed page(s).
urls = webScrape(url)

https://apps.apple.com/in/app/voter-helpline/id1456535004
https://results.eci.gov.in/AcResultGenJune2024/index.htm
https://results.eci.gov.in/PcResultGenJune2024/index.htm
https://play.google.com/store/apps/details?id=com.eci.citizen
https://results.eci.gov.in/AcResultGen2ndJune2024/index.htm
index.htm
https://results.eci.gov.in/AcResultByeJune2024/
Total Links: 7


In [5]:
# Keep only the hrefs that point at an ".../index.htm" page. `urls` is a set,
# whose iteration order is arbitrary and can differ between kernel sessions,
# so the result is sorted to make the processing order below reproducible.
filtered_urls = sorted(link for link in urls if link.endswith('/index.htm'))
print(filtered_urls)

['https://results.eci.gov.in/AcResultGen2ndJune2024/index.htm', 'https://results.eci.gov.in/AcResultGenJune2024/index.htm', 'https://results.eci.gov.in/PcResultGenJune2024/index.htm']


In [14]:
def fetch_data(url, timeout=30):
    """Download ``url`` and parse the response body into a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so an unresponsive server cannot block the
        notebook indefinitely. Defaults to 30.

    Returns
    -------
    BeautifulSoup or None
        The parsed document when the server answers with HTTP 200. ``None``
        (after printing a message) when the request fails or any other status
        code is returned.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as a bad
        # status code, so callers only ever need the `if soup:` check.
        print(f"Failed to retrieve {url}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve {url}")
        return None

    return BeautifulSoup(response.content, 'html.parser')

def categorize_tables(soup, url):
    """Split the ``<table>`` elements of a parsed page into two groups.

    For every table whose ``class`` attribute contains ``'table'`` the page
    ``url`` is recorded (once per matching table). Every other table element
    is collected as-is.

    Returns
    -------
    tuple (list, list)
        ``(class_table, non_class_table)``: the first list holds ``url``
        repeated per matching table, the second holds the remaining table
        elements.
    """
    class_table, non_class_table = [], []

    for candidate in soup.find_all('table'):
        css_classes = candidate.get('class', [])
        if 'table' not in css_classes:
            non_class_table.append(candidate)
            continue
        class_table.append(url)

    return class_table, non_class_table

def extract_data(tables, category):
    """Print a heading naming ``category``, then each item of ``tables`` on its own line."""
    heading = f"\nExtracting data from {category} tables..."
    print(heading)
    for item in tables:
        print(item)

# Traverse each URL and record, across ALL pages, which ones contain a table
# carrying the CSS class "table". The aggregate lists live outside the loop so
# that later cells see every page's result rather than only the result of
# whichever page happened to be processed last.
class_tables = []
non_class_tables = []

for url in filtered_urls:
    print(f"Fetching data from {url}")
    soup = fetch_data(url)
    if soup:
        page_class_tables, page_non_class_tables = categorize_tables(soup, url)
        non_class_tables.extend(page_non_class_tables)

        # Print the URLs of class tables
        print(f"Class tables found in {url}:")
        print()

        for table_url in page_class_tables:
            print(table_url)
            # categorize_tables reports the page URL once per matching table;
            # the aggregate keeps a single entry per page.
            if table_url not in class_tables:
                class_tables.append(table_url)


Fetching data from https://results.eci.gov.in/AcResultGen2ndJune2024/index.htm
Class tables found in https://results.eci.gov.in/AcResultGen2ndJune2024/index.htm:

Fetching data from https://results.eci.gov.in/AcResultGenJune2024/index.htm
Class tables found in https://results.eci.gov.in/AcResultGenJune2024/index.htm:

Fetching data from https://results.eci.gov.in/PcResultGenJune2024/index.htm
Class tables found in https://results.eci.gov.in/PcResultGenJune2024/index.htm:

https://results.eci.gov.in/PcResultGenJune2024/index.htm


In [13]:
# Show the page URLs recorded as containing a table with the CSS class "table".
# NOTE(review): `class_tables` is assigned by the traversal loop in the cell above, whose
# execution count (14) is later than this cell's (13) -- re-run top to bottom to confirm the order.
print(class_tables)

['https://results.eci.gov.in/PcResultGenJune2024/index.htm']


In [16]:
import csv
import os

def fetch_data(url, timeout=30):
    """Download ``url`` and parse the response body into a BeautifulSoup tree.

    NOTE(review): an earlier cell already defines ``fetch_data``; this later
    definition is the one in effect for the cells that follow.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so an unresponsive server cannot block the
        notebook indefinitely. Defaults to 30.

    Returns
    -------
    BeautifulSoup or None
        The parsed document when the server answers with HTTP 200. ``None``
        (after printing a message) when the request fails or any other status
        code is returned.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as a bad
        # status code, so callers only ever need the `if soup:` check.
        print(f"Failed to retrieve {url}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve {url}")
        return None

    return BeautifulSoup(response.content, 'html.parser')

def extract_data_to_csv(soup, filename):
    """Write every header-bearing ``<table>`` of ``soup`` into one CSV file.

    For each table that contains at least one ``<th>``, a header line built
    from the stripped text of all its ``<th>`` cells is written, followed by
    one line per ``<tr>`` that has ``<td>`` cells. Tables without any ``<th>``
    are skipped. All tables are written one after another into the same file.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed page to read tables from.
    filename : str
        Path of the CSV file to create; an existing file is overwritten.
    """
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        for table in soup.find_all('table'):
            if not table.find('th'):
                continue

            headers = [th.text.strip() for th in table.find_all('th')]
            writer.writerow(headers)

            for row in table.find_all('tr'):
                cells = row.find_all('td')
                # Rows made only of <th> cells (header/footer rows) carry no
                # data. Skipping them by content, instead of blindly dropping
                # the first <tr>, keeps a leading data row and avoids writing
                # blank CSV lines.
                if not cells:
                    continue
                writer.writerow([cell.text.strip() for cell in cells])

# Folder that receives one CSV per scraped page; created if it does not exist yet.
output_folder = 'csv_files'
os.makedirs(output_folder, exist_ok=True)

# categorize_tables records a page URL once per matching table, so the same
# page can appear several times in class_tables. dict.fromkeys keeps the first
# occurrence of each URL (in order) so every page is downloaded and written once.
for url in dict.fromkeys(class_tables):
    print(f"Fetching data from {url}")
    soup = fetch_data(url)
    if soup:
        # The file is named after the second-to-last "/"-separated piece of the URL,
        # e.g. ".../PcResultGenJune2024/index.htm" -> "PcResultGenJune2024.csv".
        filename = os.path.join(output_folder, f"{url.split('/')[-2]}.csv")
        extract_data_to_csv(soup, filename)
        print(f"Data written to {filename}")


Fetching data from https://results.eci.gov.in/PcResultGenJune2024/index.htm
Data written to csv_files\PcResultGenJune2024.csv


In [39]:
# Page whose state selector is inspected in the following cells.
url = 'https://results.eci.gov.in/PcResultGenJune2024/index.htm'

# The timeout keeps this cell from hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)
soup = BeautifulSoup(response.content, 'html.parser')
# Look the <select> element up by its id; `dropdown` is None when the page has no such element.
dropdown = soup.find('select', id='ctl00_ContentPlaceHolder1_Result1_ddlState')
    

In [40]:
# Dump the raw <select> markup to inspect the available <option> values.
print(dropdown)

<select id="ctl00_ContentPlaceHolder1_Result1_ddlState" name="state" onchange="return GetResult(this)"> <option value=""> Select State Wise </option><option value="U01">Andaman &amp; Nicobar Islands</option><option value="S01">Andhra Pradesh</option><option value="S02">Arunachal Pradesh</option><option value="S03">Assam</option><option value="S04">Bihar</option><option value="U02">Chandigarh</option><option value="S26">Chhattisgarh</option><option value="U03">Dadra &amp; Nagar Haveli and Daman &amp; Diu</option><option value="S05">Goa</option><option value="S06">Gujarat</option><option value="S07">Haryana</option><option value="S08">Himachal Pradesh</option><option value="U08">Jammu and Kashmir</option><option value="S27">Jharkhand</option><option value="S10">Karnataka</option><option value="S11">Kerala</option><option value="U09">Ladakh</option><option value="U06">Lakshadweep</option><option value="S12">Madhya Pradesh</option><option value="S13">Maharashtra</option><option value="S14"

In [49]:
# `bt` collects the non-empty `value` attribute of every <option> in the dropdown.
bt = []
if not dropdown:
    print("Dropdown not found.")
else:
    options = dropdown.find_all('option')
    for option in options:
        option_value = option.get('value')
        # Skip options whose value is missing or empty.
        if not option_value:
            continue
        print(option_value)
        bt.append(option_value)

U01
S01
S02
S03
S04
U02
S26
U03
S05
S06
S07
S08
U08
S27
S10
S11
U09
U06
S12
S13
S14
S15
S16
S17
U05
S18
U07
S19
S20
S21
S22
S29
S23
S24
S28
S25


In [50]:
# Show the collected option values as a single list.
print(bt)

['U01', 'S01', 'S02', 'S03', 'S04', 'U02', 'S26', 'U03', 'S05', 'S06', 'S07', 'S08', 'U08', 'S27', 'S10', 'S11', 'U09', 'U06', 'S12', 'S13', 'S14', 'S15', 'S16', 'S17', 'U05', 'S18', 'U07', 'S19', 'S20', 'S21', 'S22', 'S29', 'S23', 'S24', 'S28', 'S25']
