In [None]:
from io import BytesIO
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup


def extract_links_with_keywords(base_url, keyword, timeout=30):
    """Return absolute URLs of the anchors on a page whose ``href`` contains ``keyword``.

    Args:
        base_url: URL of the page to fetch; also used as the base when
            resolving relative ``href`` values.
        keyword: Substring that must occur in an anchor's raw ``href`` value
            (checked before the link is made absolute).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of absolute URLs in document order. An empty list is returned,
        after printing a message, when the request fails or the response
        status is not 200.
    """
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(base_url, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like HTTP failures: report and return nothing.
        print(f"Failed to retrieve the webpage. Error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    return [
        urljoin(base_url, anchor['href'])
        for anchor in soup.find_all('a', href=True)
        if keyword in anchor['href']
    ]

def extract_table_from_url(url, timeout=30):
    """Fetch ``url`` and parse every HTML table on the page into a DataFrame.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one ``pandas.DataFrame`` per table found on the page.
        An empty list is returned, after printing a message, when the request
        fails, the response status is not 200, or no table can be parsed.
    """
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like HTTP failures: report and return nothing.
        print(f"Failed to retrieve the webpage. Error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return []

    try:
        # Wrap the payload in a file-like object: handing literal HTML
        # bytes/strings to read_html is deprecated in recent pandas releases.
        return pd.read_html(BytesIO(response.content))
    except ValueError as exc:
        # read_html raises ValueError when the page contains no tables; a
        # table-less page should not abort a crawl over many links.
        print(f"No tables could be parsed from {url}: {exc}")
        return []

def get_table_links(base_url, timeout=30):
    """Collect the cell text and first link of every row in the page's first table.

    Args:
        base_url: URL of the page to fetch; also used as the base when
            resolving relative ``href`` values.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one entry per ``<tr>`` of the first ``<table>`` on the
        page. Each entry is a list holding the stripped text of the row's
        ``<td>`` cells, followed by the absolute URL of the first anchor in
        the row when there is one. Rows without ``<td>`` cells and without a
        link (for example a header row made of ``<th>`` cells) yield an empty
        list. An empty list is returned, after printing a message, when the
        request fails, the response status is not 200, or the page has no
        table.
    """
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(base_url, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like HTTP failures: report and return nothing.
        print(f"Failed to retrieve the webpage. Error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    main_table = soup.find('table')
    if main_table is None:
        # find() returns None when the page has no <table>; calling
        # find_all on it would raise AttributeError.
        print("Failed to find a table on the webpage.")
        return []

    link_data = []
    for row in main_table.find_all('tr'):
        row_data = [cell.text.strip() for cell in row.find_all('td')]
        link = row.find('a', href=True)
        if link:
            # The link is appended after the cell texts, so it is always the last item.
            row_data.append(urljoin(base_url, link['href']))
        link_data.append(row_data)

    return link_data

def create_dataframes_from_rows(main_table_df, links_data):
    """Fetch the tables linked from each summary row and label them with that row.

    Args:
        main_table_df: DataFrame whose rows are the summary rows (one per
            linked detail page).
        links_data: Per-row lists as produced by ``get_table_links``: the
            row's cell texts, followed by the row's link when it has one.

    Returns:
        A list of DataFrames, one per table found behind each row's link.
        Every DataFrame has two-level column headers: the upper level carries
        the summary row's values (padded with empty strings, or truncated, to
        the table's width) and the lower level carries the table's own
        column labels.
    """
    # Rows with neither <td> cells nor a link (e.g. the <th> header row) come
    # through as empty lists. Indexing links_data directly therefore pairs each
    # summary row with the link of the row before it. Dropping the empty
    # entries restores positional alignment.
    # NOTE(review): assumes every empty entry is a row pandas does not emit as
    # a data row before the last linked row — confirm for other page layouts.
    linked_rows = [entry for entry in links_data if entry]

    all_dataframes = []
    # Use the row position, not the index label, so a non-default index
    # cannot shift or break the lookup.
    for position, (_, row) in enumerate(main_table_df.iterrows()):
        if position >= len(linked_rows):
            # The summary table has more rows than link entries (e.g. a totals row).
            break

        row_data = row.tolist()
        entry = linked_rows[position]
        # An entry longer than the summary row carries a trailing link.
        link = entry[-1] if len(entry) > len(row_data) else None
        if not link:
            continue

        for table in extract_table_from_url(link):
            width = len(table.columns)
            # Pad or truncate the summary values to the table's width;
            # MultiIndex.from_arrays requires arrays of equal length.
            upper_level = (row_data + [""] * (width - len(row_data)))[:width]
            combined_headers = pd.MultiIndex.from_arrays([upper_level, table.columns])
            all_dataframes.append(pd.DataFrame(table.values, columns=combined_headers))

    return all_dataframes

In [None]:
# Party-wise results of the 2024 Lok Sabha elections, read from results.eci.gov.in.
base_url = "https://results.eci.gov.in/PcResultGenJune2024/index.htm"

# extract_table_from_url returns a list (one DataFrame per table on the page),
# so `df` is a list of DataFrames rather than a single DataFrame.
df = extract_table_from_url(base_url)

print(df)


[                                                Party  Won  Leading  Total
0                        Bharatiya Janata Party - BJP  240        0    240
1                      Indian National Congress - INC   99        0     99
2                                Samajwadi Party - SP   37        0     37
3                 All India Trinamool Congress - AITC   29        0     29
4                     Dravida Munnetra Kazhagam - DMK   22        0     22
5                                  Telugu Desam - TDP   16        0     16
6                         Janata Dal (United) - JD(U)   12        0     12
7      Shiv Sena (Uddhav Balasaheb Thackrey) - SHSUBT    9        0      9
8   Nationalist Congress Party – Sharadchandra Paw...    8        0      8
9                                     Shiv Sena - SHS    7        0      7
10             Lok Janshakti Party(Ram Vilas) - LJPRV    5        0      5
11      Yuvajana Sramika Rythu Congress Party - YSRCP    4        0      4
12                      

In [None]:
# Candidate-wise results for each political party.
base_url = "https://results.eci.gov.in/PcResultGenJune2024/index.htm"

# Step 1: use the first table on the index page as the main DataFrame,
# falling back to an empty DataFrame when nothing was returned.
main_dfs = extract_table_from_url(base_url)
if main_dfs:
    main_df = main_dfs[0]
else:
    main_df = pd.DataFrame()

# Step 2: collect the cell texts and link of every row in the first table.
links_data = get_table_links(base_url)

# Step 3: build one DataFrame per table found behind each row's link.
all_dataframes = create_dataframes_from_rows(main_df, links_data)

# Preview the first rows of every DataFrame that was built.
for i, df1 in enumerate(all_dataframes):
    print(f"DataFrame {i + 1}:")
    print(df1.head())

DataFrame 1:
  Indian National Congress - INC                      99  \
                            S.No Parliament Constituency   
0                              1           Anakapalle(5)   
1                              2          Rajahmundry(8)   
2                              3           Narsapuram(9)   
3                              4       Arunachal West(1)   
4                              5       Arunachal East(2)   

                                             0          99          
                             Winning Candidate Total Votes  Margin  
0                                   C.M.RAMESH      762069  296530  
1                     DAGGUBATI PURANDHESHWARI      726515  239139  
2  BHUPATHI RAJU SRINIVASA VARMA (B.J.P.VARMA)      707343  276802  
3                                 KIREN RIJIJU      205417  100738  
4                                    TAPIR GAO      145581   30421  
DataFrame 2:
  Samajwadi Party - SP                      37                 0      