# Import basic libraries and Create functions to fetch data

In [143]:
#@title Import necessary libraries
# Data Manipulation
import pandas as pd

#Visualization
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Web Scrapping
from bs4 import BeautifulSoup
import requests

In [393]:
# Gloabl color codes for printing messages
blue_text = '\033[34m'
green_text = '\033[32m'
red_text = '\033[31m'
yellow_text = '\033[33m'
reset_text = '\033[0m'

In [4]:
#@title Function to check validity of webpage 
def check_valid(seat_code):
  url = 'https://results.eci.gov.in/ResultAcGenFeb2025/ConstituencywiseU05'+str(seat_code)+'.htm'
  headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Referer": "https://results.eci.gov.in"
    }
  # Step 1: Fetch the webpage content
  response = requests.get(url, headers=headers)
  if response.status_code == 200:
    return True
  else:
    return False

In [6]:
#@title Function to fetch data from webpage and convert the table into standard pandas dataframe
def get_table(seat_code):
  url = 'https://results.eci.gov.in/ResultAcGenFeb2025/ConstituencywiseU05'+str(seat_code)+'.htm'
  headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Referer": "https://results.eci.gov.in"
    }
  # Step 1: Fetch the webpage content
  response = requests.get(url, headers=headers)
  webpage = response.content
  # Step 2: Parse the HTML
  soup = BeautifulSoup(webpage, 'html.parser')
  # Step 3: Find the specific table (adjust the class name or other attributes as necessary)
  table = soup.find('table', {'class': 'table table-striped table-bordered'})
  # Step 4: Define the headers
  headers = ['S.N.', 'Candidate', 'Party', 'EVM Votes', 'Postal Votes', 'Total Votes', '% of Votes']
  # Step 5: Extract the rows
  rows = []
  for tr in table.find_all('tr')[1:-1]:  # Skip the header row and last total row
      cells = tr.find_all('td')
      row = [cell.get_text().strip() for cell in cells]
      rows.append(row)
  # Step 6: Create a DataFrame and return it
  data = pd.DataFrame(rows, columns=headers)
  data['Constituency'] = seat_code
  return data


# Fetch data from webpage

For delhi election result URL is in form of `https://results.eci.gov.in/ResultAcGenFeb2025/ConstituencywiseU05<constituency_number>.htm`

Where constuitency number is from 1 to 70

In [84]:
# Extracting election result data for all seats
last_constuitency = 70
table = pd.DataFrame(columns=['S.N.', 'Candidate', 'Party', 'EVM Votes', 'Postal Votes', 'Total Votes', '% of Votes', 'Constituency'])
for constituency in range(1,last_constuitency+1):
  if check_valid(constituency):
      table = pd.concat([table, get_table(constituency)], ignore_index=True)
# Save table as CSV file for further use
table.to_csv('files/delhi_2025_raw_data.csv', index=False)
table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 769 entries, 0 to 768
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   S.N.          769 non-null    object
 1   Candidate     769 non-null    object
 2   Party         769 non-null    object
 3   EVM Votes     769 non-null    object
 4   Postal Votes  769 non-null    object
 5   Total Votes   769 non-null    object
 6   % of Votes    769 non-null    object
 7   Constituency  769 non-null    object
dtypes: object(8)
memory usage: 48.2+ KB


# Read and merge files for further processing

In [235]:
# Fetch data and merge it with party codes and colors
parties = pd.read_csv('files/Indian_Political_Party_List.csv')
result = pd.read_csv('files/delhi_2025_raw_data.csv')
constituency = pd.read_csv('files/Delhi_Constituency_List.csv')
# Remove unnecessary spaces from party names
result["Party"] = result["Party"].str.strip().replace(r"\s+", " ", regex=True)

result = pd.merge(result, parties, left_on='Party', right_on='Name', how='left')
result = pd.merge(result, constituency, on='Constituency', how='left')

In [236]:
# Change datatypes of columns
result['S.N.'] = result['S.N.'].astype(int)
result['EVM Votes'] = result['EVM Votes'].astype(int)
result['Postal Votes'] = result['Postal Votes'].astype(int)
result['Total Votes'] = result['Total Votes'].astype(int)
result['% of Votes'] = result['% of Votes'].astype(float)
result['Constituency'] = result['Constituency'].astype(int)

In [237]:
result['Rank']=result.groupby(by='Constituency')['Total Votes'].rank('first', ascending=False)
result['WinnerVotes'] = result.groupby('Constituency')['Total Votes'].transform('max')
result['RunnerUpVotes'] = result.groupby('Constituency')['Total Votes'].transform(lambda x: x.nlargest().iloc[1])
result['Margin'] = result.apply(lambda row: row['Total Votes'] - row['RunnerUpVotes'] if row['Rank'] == 1 else row['Total Votes']-row['WinnerVotes'], axis=1)
result.sort_values(by=['Constituency', 'Rank'], inplace=True)
result.reset_index(drop=True, inplace=True)
result.fillna({'Color':'#000000'},inplace=True)
result.fillna('Other', inplace=True)
#sorted(result[result.Abb=='Other'].Party.unique())

# Party wise analysis

In [None]:
party_wise = result.groupby(by=['Name','Abb','Color']).agg({'EVM Votes':'sum','Postal Votes':'sum','Total Votes':'sum'}).reset_index()
party_wise['Rank']=party_wise['Total Votes'].rank(ascending=False)
party_wise.loc[party_wise.Rank > 8, 'Name'] = 'Other'
party_wise.loc[party_wise.Rank > 8, 'Abb'] = 'Other'

winner = result[result.Rank==1][['Party','Color','Abb']]
winner = winner.groupby(['Party','Color','Abb']).size().reset_index()

# Define the figure with a 1x2 layout
fig = make_subplots(rows=1, cols=2, subplot_titles=["Vote Share", "Seat Share"], specs=[[{"type": "domain"}, {"type": "domain"}]])

# First Pie Chart
fig.add_trace(go.Pie(labels=party_wise.Abb, values=party_wise['Total Votes'],
                     marker=dict(colors=party_wise['Color']), hole=0.5), row=1, col=1)

# Second Pie Chart
fig.add_trace(go.Pie(labels=winner.Abb, values=winner[0],
                     marker=dict(colors=winner['Color']), hole=0.5), row=1, col=2)

# Adjust layout for better spacing
fig.update_layout(height=500, width=1000, showlegend=True)

fig.show()

# Constuitency wise analysis

In [306]:
# All constituencies at a glance 
fig = go.Figure()
fig.add_traces(go.Bar(y=result['Constituency Name'], x=result['Total Votes'], customdata=result[['Abb','Candidate']],
                      orientation='h', marker_color=result['Color'], hovertemplate='%{y}<br>%{customdata[1]}<br>%{customdata[0]} %{x} votes',
                      hovertext=list(zip(result['Abb'],result['Candidate'])), name=''))
fig.update_layout(title='Constituency wise all candidates', height=1500)


In [None]:
# Postal ballot at a glance 
fig = go.Figure()
fig.add_traces(go.Bar(y=result['Constituency Name'], x=result['Postal Votes'], customdata=result[['Abb','Candidate']],
                      orientation='h', marker_color=result['Color'], hovertemplate='%{y}<br>%{customdata[1]}<br>%{customdata[0]} %{x} votes',
                      hovertext=list(zip(result['Abb'],result['Candidate'])), name=''))
fig.update_layout(title='Constituency wise Postal Ballot votes', height=1500)

In [321]:
winner = result[result.Rank==1].sort_values(by='Margin', ascending=False)

fig = go.Figure()
fig.add_trace(go.Bar(x=winner['Margin'], y=winner['Constituency Name'],
                     orientation='h', marker_color=winner.Color,
                     text=winner[['Abb','Candidate']],texttemplate='%{text[0]}, %{x}', textposition='outside'))
fig.update_layout(title='Constituency wise Winning Margin', height=1500)

In [347]:
from scipy.stats import chi2_contingency

H0 = 'There is no relation between seat category and party'
H1 = 'Seat category and party are dependent'

winner = result[result.Rank==1]
test_table = pd.crosstab(index=winner['Abb'], columns=winner['Constituency Category'], values=winner['Rank'], aggfunc='count')
statistic, pvalue, dof, expected_freq = chi2_contingency(test_table)

print('Test statistic confidence 99% (Significance lvel 0.01)')
if pvalue < 0.01:
    print('Reject the null hypothesis', H1)
else:
    print('Fail to reject the null hypothesis', H0)


sc_seats = result[(result['Constituency Category']!='GEN') & (result['Rank']==1)].sort_values(by='Margin')
fig = go.Figure()
fig.add_trace(go.Bar(x=sc_seats['Constituency Name'], y=sc_seats['Margin'],
                     marker_color=sc_seats['Color']))
fig.update_layout(title='Winning Margin for SC seats', xaxis_title='Constituency Name', yaxis_title='Margin')

Test statistic confidence 99% (Significance lvel 0.01)
Fail to reject the null hypothesis There is no relation between seat category and party


# Partywise performance

In [382]:
positions = result[result.Rank.isin([1,2,3])].pivot(index=['Constituency', 'Constituency Name'], columns='Rank',
                                                    values=['Abb','Total Votes','Margin'])
positions.columns = [f"{col[0]}_{int(col[1])}" for col in positions.columns]
positions.reset_index(inplace=True)

In [394]:
from scipy.stats import shapiro, pearsonr, spearmanr

alpha = 0.01
# Check if data is normal or not
_, margin_p = shapiro(positions['Margin_1'])
_, votes_p = shapiro(positions['Total Votes_3'])

if (margin_p > alpha) and (votes_p > alpha):
    # If data is normal, use Pearson's correlation
    corr, p = pearsonr(positions['Margin_1'], positions['Total Votes_3'])
else:
    # If data is not normal, use Spearman's correlation
    corr, p = spearmanr(positions['Margin_1'], positions['Total Votes_3'])

print(f"{yellow_text}Correlation: {corr:.2f}, p-value: {p:.2f}")
if p < alpha:
    print(f"{green_text}Reject the null hypothesis.\nThere is a significant correlation between second runner-up vote share and winner candidate margin.")
else:
    print(f"{red_text}Fail to reject the null hypothesis.\nThere is a no significant correlation between second runner-up vote share and winner candidate margin.")

[33mCorrelation: 0.13, p-value: 0.29
[31mFail to reject the null hypothesis.
There is a no significant correlation between second runner-up vote share and winner candidate margin.
