# Import basic libraries and Create functions to fetch data

In [1]:
#@title Import necessary libraries
# Data Manipulation
import pandas as pd

#Visualization
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Web Scrapping
from bs4 import BeautifulSoup
import requests

In [2]:
# Gloabl color codes for printing messages
blue_text = '\033[34m'
green_text = '\033[32m'
red_text = '\033[31m'
yellow_text = '\033[33m'
reset_text = '\033[0m'

In [3]:
#@title Function to check validity of webpage 
def check_valid(seat_code):
  url = 'https://results.eci.gov.in/AcResultGenOct2024/ConstituencywiseS07'+str(seat_code)+'.htm'
  headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Referer": "https://results.eci.gov.in"
    }
  # Step 1: Fetch the webpage content
  response = requests.get(url, headers=headers)
  if response.status_code == 200:
    return True
  else:
    return False

In [4]:
#@title Function to fetch data from webpage and convert the table into standard pandas dataframe
def get_table(seat_code):
  url = 'https://results.eci.gov.in/AcResultGenOct2024/ConstituencywiseS07'+str(seat_code)+'.htm'
  headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Referer": "https://results.eci.gov.in"
    }
  # Step 1: Fetch the webpage content
  response = requests.get(url, headers=headers)
  webpage = response.content
  # Step 2: Parse the HTML
  soup = BeautifulSoup(webpage, 'html.parser')
  # Step 3: Find the specific table (adjust the class name or other attributes as necessary)
  table = soup.find('table', {'class': 'table table-striped table-bordered'})
  # Step 4: Define the headers
  headers = ['S.N.', 'Candidate', 'Party', 'EVM Votes', 'Postal Votes', 'Total Votes', '% of Votes']
  # Step 5: Extract the rows
  rows = []
  for tr in table.find_all('tr')[1:-1]:  # Skip the header row and last total row
      cells = tr.find_all('td')
      row = [cell.get_text().strip() for cell in cells]
      rows.append(row)
  # Step 6: Create a DataFrame and return it
  data = pd.DataFrame(rows, columns=headers)
  data['Constituency'] = seat_code
  return data

# Fetch data from webpage

For delhi election result URL is in form of `https://results.eci.gov.in/AcResultGenOct2024/ConstituencywiseS07<constituency_number>.htm`

Where constuitency number is from 1 to 70

In [5]:
# Extracting election result data for all seats
last_constuitency = 90
table = pd.DataFrame(columns=['S.N.', 'Candidate', 'Party', 'EVM Votes', 'Postal Votes', 'Total Votes', '% of Votes', 'Constituency'])
for constituency in range(1,last_constuitency+1):
  if check_valid(constituency):
      table = pd.concat([table, get_table(constituency)], ignore_index=True)
# Save table as CSV file for further use
table.to_csv('files/haryana_2025_raw_data.csv', index=False)
table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1121 entries, 0 to 1120
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   S.N.          1121 non-null   object
 1   Candidate     1121 non-null   object
 2   Party         1121 non-null   object
 3   EVM Votes     1121 non-null   object
 4   Postal Votes  1121 non-null   object
 5   Total Votes   1121 non-null   object
 6   % of Votes    1121 non-null   object
 7   Constituency  1121 non-null   object
dtypes: object(8)
memory usage: 70.2+ KB


In [6]:
table

Unnamed: 0,S.N.,Candidate,Party,EVM Votes,Postal Votes,Total Votes,% of Votes,Constituency
0,1,OM PARKASH GUJJAR,Aam Aadmi Party,854,4,858,0.59,1
1,2,CHARAN SINGH,Bahujan Samaj Party,1368,6,1374,0.94,1
2,3,PARDEEP CHAUDHARY,Indian National Congress,49544,185,49729,34.07,1
3,4,SHAKTI RANI SHARMA,Bharatiya Janata Party,60497,115,60612,41.53,1
4,5,AMIT SHARMA,Independent,582,0,582,0.4,1
...,...,...,...,...,...,...,...,...
1116,10,LALIT NAGAR S/O LATE BHARAT SINGH NAGAR,Independent,56729,99,56828,27.9,90
1117,11,LALIT NAGAR S/O VIJAY PAL,Independent,978,3,981,0.48,90
1118,12,SANDEEP MEHTA,Independent,533,1,534,0.26,90
1119,13,PUNDIT SUBHASH CHANDRA DUBEY GORAKHPURI,Independent,298,0,298,0.15,90


# Read and merge files for further processing

In [7]:
# Fetch data and merge it with party codes and colors
parties = pd.read_csv('files/Indian_Political_Party_List.csv')
result = pd.read_csv('files/haryana_2025_raw_data.csv')
constituency = pd.read_csv('files/Haryana_Constituency_List.csv')
# Remove unnecessary spaces from party names
result["Party"] = result["Party"].str.strip().replace(r"\s+", " ", regex=True)

result = pd.merge(result, parties, left_on='Party', right_on='Name', how='left')
result = pd.merge(result, constituency, on='Constituency', how='left')

In [8]:
# Change datatypes of columns
result['S.N.'] = result['S.N.'].astype(int)
result['EVM Votes'] = result['EVM Votes'].astype(int)
result['Postal Votes'] = result['Postal Votes'].astype(int)
result['Total Votes'] = result['Total Votes'].astype(int)
result['% of Votes'] = result['% of Votes'].astype(float)
result['Constituency'] = result['Constituency'].astype(int)

In [9]:
result['Rank']=result.groupby(by='Constituency')['Total Votes'].rank('first', ascending=False)
result['WinnerVotes'] = result.groupby('Constituency')['Total Votes'].transform('max')
result['RunnerUpVotes'] = result.groupby('Constituency')['Total Votes'].transform(lambda x: x.nlargest().iloc[1])
result['Margin'] = result.apply(lambda row: row['Total Votes'] - row['RunnerUpVotes'] if row['Rank'] == 1 else row['Total Votes']-row['WinnerVotes'], axis=1)
result['VoterTurnout'] = result.groupby('Constituency')['Total Votes'].transform('sum')
result['VoteShare'] = 100*result['Total Votes']/result['VoterTurnout']
result['VoteShare'] = result['VoteShare'].round(2)
result.sort_values(by=['Constituency', 'Rank'], inplace=True)
result.reset_index(drop=True, inplace=True)
result.fillna({'Color':'#000000'},inplace=True)
result.fillna('Other', inplace=True)

In [10]:
result[result['Total Votes']<704][result['Total Votes']>590].sort_values('Total Votes')

  result[result['Total Votes']<704][result['Total Votes']>590].sort_values('Total Votes')


Unnamed: 0,S.N.,Candidate,Party,EVM Votes,Postal Votes,Total Votes,% of Votes,Constituency,Name,Color,...,Constituency Name,District,Lok Sabha,Constituency Category,Rank,WinnerVotes,RunnerUpVotes,Margin,VoterTurnout,VoteShare
463,5,HAWA SINGH KHOBRA,Jannayak Janta Party,588,3,591,0.33,39,Jannayak Janta Party,#026D37,...,Tohana,Fatehabad,Sirsa,GEN,6.0,88522,77686,-87931,180467,0.33
1060,12,RAHUL,Independent,596,0,596,0.36,85,Independent,#DCDCDC,...,Prithla,Faridabad,Faridabad,GEN,10.0,70262,49721,-69666,167191,0.36
987,11,NOTA,None of the Above,599,1,600,0.29,78,None of the Above,#FFFFFF,...,Sohna,Gurgaon,Gurgaon,GEN,8.0,61243,49366,-60643,203562,0.29
829,4,HARPAL SINGH,Aam Aadmi Party,595,6,601,0.45,65,Aam Aadmi Party,#0072B0,...,Badli,Jhajjar,Rohtak,GEN,4.0,68160,51340,-67559,132300,0.45
67,10,NOTA,None of the Above,601,4,605,0.35,7,None of the Above,#FFFFFF,...,Sadhaura,Yamunanagar,Ambala,SC,6.0,57534,55835,-56929,174150,0.35
378,2,DEEPAK MALIK,Jannayak Janta Party,597,8,605,0.47,33,Jannayak Janta Party,#026D37,...,Baroda,Sonipat,Sonipat,GEN,6.0,54462,48820,-53857,129971,0.47
100,6,DIWAN CHAND KAMBOJ,Bhartiya Shakti Chetna Party,601,5,606,0.4,10,Other,#000000,...,Radaur,Yamunanagar,Kurukshetra,GEN,6.0,73348,60216,-72742,153018,0.4
367,2,KULDEEP MALIK,Jannayak Janta Party,607,1,608,0.46,32,Jannayak Janta Party,#026D37,...,Gohana,Sonipat,Sonipat,GEN,7.0,57055,46626,-56447,130786,0.46
261,13,REKHA,Independent,608,1,609,0.38,23,Independent,#DCDCDC,...,Assandh,Karnal,Karnal,GEN,7.0,54761,52455,-54152,162292,0.38
496,7,TEJPAL,Independent,609,1,610,0.37,41,Independent,#DCDCDC,...,Ratia,Fatehabad,Sirsa,SC,7.0,86426,64984,-85816,164504,0.37


# Party wise analysis

In [11]:
party_wise = result.groupby(by=['Name','Abb','Color']).agg({'EVM Votes':'sum','Postal Votes':'sum','Total Votes':'sum'}).reset_index()
party_wise['Rank']=party_wise['Total Votes'].rank(ascending=False)
party_wise.loc[party_wise.Rank > 8, 'Name'] = 'Other'
party_wise.loc[party_wise.Rank > 8, 'Abb'] = 'Other'

winner = result[result.Rank==1][['Party','Color','Abb']]
winner = winner.groupby(['Party','Color','Abb']).size().reset_index()

# Define the figure with a 1x2 layout
fig = make_subplots(rows=1, cols=2, subplot_titles=["Vote Share", "Seat Share"], specs=[[{"type": "domain"}, {"type": "domain"}]])

# First Pie Chart
fig.add_trace(go.Pie(labels=party_wise.Abb, values=party_wise['Total Votes'],
                     marker=dict(colors=party_wise['Color']), hole=0.5, name=''), row=1, col=1)

# Second Pie Chart
fig.add_trace(go.Pie(labels=winner.Abb, values=winner[0],
                     marker=dict(colors=winner['Color']), hole=0.5, name=''), row=1, col=2)

# Adjust layout for better spacing
fig.update_layout(height=500, width=1000, showlegend=True)

fig.show()

# Constuitency wise analysis

In [12]:
# All constituencies at a glance 
fig = go.Figure()
fig.add_traces(go.Bar(y=result['Constituency Name'], x=result['Total Votes'], customdata=result[['Abb','Candidate']],
                      orientation='h', marker_color=result['Color'], hovertemplate='%{y}<br>%{customdata[1]}<br>%{customdata[0]} %{x} votes',
                      hovertext=list(zip(result['Abb'],result['Candidate'])), name=''))
fig.update_layout(title='Constituency wise all candidates', height=1500)

In [13]:
# Postal ballot at a glance 
fig = go.Figure()
fig.add_traces(go.Bar(y=result['Constituency Name'], x=result['Postal Votes'], customdata=result[['Abb','Candidate']],
                      orientation='h', marker_color=result['Color'], hovertemplate='%{y}<br>%{customdata[1]}<br>%{customdata[0]} %{x} votes',
                      hovertext=list(zip(result['Abb'],result['Candidate'])), name=''))
fig.update_layout(title='Constituency wise Postal Ballot votes', height=1500)

In [14]:
winner = result[result.Rank==1].sort_values(by='Margin', ascending=False)

fig = go.Figure()
fig.add_trace(go.Bar(x=winner['Margin'], y=winner['Constituency Name'],
                     orientation='h', marker_color=winner.Color,
                     text=winner[['Abb','Candidate']],texttemplate='%{text[0]}, %{x}', textposition='outside'))
fig.update_layout(title='Constituency wise Winning Margin', height=1500)

In [15]:
from scipy.stats import chi2_contingency

H0 = 'There is no relation between seat category and party'
H1 = 'Seat category and party are dependent'

winner = result[result.Rank==1]
test_table = pd.crosstab(index=winner['Abb'], columns=winner['Constituency Category'], values=winner['Rank'], aggfunc='count')
statistic, pvalue, dof, expected_freq = chi2_contingency(test_table)

print('\tTest statistic confidence 99% (Significance level 0.01)')
if pvalue < 0.01:
    print('\tReject the null hypothesis', H1)
else:
    print('\tFail to reject the null hypothesis', H0)


sc_seats = result[(result['Constituency Category']!='GEN') & (result['Rank']==1)].sort_values(by='Margin')
fig = go.Figure()
fig.add_trace(go.Bar(x=sc_seats['Constituency Name'], y=sc_seats['Margin'],
                     marker_color=sc_seats['Color']))
fig.update_layout(title='Winning Margin for SC seats', xaxis_title='Constituency Name', yaxis_title='Margin')

	Test statistic confidence 99% (Significance level 0.01)
	Fail to reject the null hypothesis There is no relation between seat category and party


# Second runner-up impact on winning candidate

In [16]:
positions = result[result.Rank.isin([1,2,3])].pivot(index=['Constituency', 'Constituency Name'], columns='Rank',
                                                    values=['Abb','Total Votes','Margin','VoterTurnout'])
positions.columns = [f"{col[0]}_{int(col[1])}" for col in positions.columns]
positions.reset_index(inplace=True)
positions[['Total Votes_1','Total Votes_2','Total Votes_3','Margin_1','Margin_2','Margin_3']]=positions[['Total Votes_1','Total Votes_2','Total Votes_3','Margin_1','Margin_2','Margin_3']].astype(int)
positions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Constituency       90 non-null     int64 
 1   Constituency Name  90 non-null     object
 2   Abb_1              90 non-null     object
 3   Abb_2              90 non-null     object
 4   Abb_3              90 non-null     object
 5   Total Votes_1      90 non-null     int64 
 6   Total Votes_2      90 non-null     int64 
 7   Total Votes_3      90 non-null     int64 
 8   Margin_1           90 non-null     int64 
 9   Margin_2           90 non-null     int64 
 10  Margin_3           90 non-null     int64 
 11  VoterTurnout_1     90 non-null     object
 12  VoterTurnout_2     90 non-null     object
 13  VoterTurnout_3     90 non-null     object
dtypes: int64(7), object(7)
memory usage: 10.0+ KB


In [17]:
from scipy.stats import shapiro, pearsonr, spearmanr

alpha = 0.01

# Check if data is normal or not
_, margin_p = shapiro(positions['Margin_1'])
_, votes_p = shapiro(positions['Total Votes_3'])

if (margin_p > alpha) and (votes_p > alpha):
    # If data is normal, use Pearson's correlation
    corr, p = pearsonr(positions['Margin_1'], positions['Total Votes_3'])
else:
    # If data is not normal, use Spearman's correlation
    corr, p = spearmanr(positions['Margin_1'], positions['Total Votes_3'])

print(f"{yellow_text}Correlation: {corr:.2f}, p-value: {p:.2f}")
if p < alpha:
    print(f"{green_text}Reject the null hypothesis.\nThere is a significant correlation between second runner-up vote share and winner candidate margin.")
else:
    print(f"{red_text}Fail to reject the null hypothesis.\nThere is a no significant correlation between second runner-up vote share and winner candidate margin.")


[33mCorrelation: 0.11, p-value: 0.29
[31mFail to reject the null hypothesis.
There is a no significant correlation between second runner-up vote share and winner candidate margin.
