In [1]:
import pandas as pd
import io
from google.colab import files

uploaded = files.upload()

file_name = list(uploaded.keys())[0]  # Get the name of the uploaded file
df = pd.read_csv(io.BytesIO(uploaded[file_name]))

print("First 5 rows of the dataset:")
print(df.head())

Saving general_election_tamilnadu_legislative_assembly_2016_results_at_a_glance_date_of_general_election_2018.csv to general_election_tamilnadu_legislative_assembly_2016_results_at_a_glance_date_of_general_election_2018.csv
First 5 rows of the dataset:
   S.No               Name of the Political Party  \
0     1  All India Anna Dravida Munnetra Kazhagam   
1     2                   All India Forward Block   
2     3                       Bahujan Samaj Party   
3     4                    Bharatiya Janata Party   
4     5                  Communist Party of India   

   Total No. of Seats Con-tested No. of Seats won  Total No. of Votes Polled  \
0                            234             135*                   17616266   
1                             33                0                      44546   
2                            158                0                      97823   
3                            188                0                    1228704   
4                           

In [2]:
df.columns = df.columns.str.strip().str.replace('-', '')

print("\nCleaned column names:")
print(df.columns)


Cleaned column names:
Index(['S.No', 'Name of the Political Party', 'Total No. of Seats Contested',
       'No. of Seats won', 'Total No. of Votes Polled',
       'Percentage of Total Votes Polled in the State',
       'Percentage of Total Votes in the Seats Contested'],
      dtype='object')


In [4]:
df['Seat Efficiency'] = (pd.to_numeric(df['No. of Seats won'], errors='coerce') / pd.to_numeric(df['Total No. of Votes Polled'], errors='coerce')) * 100

df_sorted_by_seats = df.sort_values(by='No. of Seats won', ascending=False)
df_sorted_by_efficiency = df.sort_values(by='Seat Efficiency', ascending=False)

print("\nTop 5 parties by seats won:")
print(df_sorted_by_seats.head())

print("\nTop 5 parties by seat efficiency:")
print(df_sorted_by_efficiency.head())



Top 5 parties by seats won:
    S.No               Name of the Political Party  \
7      8                 Dravida Munnetra Kazhagam   
8      9                  Indian National Congress   
0      1  All India Anna Dravida Munnetra Kazhagam   
14    15                Indian Union Musilm League   
1      2                   All India Forward Block   

    Total No. of Seats Contested No. of Seats won  Total No. of Votes Polled  \
7                            180               88                   13669116   
8                             41                8                    2774075   
0                            234             135*                   17616266   
14                             5                1                     313808   
1                             33                0                      44546   

    Percentage of Total Votes Polled in the State  \
7                                           31.64   
8                                            6.42   
0     

In [11]:
print(df[['No. of Seats won', 'Total No. of Seats Contested']].info())

print("\nDataset with Seat Efficiency and Seat Share Percentage:")
print(df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 2 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   No. of Seats won              15 non-null     object
 1   Total No. of Seats Contested  15 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 372.0+ bytes
None

Dataset with Seat Efficiency and Seat Share Percentage:
   S.No               Name of the Political Party  \
0     1  All India Anna Dravida Munnetra Kazhagam   
1     2                   All India Forward Block   
2     3                       Bahujan Samaj Party   
3     4                    Bharatiya Janata Party   
4     5                  Communist Party of India   

   Total No. of Seats Contested No. of Seats won  Total No. of Votes Polled  \
0                           234             135*                   17616266   
1                            33                0                      44

In [13]:
df['Seat Efficiency'] = (pd.to_numeric(df['No. of Seats won'], errors='coerce') / pd.to_numeric(df['Total No. of Votes Polled'], errors='coerce')) * 100

df['Seat Share Percentage'] = (pd.to_numeric(df['No. of Seats won'], errors='coerce') / pd.to_numeric(df['Total No. of Seats Contested'], errors='coerce')) * 100


df_sorted_by_seats = df.sort_values(by='No. of Seats won', ascending=False)
df_sorted_by_efficiency = df.sort_values(by='Seat Efficiency', ascending=False)

print("\nTop 5 parties by seats won:")
print(df_sorted_by_seats.head())

print("\nTop 5 parties by seat efficiency:")
print(df_sorted_by_efficiency.head())

top_5_vote_share = df.nlargest(5, 'Percentage of Total Votes Polled in the State')

top_5_seat_share = df.nlargest(5, 'Seat Share Percentage')

print("\nTop 5 parties by vote share:")
print(top_5_vote_share)

print("\nTop 5 parties by seat share:")
print(top_5_seat_share)



Top 5 parties by seats won:
    S.No               Name of the Political Party  \
7      8                 Dravida Munnetra Kazhagam   
8      9                  Indian National Congress   
0      1  All India Anna Dravida Munnetra Kazhagam   
14    15                Indian Union Musilm League   
1      2                   All India Forward Block   

    Total No. of Seats Contested No. of Seats won  Total No. of Votes Polled  \
7                            180               88                   13669116   
8                             41                8                    2774075   
0                            234             135*                   17616266   
14                             5                1                     313808   
1                             33                0                      44546   

    Percentage of Total Votes Polled in the State  \
7                                           31.64   
8                                            6.42   
0     

In [15]:

import plotly.express as px

print("\nVisualization 1: Party Performance (Seats Won vs Votes Polled)")

df['Seat Efficiency'] = df['Seat Efficiency'].fillna(0)

fig1 = px.scatter(df, x='Total No. of Votes Polled', y='No. of Seats won',
                  color='Name of the Political Party', size='Seat Efficiency',
                  title='Party Performance: Seats Won vs Votes Polled',
                  labels={'Total No. of Votes Polled': 'Votes Polled', 'No. of Seats won': 'Seats Won'})
fig1.show()




Visualization 1: Party Performance (Seats Won vs Votes Polled)


In [16]:

df.to_csv('analyzed_election_results.csv', index=False)
print("\nAnalyzed dataset saved as 'analyzed_election_results.csv'.")


Analyzed dataset saved as 'analyzed_election_results.csv'.
