## Data Extraction & Ingestion

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [2]:
url = 'https://en.wikipedia.org/wiki/Results_of_the_2024_Indian_general_election'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

In [3]:
all_tables = soup.find_all('table')
all_tables

[<table class="sidebar sidebar-collapse nomobile nowraplinks vcard hlist" style="width:20em; border: 4px double #D4AF37;"><tbody><tr><td class="sidebar-pretitle"><b><span style="color:var(--color-base, #101112)">This article is part of a series on the</span></b></td></tr><tr><th class="sidebar-title-with-pretitle" style="background: #FF671F; border: 1px double #8C959A;"><small><a href="/wiki/Politics_of_India" title="Politics of India"><span class="tmpl-colored-link" style="color: white; text-decoration: inherit;">Politics of India</span></a></small></th></tr><tr><td class="sidebar-image"><figure class="mw-halign-center skin-invert-image" typeof="mw:File"><a class="mw-file-description" href="/wiki/File:Emblem_of_India.svg"><img class="mw-file-element" data-file-height="933" data-file-width="585" decoding="async" height="128" src="//upload.wikimedia.org/wikipedia/commons/thumb/5/55/Emblem_of_India.svg/120px-Emblem_of_India.svg.png" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/

In [4]:
with open("output.txt", "w") as file_object:
    for table in all_tables:
        file_object.write(str(table))
        file_object.write("\n\n")

In [5]:
table = soup.find_all('table', class_ = 'wikitable sortable mw-collapsible')
table

[<table class="wikitable sortable mw-collapsible">
 <tbody><tr>
 <th rowspan="2">State
 </th>
 <th colspan="3">Parliamentary Constituency
 </th>
 <th colspan="5">Winner<sup class="reference" id="cite_ref-Lok_Sabha_Elections_2024_Results:_Full_List_of_winners_on_all_543_seats_32-0"><a href="#cite_note-Lok_Sabha_Elections_2024_Results:_Full_List_of_winners_on_all_543_seats-32"><span class="cite-bracket">[</span>27<span class="cite-bracket">]</span></a></sup>
 </th>
 <th colspan="5">Runner Up
 </th>
 <th colspan="2">Margin
 </th></tr>
 <tr>
 <th>No.
 </th>
 <th>Name
 </th>
 <th>Type
 </th>
 <th>Candidate
 </th>
 <th colspan="2">Party
 </th>
 <th>%
 </th>
 <th data-sort-type="number">Votes
 </th>
 <th>Candidate
 </th>
 <th colspan="2">Party
 </th>
 <th>%
 </th>
 <th data-sort-type="number">Votes
 </th>
 <th>%
 </th>
 <th>Votes
 </th></tr>
 <tr>
 <td><b><a href="/wiki/Andaman_and_Nicobar_Islands" title="Andaman and Nicobar Islands">Andaman and Nicobar Islands</a></b>
 </td>
 <td>1
 </td>
 <

In [6]:
titles = [header.text.strip() for header in table[0].find_all('th')]
print(len(titles))
print(titles)

19
['State', 'Parliamentary Constituency', 'Winner[27]', 'Runner Up', 'Margin', 'No.', 'Name', 'Type', 'Candidate', 'Party', '%', 'Votes', 'Candidate', 'Party', '%', 'Votes', '%', 'Votes', '']


In [7]:
del titles[1:5]
len(titles)

15

In [8]:
titles.pop()

''

In [9]:
len(titles)
print(titles)

['State', 'No.', 'Name', 'Type', 'Candidate', 'Party', '%', 'Votes', 'Candidate', 'Party', '%', 'Votes', '%', 'Votes']


In [10]:
df = pd.DataFrame(columns=titles)
df.columns

Index(['State', 'No.', 'Name', 'Type', 'Candidate', 'Party', '%', 'Votes',
       'Candidate', 'Party', '%', 'Votes', '%', 'Votes'],
      dtype='object')

In [11]:
ind_rows = []
rows=table[0].find_all('tr')
rows = rows[2:]
for i in rows:
    individual_rows = i.find_all('td')
    ind_rows = [j.text.strip() for j in individual_rows]
    if len(ind_rows) == 16:
        print(ind_rows)

['Andaman and Nicobar Islands', '1', 'Andaman & Nicobar Islands', '', 'Bishnu Pada Ray', '', 'BJP', '50.6%', '1,02,436', 'Kuldeep Rai Sharma', '', 'INC', '38.5%', '78,040', '12.1%', '24,396']
['Andhra Pradesh[28]', '1', 'Araku', 'ST', 'Gumma Thanuja Rani', '', 'YSRCP', '40.96%', '4,77,005', 'Kothapalli Geetha', '', 'BJP', '36.62%', '4,26,425', '4.34%', '50,580']
['Arunachal Pradesh', '1', 'Arunachal West', 'ST', 'Kiren Rijiju', '', 'BJP', '51.4%', '2,05,417', 'Nabam Tuki', '', 'INC', '26.2%', '1,04,679', '25.2%', '100,738']
['Assam', '1', 'Kokrajhar', 'ST', 'Joyanta Basumatary', '', 'UPPL', '39.4%', '4,88,995', 'Kampa Borgoyari', '', 'BPF', '35.2%', '4,37,412', '4.2%', '51,583']
['Bihar[29]', '1', 'Valmiki Nagar', '', 'Sunil Kumar', '', 'JD(U)', '47.5%', '5,23,422', 'Deepak Yadav', '', 'RJD', '38.5%', '4,24,747', '9%', '98,675']
['Chandigarh', '1', 'Chandigarh', '', 'Manish Tewari', '', 'INC', '48.23%', '2,16,657', 'Sanjay Tandon', '', 'BJP', '47.66%', '2,14,153', '0.57%', '2504']
['Ch

In [12]:
print(ind_rows)

['42', 'Birbhum', '', 'Satabdi Roy', '', 'AITC', '47%', '7,17,961', 'Debtanu Bhattacharya', '', 'BJP', '34.06%', '5,20,311', '12.94%', '1,97,650']


In [13]:
ind_rows = []
rows=table[0].find_all('tr')
rows = rows[2:]
for i in rows:
    individual_rows = i.find_all('td')
    ind_rows = [j.text.strip() for j in individual_rows]
    if len(ind_rows) == 15:
        ind_rows.insert(0,np.nan)
        ind_rows.pop(10)
        ind_rows.pop(5)
        print(ind_rows)
        length = len(df)
        df.loc[length] = ind_rows

[nan, '2', 'Srikakulam', '', 'Kinjarapu Ram Mohan Naidu', 'TDP', '61.05%', '7,54,328', 'Perada Tilak', 'YSRCP', '34.51%', '4,26,427', '26.54%', '3,27,901']
[nan, '3', 'Vizianagaram', '', 'Appalanaidu Kalisetti', 'TDP', '57.20%', '7,43,113', 'Bellana Chandra Sekhar', 'YSRCP', '38.00%', '4,93,762', '19.19%', '2,49,351']
[nan, '4', 'Visakhapatnam', '', 'Mathukumilli Bharat', 'TDP', '65.42%', '9,07,467', 'Botsa Jhansi Lakshmi', 'YSRCP', '29.07%', '4,03,220', '36.35%', '5,04,247']
[nan, '5', 'Anakapalli', '', 'C. M. Ramesh', 'BJP', '57.50%', '7,62,069', 'Budi Mutyala Naidu', 'YSRCP', '35.13%', '4,65,539', '22.37%', '2,96,530']
[nan, '6', 'Kakinada', '', 'Tangella Uday Srinivas', 'JSP', '54.87%', '7,29,699', 'Chalamalasetti Sunil', 'YSRCP', '37.62%', '5,00,208', '17.26%', '2,29,491']
[nan, '7', 'Amalapuram', 'SC', 'Ganti Harish Madhur', 'TDP', '61.25%', '7,96,981', 'Rapaka Vara Prasada Rao', 'YSRCP', '34.95%', '4,54,785', '26.30%', '3,42,196']
[nan, '8', 'Rajahmundry', '', 'Daggubati Purande

In [14]:
ind_rows = []
rows=table[0].find_all('tr')
rows = rows[2:]
for i in rows:
    individual_rows = i.find_all('td')
    ind_rows = [j.text.strip() for j in individual_rows]
    if len(ind_rows) == 16:
        print(ind_rows)
        ind_rows.pop(10)
        ind_rows.pop(5)
        print(ind_rows)
        length = len(df)
        df.loc[length] = ind_rows

['Andaman and Nicobar Islands', '1', 'Andaman & Nicobar Islands', '', 'Bishnu Pada Ray', '', 'BJP', '50.6%', '1,02,436', 'Kuldeep Rai Sharma', '', 'INC', '38.5%', '78,040', '12.1%', '24,396']
['Andaman and Nicobar Islands', '1', 'Andaman & Nicobar Islands', '', 'Bishnu Pada Ray', 'BJP', '50.6%', '1,02,436', 'Kuldeep Rai Sharma', 'INC', '38.5%', '78,040', '12.1%', '24,396']
['Andhra Pradesh[28]', '1', 'Araku', 'ST', 'Gumma Thanuja Rani', '', 'YSRCP', '40.96%', '4,77,005', 'Kothapalli Geetha', '', 'BJP', '36.62%', '4,26,425', '4.34%', '50,580']
['Andhra Pradesh[28]', '1', 'Araku', 'ST', 'Gumma Thanuja Rani', 'YSRCP', '40.96%', '4,77,005', 'Kothapalli Geetha', 'BJP', '36.62%', '4,26,425', '4.34%', '50,580']
['Arunachal Pradesh', '1', 'Arunachal West', 'ST', 'Kiren Rijiju', '', 'BJP', '51.4%', '2,05,417', 'Nabam Tuki', '', 'INC', '26.2%', '1,04,679', '25.2%', '100,738']
['Arunachal Pradesh', '1', 'Arunachal West', 'ST', 'Kiren Rijiju', 'BJP', '51.4%', '2,05,417', 'Nabam Tuki', 'INC', '26.2

## Data Preprocessing

In [15]:
df.columns

Index(['State', 'No.', 'Name', 'Type', 'Candidate', 'Party', '%', 'Votes',
       'Candidate', 'Party', '%', 'Votes', '%', 'Votes'],
      dtype='object')

In [18]:
df['State'] = df['State'].apply(editing_state_name)

In [17]:
def editing_state_name(x):
    states_to_keep = [
        "Andaman and Nicobar Islands",
        "Arunachal Pradesh",
        "Assam",
        "Chandigarh",
        "Chhattisgarh",
        "Dadra and Nagar Haveli and Daman and Diu"
    ]
    if pd.isna(x) or x in states_to_keep :
        return x
    else: 
        return(x[:-4])

In [19]:
df['State'].unique()

array([nan, 'Andaman and Nicobar Islands', 'Andhra Pradesh',
       'Arunachal Pradesh', 'Assam', 'Bihar', 'Chandigarh',
       'Chhattisgarh', 'Dadra and Nagar Haveli and Daman and Diu',
       'Delhi', 'Goa', 'Gujarat', 'Haryana', 'Himachal Pradesh',
       'Jammu and Kashmir', 'Jharkhand', 'Karnataka', 'Kerala', 'Ladakh',
       'Lakshadweep', 'Madhya Pradesh', 'Maharashtra', 'Manipur',
       'Meghalaya', 'Mizoram', 'Nagaland', 'Odisha', 'Puducherry',
       'Punjab', 'Rajasthan', 'Sikkim', 'Tamil Nadu', 'Telangana',
       'Tripura', 'Uttar Pradesh', 'Uttarakhand', 'West Bengal'],
      dtype=object)

In [20]:
df['State'].unique()

array([nan, 'Andaman and Nicobar Islands', 'Andhra Pradesh',
       'Arunachal Pradesh', 'Assam', 'Bihar', 'Chandigarh',
       'Chhattisgarh', 'Dadra and Nagar Haveli and Daman and Diu',
       'Delhi', 'Goa', 'Gujarat', 'Haryana', 'Himachal Pradesh',
       'Jammu and Kashmir', 'Jharkhand', 'Karnataka', 'Kerala', 'Ladakh',
       'Lakshadweep', 'Madhya Pradesh', 'Maharashtra', 'Manipur',
       'Meghalaya', 'Mizoram', 'Nagaland', 'Odisha', 'Puducherry',
       'Punjab', 'Rajasthan', 'Sikkim', 'Tamil Nadu', 'Telangana',
       'Tripura', 'Uttar Pradesh', 'Uttarakhand', 'West Bengal'],
      dtype=object)

In [21]:
state_to_consti_map = {
    'Andaman and Nicobar Islands': ['Andaman and Nicobar Islands'],
    'Andhra Pradesh': [
        'Araku','Srikakulam','Vizianagaram','Visakhapatnam','Anakapalli',
        'Kakinada','Amalapuram','Rajahmundry','Narasapuram','Eluru',
        'Machilipatnam','Vijayawada','Guntur','Narasaraopet','Bapatla',
        'Ongole','Nandyal','Kurnool','Anantapur','Hindupur',
        'Kadapa','Nellore','Tirupati','Rajampet','Chittoor'
    ],
    'Arunachal Pradesh': ['Arunachal West','Arunachal East'],
    'Assam': [
        'Kokrajhar','Dhubri','Barpeta','Darrang–Udalguri','Guwahati',
        'Diphu','Karimganj','Silchar','Nagaon','Kaziranga',
        'Sonitpur','Lakhimpur','Dibrugarh','Jorhat'
    ],
    'Bihar': [
        'Valmiki Nagar','Paschim Champaran','Purvi Champaran','Sheohar',
        'Sitamarhi','Madhubani','Jhanjharpur','Supaul','Araria',
        'Kishanganj','Katihar','Purnia','Madhepura','Darbhanga',
        'Muzaffarpur','Vaishali','Gopalganj','Siwan','Maharajganj',
        'Saran','Hajipur','Ujiarpur','Samastipur','Begusarai',
        'Khagaria','Bhagalpur','Banka','Munger','Nalanda',
        'Patna Sahib','Pataliputra','Arrah','Buxar','Sasaram',
        'Karakat','Jahanabad','Aurangabad','Gaya','Nawada','Jamui'
    ],
    'Chandigarh': ['Chandigarh'],
    'Chhattisgarh': [
        'Sarguja','Raigarh','Janjgir‑Champa','Korba','Bilaspur',
        'Rajnandgaon','Durg','Raipur','Mahasamund','Bastar','Kanker'
    ],
    'Dadra and Nagar Haveli and Daman and Diu': [
        'Daman and Diu','Dadra and Nagar Haveli'
    ],
    'Delhi': [
        'Chandni Chowk','North East Delhi','East Delhi','New Delhi',
        'North West Delhi','West Delhi','South Delhi'
    ],
    'Goa': ['North Goa','South Goa'],
    'Gujarat': [
        'Kachchh','Banaskantha','Patan','Mahesana','Sabarkantha',
        'Gandhinagar','Ahmedabad East','Ahmedabad West','Surendranagar',
        'Rajkot','Porbandar','Jamnagar','Junagadh','Amreli','Bhavnagar',
        'Anand','Kheda','Panchmahal','Dahod','Vadodara',
        'Chhota Udaipur','Bharuch','Bardoli','Surat','Navsari','Valsad'
    ],
    'Haryana': [
        'Ambala','Kurukshetra','Sirsa','Hisar','Karnal','Sonipat',
        'Rohtak','Bhiwani‑Mahendragarh','Gurgaon','Faridabad'
    ],
    'Himachal Pradesh': [
        'Hamirpur','Mandi','Shimla','Kangra'
    ],
    'Jammu and Kashmir': [
        'Srinagar','Baramulla','Anantnag–Rajouri','Udhampur','Jammu'
    ],
    'Jharkhand': [
        'Chatra','Giridih','Ranchi','Jamshedpur','Dhanbad',
        'Singhbhum','Hazaribagh','Palamu','Godda','Rajmahal',
        'Khunti','Dumka','Garhwa','Sahibganj'
    ],
    'Karnataka': [
        'Chikkodi','Belgaum','Bagalkot','Bijapur','Gulbarga','Raichur',
        'Bidar','Koppal','Bellary','Haveri','Dharwad','Uttara Kannada',
        'Davanagere','Shimoga','Udupi Chikmagalur','Hassan',
        'Dakshina Kannada','Chitradurga','Tumkur','Mandya','Mysore',
        'Chamarajanagar','Bangalore Rural','Bangalore North',
        'Bangalore Central','Bangalore South','Chikballapur','Kolar'
    ],
    'Kerala': [
        'Kasaragod','Kannur','Vatakara','Wayanad','Kozhikode','Malappuram',
        'Ponnani','Palakkad','Alathur','Thrissur','Chalakudy','Ernakulam',
        'Idukki','Kottayam','Alappuzha','Mavelikara','Pathanamthitta',
        'Kollam','Attingal','Thiruvananthapuram'
    ],
    'Ladakh': ['Ladakh'],
    'Lakshadweep': ['Lakshadweep'],
    'Madhya Pradesh': [
        'Balaghat','Betul','Chhindwara','Damoh','Dewas','Dhar',
        'Guna','Gwalior','Harda','Hoshangabad','Indore','Jabalpur',
        'Khandwa','Khargone','Mandla','Mandsaur','Morena','Narmadapuram',
        'Neemuch','Panna','Rewa','Sagar','Satna','Seoni','Sehore',
        'Shivpuri','Ujjain','Vidisha','Ratlam'
    ],
    'Maharashtra': [
        'Nandurbar','Dhule','Jalgaon','Raver','Buldhana','Akola','Amravati',
        'Wardha','Ramtek','Nagpur','Bhandara‑Gondiya','Gadchiroli‑Chimur',
        'Chandrapur','Yavatmal‑Washim','Hingoli','Nanded','Parbhani',
        'Jalna','Aurangabad','Dindori','Nashik','Palghar','Bhiwandi',
        'Kalyan','Thane','Mumbai North','Mumbai North West','Mumbai North East',
        'Mumbai North Central','Mumbai South Central','Mumbai South',
        'Raigad','Maval','Pune','Baramati','Shirur','Ahmednagar',
        'Shirdi','Beed','Osmanabad','Latur','Solapur','Madha','Sangli',
        'Satara','Ratnagiri‑Sindhudurg','Kolhapur','Hatkanangle'
    ],
    'Manipur': ['Inner Manipur','Outer Manipur'],
    'Meghalaya': ['Shillong','Tura'],
    'Mizoram': ['Mizoram'],
    'Nagaland': ['Nagaland'],
    'Odisha': [
        'Bargarh','Sundargarh','Sambalpur','Keonjhar','Mayurbhanj',
        'Balasore','Bhadrak','Jajpur','Dhenkanal','Bolangir','Kalahandi',
        'Nabarangpur','Kandhamal','Cuttack','Kendrapara','Jagatsinghpur',
        'Puri','Bhubaneswar','Aska','Berhampur','Koraput'
    ],
    'Puducherry': ['Puducherry'],
    'Punjab': [
        'Gurdaspur','Amritsar','Khadoor Sahib','Jalandhar','Hoshiarpur',
        'Anandpur Sahib','Ludhiana','Fatehgarh Sahib','Faridkot','Firozpur',
        'Bathinda','Sangrur','Patiala'
    ],
    'Rajasthan': [
        'Ganganagar','Bikaner','Churu','Jhunjhunu','Sikar','Jaipur Rural',
        'Jaipur','Alwar','Bharatpur','Karauli–Dholpur','Dausa',
        'Tonk–Sawai Madhopur','Ajmer','Nagaur','Pali','Jodhpur','Barmer',
        'Jalore','Udaipur','Banswara','Chittorgarh','Rajsamand','Bhilwara',
        'Kota','Jhalawar–Baran'
    ],
    'Sikkim': ['Sikkim'],
    'Tamil Nadu': [
        'Thiruvallur','Chennai North','Chennai South','Chennai Central',
        'Sriperumbudur','Kancheepuram','Arakkonam','Vellore','Krishnagiri',
        'Dharmapuri','Tiruvannamalai','Arani','Villupuram','Kallakurichi',
        'Salem','Namakkal','Erode','Tiruppur','Nilgiris','Coimbatore',
        'Pollachi','Dindigul','Karur','Tiruchirappalli','Perambalur',
        'Cuddalore','Chidambaram','Mayiladuturai','Nagapattinam','Thanjavur',
        'Sivaganga','Madurai','Theni','Virudhunagar','Ramanathapuram',
        'Thoothukkudi','Tenkasi','Tirunelveli','Kanniyakumari'
    ],
    'Telangana': [
        'Adilabad','Peddapalle','Karimnagar','Nizamabad','Zahirabad',
        'Medak','Malkajgiri','Secunderabad','Hyderabad','Chevella',
        'Mahbubnagar','Nagarkurnool','Nalgonda','Bhongir','Warangal',
        'Mahabubabad','Khammam'
    ],
    'Tripura': ['Tripura East','Tripura West'],
    'Uttar Pradesh': [
        'Saharanpur','Kairana','Muzaffarnagar','Bijnor','Nagina','Moradabad',
        'Rampur','Sambhal','Amroha','Meerut','Baghpat','Ghaziabad',
        'Gautam Buddha Nagar','Bulandshahr','Aligarh','Hathras','Mathura',
        'Agra','Fatehpur Sikri','Firozabad','Mainpuri','Etah','Badaun',
        'Aonla','Bareilly','Pilibhit','Shahjahanpur','Kheri','Dhaurahra',
        'Sitapur','Hardoi','Misrikh','Unnao','Mohanlalganj','Lucknow',
        'Rae Bareli','Amethi','Sultanpur','Pratapgarh','Farrukhabad',
        'Etawah','Kannauj','Kanpur','Akbarpur','Jalaun','Jhansi','Hamirpur',
        'Banda','Fatehpur','Kaushambi','Phulpur','Allahabad','Barabanki',
        'Faizabad','Ambedkar Nagar','Bahraich','Kaiserganj','Shrawasti',
        'Gonda','Domariyaganj','Basti','Sant Kabir Nagar','Maharajganj',
        'Gorakhpur','Kushi Nagar','Deoria','Bansgaon','Lalganj','Azamgarh',
        'Ghosi','Salempur','Ballia','Jaunpur','Machhlishahr','Ghazipur',
        'Chandauli','Varanasi','Bhadohi','Mirzapur','Robertsganj'
    ],
    'Uttarakhand': ['Tehri Garhwal','Garhwal','Almora','Nainital–Udhamsingh Nagar','Haridwar'],
    'West Bengal': [
        'Cooch Behar','Alipurduars','Jalpaiguri','Darjeeling','Raiganj',
        'Balurghat','Malda North','Malda South','Murshidabad','Jangipur',
        'Baharampur','Krishnanagar','Ranaghat','Bishnupur','Tamluk',
        'Kanthi','Ghatal','Jhargram','Medinipur','Purulia','Bankura',
        'Bardhaman Purba','Bardhaman Durgapur','Hooghly','Serampore',
        'Arambagh','T.U.Dhaka','Howrah','Uluberia','Sreerampur',
        'Baharampur','Birbhum','Asansol','Durgapur','Jadavpur','Kolkata Dakshin',
        'Kolkata Uttar','Balurghat'
    ]
}

In [28]:
consti_to_state_map = {}
for state, constituencies in state_to_consti_map.items():
    for consti in constituencies:
        consti_to_state_map[consti] = state
df['State'] = df['State'].fillna(df['Name'].map(consti_to_state_map))
consti_to_state_map

{'Andaman and Nicobar Islands': 'Andaman and Nicobar Islands',
 'Araku': 'Andhra Pradesh',
 'Srikakulam': 'Andhra Pradesh',
 'Vizianagaram': 'Andhra Pradesh',
 'Visakhapatnam': 'Andhra Pradesh',
 'Anakapalli': 'Andhra Pradesh',
 'Kakinada': 'Andhra Pradesh',
 'Amalapuram': 'Andhra Pradesh',
 'Rajahmundry': 'Andhra Pradesh',
 'Narasapuram': 'Andhra Pradesh',
 'Eluru': 'Andhra Pradesh',
 'Machilipatnam': 'Andhra Pradesh',
 'Vijayawada': 'Andhra Pradesh',
 'Guntur': 'Andhra Pradesh',
 'Narasaraopet': 'Andhra Pradesh',
 'Bapatla': 'Andhra Pradesh',
 'Ongole': 'Andhra Pradesh',
 'Nandyal': 'Andhra Pradesh',
 'Kurnool': 'Andhra Pradesh',
 'Anantapur': 'Andhra Pradesh',
 'Hindupur': 'Andhra Pradesh',
 'Kadapa': 'Andhra Pradesh',
 'Nellore': 'Andhra Pradesh',
 'Tirupati': 'Andhra Pradesh',
 'Rajampet': 'Andhra Pradesh',
 'Chittoor': 'Andhra Pradesh',
 'Arunachal West': 'Arunachal Pradesh',
 'Arunachal East': 'Arunachal Pradesh',
 'Kokrajhar': 'Assam',
 'Dhubri': 'Assam',
 'Barpeta': 'Assam',
 

In [23]:
df['Type'].unique()

array(['', 'SC', 'ST'], dtype=object)

In [25]:
df['Type'] = df['Type'].replace('', 'OC')

In [26]:
df

Unnamed: 0,State,No.,Name,Type,Candidate,Party,%,Votes,Candidate.1,Party.1,%.1,Votes.1,%.2,Votes.2
0,Andhra Pradesh,2,Srikakulam,OC,Kinjarapu Ram Mohan Naidu,TDP,61.05%,754328,Perada Tilak,YSRCP,34.51%,426427,26.54%,327901
1,Andhra Pradesh,3,Vizianagaram,OC,Appalanaidu Kalisetti,TDP,57.20%,743113,Bellana Chandra Sekhar,YSRCP,38.00%,493762,19.19%,249351
2,Andhra Pradesh,4,Visakhapatnam,OC,Mathukumilli Bharat,TDP,65.42%,907467,Botsa Jhansi Lakshmi,YSRCP,29.07%,403220,36.35%,504247
3,Andhra Pradesh,5,Anakapalli,OC,C. M. Ramesh,BJP,57.50%,762069,Budi Mutyala Naidu,YSRCP,35.13%,465539,22.37%,296530
4,Andhra Pradesh,6,Kakinada,OC,Tangella Uday Srinivas,JSP,54.87%,729699,Chalamalasetti Sunil,YSRCP,37.62%,500208,17.26%,229491
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
537,Telangana,1,Adilabad,ST,Godam Nagesh,BJP,45.98%,568168,Athram Suguna,INC,38.65%,477516,7.33%,90652
538,Tripura,1,Tripura West,OC,Biplab Kumar Deb,BJP,72.85%,881341,Ashish Kumar Saha,INC,22.3%,269763,50.55%,611578
539,Uttar Pradesh,1,Saharanpur,OC,Imran Masood,INC,44.57%,547967,Raghav Lakhanpal,BJP,39.32%,483425,5.25%,64542
540,Uttarakhand,1,Tehri Garhwal,OC,Mala Rajya Lakshmi Shah,BJP,53.66%,462603,Jot Singh Gunsola,INC,22.05%,190110,31.61%,272493


In [27]:
df.to_csv('/Users/badrinathsanagavaram/Desktop/Election Data Analysis/Data/2024-election-dataset.csv')