# Code for scraping 2019 Indian Lok Sabha election result data from the Election Commission of India (ECI) website

#### Import modules required

In [None]:
from bs4 import BeautifulSoup
import requests
from pprint import pprint
import pandas as pd
import numpy as np

#### Read State and Constituency codes from local file

In [None]:
# Load the locally saved HTML page that holds the state and constituency codes.
# The file is opened in binary mode so BeautifulSoup detects the document
# encoding itself instead of relying on the platform's default text encoding.
# The parser is named explicitly (the same one used for the result pages) so
# the parse does not depend on which optional parsers happen to be installed.
with open("state_constituency_codes.html", "rb") as fp:
    soup = BeautifulSoup(fp, "html.parser")

In [None]:
# Column layout of the results table: four identifying fields followed by
# the per-candidate values read from each result row.
column_names = [
    'state',
    'state_code',
    'constituency',
    'constituency_code',
    'candidate_name',
    'party',
    'evm_votes',
    'postal_votes',
    'total_votes',
    "percent_of_votes",
]

# Empty frame that the scraped rows are accumulated into.
df = pd.DataFrame(columns=column_names)

# Filled later with one {"state_code", "constituency_codes"} dict per state.
state_constituency_codes = []

In [None]:
# Each <input> element describes one state: its id attribute is the state
# code and its value attribute is a ";"-separated list of constituency
# entries whose first ","-separated field is the constituency code.
# The loop variable is deliberately not called `input`, which would shadow
# the builtin for the rest of the session.
for input_tag in soup.find_all('input'):
    state_code = input_tag['id']
    constituency_codes = [
        entry.split(",")[0]
        for entry in input_tag['value'].split(";")
        if entry  # skip the empty pieces left by leading/trailing ";"
    ]
    state_constituency_codes.append({"state_code": state_code, "constituency_codes": constituency_codes})

#### Make requests to ECI website and scrape relevant data

In [None]:
# Scrape the constituency-wise result page of every constituency listed in
# state_constituency_codes and add one row per candidate to df.
#
# Result pages for Jammu & Kashmir carry an extra "migrant votes" column
# between the EVM and postal vote counts. The column layout is therefore
# chosen per row from the number of fields found, so a single pass covers
# every state; there is no need to re-run this cell on the sublists
# [0:13], [13:14] and [14:] with hand-edited column names.

column_names = ['state', 'state_code', 'constituency', 'constituency_code', 'candidate_name', 'party', 'evm_votes', 'postal_votes', 'total_votes', "percent_of_votes"]
migrant_column_names = ['state', 'state_code', 'constituency', 'constituency_code', 'candidate_name', 'party', 'evm_votes', 'migrant_votes', 'postal_votes', 'total_votes', "percent_of_votes"]
# Row width -> column layout (10 fields: regular page, 11 fields: with migrant votes).
columns_by_width = {len(names): names for names in (column_names, migrant_column_names)}

# Rows are collected here and added to df in one go; DataFrame.append was
# removed in pandas 2.0 and appending row by row is quadratic anyway.
scraped_rows = []
try:
    for item in state_constituency_codes:
        state_code = item['state_code']
        print(state_code, end="\n----------\n")
        for constituency_code in item["constituency_codes"]:
            url = "http://results.eci.gov.in/pc/en/constituencywise/Constituencywise" + state_code + constituency_code + ".htm"
            # A timeout keeps one stalled request from hanging the whole run.
            response = requests.get(
                url,
                params={'ac': constituency_code},
                timeout=30,
            )
            # Fail with a clear HTTP error rather than an IndexError further
            # down while parsing an error page.
            response.raise_for_status()
            # Kept in its own name so the `soup` of the codes file is not overwritten.
            page = BeautifulSoup(response.content, "html.parser")
            # NOTE(review): the results table is taken to be the 11th <table>
            # of the page; this depends on the site's layout - confirm.
            table = page.body.find_all('table')[10]
            trs = table.tbody.find_all('tr')
            # The first row reads "<state>-<constituency>"; split on the first "-" only.
            state, constituency = trs[0].text.strip().split("-", 1)
            # The first three rows and the last row are skipped - presumably
            # headings and a totals row; verify against a live page.
            for tr in trs[3:-1]:
                new_row = [state, state_code, constituency, constituency_code]
                # The first cell of each row is dropped (presumably a serial number).
                new_row.extend(td.text.strip() for td in tr.find_all('td')[1:])
                row_columns = columns_by_width.get(len(new_row))
                if row_columns is None:
                    raise ValueError(
                        "Unexpected number of fields ({}) in a result row of {}".format(len(new_row), url)
                    )
                scraped_rows.append(dict(zip(row_columns, new_row)))
finally:
    # Keep whatever was scraped even if a request or a page parse failed midway.
    # Columns missing from a row (migrant_votes on regular pages) become NaN.
    if scraped_rows:
        df = pd.concat([df, pd.DataFrame(scraped_rows)], ignore_index=True, sort=False)
print('<--EXIT-->')

In [None]:
# Sanity check: (number of rows, number of columns) collected in df so far.
df.shape

In [None]:
# The scraped cell values are text; convert the vote counts and the vote
# share to numeric dtypes.
for numeric_column in ['evm_votes', 'postal_votes', 'total_votes', 'percent_of_votes']:
    df[numeric_column] = pd.to_numeric(df[numeric_column])

# migrant_votes only exists once pages with a migrant-votes column (Jammu &
# Kashmir) have been scraped, so guard against a KeyError when it is absent.
# Rows without a migrant-vote figure are left as NaN rather than filled with 0.
if 'migrant_votes' in df.columns:
    df['migrant_votes'] = pd.to_numeric(df['migrant_votes'])

#### Save data in local csv file

In [None]:
# Write the collected results to a CSV file in the current working directory.
# to_csv is called with its default index setting, so the DataFrame index is
# written as well, as the first (unnamed) column of the file.
df.to_csv('Election Results 2019.csv')