# This project scrapes Delhi 2020 election data from the ECI website. 

## For each of the 70 constituencies, the following variables have been scraped: name of constituency, number of contesting candidates, total votes polled, winning party in 2020, margin of victory in 2020, winning party in 2015, margin of victory in 2015

### The code can be executed through the main() at the very bottom

In [39]:
import csv
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [40]:
#this function scrapes three variables - name of constituency, number of candidates, total votes polled - for each constituency
def scrape_name_candidates_votes(page_number):
    name_of_const = []
    #the tentative variables for name of constituency are used to get to the final name (name_of_const) 
    tentative1_name_of_const = []
    tentative2_name_of_const = []
    
    number_of_candidates = []
    
    total_votes = []
    #td_list is used to scrape td tags, in order to append total_votes 
    td_list = []

    for i in (page_number):
                #the variable page goes to each URL on the website
                page = requests.get('http://results.eci.gov.in/DELHITRENDS2020/ConstituencywiseU05' 
                                    + i +'.htm')
                soup = BeautifulSoup(page.content, 'html.parser')
                #the next three lines of code scrape number of candidates in each constituency
                full_table = soup.find_all(attrs={"style":"font-size:12px;"})
                if (len(full_table) is not 0):
                    number_of_candidates.append(len(full_table))
                # the next 4 lines scrape total votes polled in each constituency
                vote_row = soup.find(attrs={"style":"color:#000000;background-color:White;
                                             border-color:#673033;border-width:1px;border-style:
                                             Solid;font-family:Calibri;font-size:Small;background-color: pink"})
                if (vote_row is not None):
                        td_list = vote_row.find_all("td")
                        total_votes.append(td_list[5].get_text())
                
                name_row = soup.find(attrs={"style": "height: 20px; background-color:#FFC0CD; 
                                            color:Black; font-weight: bold"})
                if (name_row is not None):
                      # the name is split on "-" because it's initially scraped as <Name of State-Name of Constituency>
                        tentative1_name_of_const.append(name_row.find("td").get_text())
                        tentative2_name_of_const.append(tentative1_name_of_const[-1].strip().split("-")) 
    #the tuple is used to extract Name of Constituency after splitting on "-"
    temporary_tuple = tuple(tentative2_name_of_const)
    name_of_const = [t[1] for t in temporary_tuple]
    combined_name_candidates_votes = list(zip(name_of_const, number_of_candidates, total_votes))
    
    return (combined_name_candidates_votes)


SyntaxError: EOL while scanning string literal (<ipython-input-40-d1c23a20aae7>, line 24)

In [41]:
#this function scrapes three variables - name of constituency, winning party in 2020, margin of victory in 2020, winning party in 2015, margin of victory in 2015 - for each constituency
def scrape_name_margin_party(page_number):
    name_of_const = []
    
    margin_2020 = []
    winning_party_2020 = []
    
    margin_2015 = []
    winning_party_2015 = []
    
    for i in (page_number):
                page = requests.get('http://results.eci.gov.in/DELHITRENDS2020/statewiseU05' + 
                                    i + '.htm')
                soup = BeautifulSoup(page.content, 'html.parser')
                full_table = soup.find_all(attrs={"style":"font-size:12px;"})
                for td in full_table:
                    name_of_const.append(td.find('td').get_text())
                   
                    margin_2020.append(td.find(attrs={"align":"right"}).get_text())
                    winning_party_2020.append(td.find("tbody").find("td").get_text())
                    
                    data_2015 = td.find_all(attrs={"style": "background-color: lightgray;"})
                    winning_party_2015.append(data_2015[1].get_text())
                    margin_2015.append(data_2015[2].get_text())
                    
    combined_name_margin_party = list(zip(name_of_const, winning_party_2020, margin_2020, winning_party_2015, margin_2015))
    #print(combined_name_margin_party)
    #print(type(combined_name_margin_party))
    return(combined_name_margin_party)

In [42]:
#this function merges the above two dataframes
def data_merge(combined_name_candidates_votes, combined_name_margin_party):
    const_cand_votes_df = pd.DataFrame(np.array(combined_name_candidates_votes)
                                       .reshape(len(combined_name_candidates_votes),3), 
                                       columns = ['Constituency','Number_of_candidates', 'Total_votes_2020'])
    const_party_margin_df = pd.DataFrame(np.array(combined_name_margin_party)
                                         .reshape(len(combined_name_margin_party),5), 
                                         columns = ['Constituency' , 'Winning_party_2020', 'Margin_2020', 'Winning_party_2015', 'Margin_2015'])
    
    data_Delhi_merged = pd.merge(const_cand_votes_df, const_party_margin_df, on='Constituency')
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        print(data_Delhi_merged.head())
    
    return(data_Delhi_merged)


In [43]:
#this function exports all the data to the user's harddrive
def write_merged_csv_file(data_Delhi_merged):
    data_Delhi_merged.to_csv('Delhi_election_data.csv', sep=',') 

In [44]:
#this function generates the different URLs on the ECI website from where the data is scraped
def generate_page_numbers():
    page_number = []
    for i in range(1, 6):
        str_i = str(i)
        page_number.append(str_i)
    
    return page_number

In [45]:
def main():    
    page_number = generate_page_numbers()
    combined_name_candidates_votes = scrape_name_candidates_votes(page_number)
    combined_name_margin_party = scrape_name_margin_party(page_number)
    data_Delhi_merged = data_merge(combined_name_candidates_votes, combined_name_margin_party)
    #write_merged_csv_file(data_Delhi_merged)


In [46]:
main()

   Constituency Number_of_candidates Total_votes_2020 Winning_party_2020  \
0        NARELA                   12           165694    Aam Aadmi Party   
1        BURARI                   23           222256    Aam Aadmi Party   
2  ADARSH NAGAR                    8           103752    Aam Aadmi Party   
3         BADLI                   14           139638    Aam Aadmi Party   

  Margin_2020 Winning_party_2015 Margin_2015  
0       17429    Aam Aadmi Party       40292  
1       88158    Aam Aadmi Party       67950  
2        1589    Aam Aadmi Party       20741  
3       29123    Aam Aadmi Party       35376  
