# Create a dataframe of election dates by parsing the NDI website

This script retrieves country and election-type codes from the NDI website.

We use this code to build a DataFrame with election dates by country.

#### First we get the country codes

Note: `code_country.html` was copied directly from the NDI website so that we have access to the country codes.

In [2]:
import csv

# Build ndi_country_codes.csv (columns: Country, Code) from the saved HTML file.
# A useful line holds exactly one double-quoted value (the code) followed by
# the country name between ">" and "</" -- presumably lines of the form
# <option value="CODE">Country</option>; confirm against code_country.html.
# NOTE(review): both files are assumed to be UTF-8 -- confirm for the saved HTML.
with open("code_country.html", encoding="utf-8") as f, \
        open("ndi_country_codes.csv", "w", encoding="utf-8", newline="") as out:
    # csv.writer quotes names that contain a comma so the file stays parseable;
    # lineterminator="\n" keeps the line endings the original cell wrote.
    writer = csv.writer(out, lineterminator="\n")
    writer.writerow(["Country", "Code"])

    for line in f:
        pieces = line.strip().split('"')

        # Exactly three pieces means the line contains a single quoted value.
        if len(pieces) != 3:
            continue

        code = pieces[1]
        # Keep what sits after the opening tag's ">" and before the closing "</".
        label = pieces[2].split("</")[0].split(">")
        if len(label) < 2:
            # No ">" after the quoted value: not a usable line, skip it
            # instead of failing with an IndexError.
            continue

        writer.writerow([label[1], code])

#### Second, we retrieve the codes for election types

In [3]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Download the calendar page. The timeout keeps the cell from hanging forever
# and raise_for_status() stops us from parsing an HTTP error page.
r = requests.get("https://www.ndi.org/elections-calendar-all", timeout=30)
r.raise_for_status()

# Name the parser explicitly so the result does not depend on which optional
# parser libraries happen to be installed.
soup = BeautifulSoup(r.content, "html.parser")

# Only the first <select> element of the page is used.
# NOTE(review): presumably this is the election-type dropdown -- confirm.
first_select = soup.find("select")
options = first_select.find_all("option") if first_select is not None else []

# Map option label -> option value, skipping the first two <option> entries
# (presumably placeholder choices -- confirm against the page).
election_type_codes = {
    option.get_text(): option.get("value") for option in options[2:]
}

# One row per election type: the label becomes the "Election Type" column and
# the option value becomes the "Code" column.
ndi_type_election_codes_2 = pd.DataFrame.from_dict(election_type_codes, orient="index", columns=["Code"])
ndi_type_election_codes = ndi_type_election_codes_2.rename_axis('Election Type').reset_index()

# The row index is written as well, as in the original cell.
ndi_type_election_codes.to_csv("ndi_type_election_codes.csv")

#### Now that we have everything, we can build NDI's URLs and retrieve all the information

In [4]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import StaleElementReferenceException
import pandas as pd
import time
from dateutil.parser import parse

# Headless Firefox session used to load the NDI calendar pages.
# (The Chrome Options object the cell used to create was never passed to any
# driver, so it has been dropped.)
firefox_options = webdriver.FirefoxOptions()
firefox_options.add_argument("--headless")
browser = webdriver.Firefox(options=firefox_options)

# Lookup tables written by the two previous cells.
country_codes = pd.read_csv("ndi_country_codes.csv")
election_type_codes = pd.read_csv("ndi_type_election_codes.csv")

# NOTE(review): `years` is not used by any cell visible here -- confirm
# whether a year filter was intended.
years = range(2006,2022)

# Define which countries we are interested in

We do so by referring to an Afrobarometer dataset and using `country_converter` to ensure that country names are normalized.

In [13]:
import country_converter as coco

# Load only the COUNTRY column of the Afrobarometer survey file.
df = pd.read_spss("../Afrobarometer/r7_merged.sav",usecols=["COUNTRY"])

# Normalise every *distinct* country name once and map the result back onto
# the rows, instead of running the converter on every survey row.
raw_names = df["COUNTRY"].tolist()
distinct_names = list(dict.fromkeys(raw_names))  # first-seen order, no duplicates
short_names = coco.convert(names=distinct_names, to='name_short', not_found=None)
if isinstance(short_names, str):
    # NOTE(review): country_converter appears to return a bare string when
    # given a single name -- confirm; wrap it so zip() pairs it correctly.
    short_names = [short_names]
name_map = dict(zip(distinct_names, short_names))

df["COUNTRY"] = [name_map[name] for name in raw_names]
# Distinct normalised names; used later to decide which NDI countries to fetch.
my_countries = df["COUNTRY"].unique()

In [19]:
def _read_date_texts(driver, attempts=25, pause=0.2):
    """Return the text of each ``date-display-single`` element on the current page.

    The texts are read inside the retry loop because a
    ``StaleElementReferenceException`` is raised when an element is accessed
    after it has been detached from the page, not when it is located. On a
    stale reference the elements are located again after ``pause`` seconds;
    once ``attempts`` tries have failed the exception is re-raised instead of
    retrying forever.
    """
    for attempt in range(1, attempts + 1):
        try:
            found = driver.find_elements(By.CLASS_NAME, "date-display-single")
            return [element.text for element in found]
        except StaleElementReferenceException:
            if attempt >= attempts:
                raise
            time.sleep(pause)
    return []


# One (country name, election type name, parsed date) tuple per date found.
elections = []

for country_code, country_name in zip(country_codes["Code"], country_codes["Country"]):
    # Only fetch the countries selected from the Afrobarometer dataset.
    if country_name not in my_countries:
        continue

    print("Fecthing data for "+country_name)

    for election_type_code, election_type_name in zip(
        election_type_codes["Code"], election_type_codes["Election Type"]
    ):
        url = (
            "https://www.ndi.org/elections-calendar-all"
            f"?country={country_code}&election_type={election_type_code}"
        )

        browser.get(url)
        # Fixed pause after loading, presumably to let the page finish rendering.
        time.sleep(5)

        for text in _read_date_texts(browser):
            if not text.strip():
                # Nothing to parse; dateutil would raise on an empty string.
                continue
            if "ALL DAY" in text:
                # Keep only the part before the first "(" on "ALL DAY" entries.
                text = text.split('(')[0]
            elections.append((country_name, election_type_name, parse(text)))
    

Fecthing data for Benin
Fecthing data for Botswana
Fecthing data for Burkina Faso
Fecthing data for Cameroon
Fecthing data for Gabon
Fecthing data for Ghana
Fecthing data for Guinea
Fecthing data for Kenya
Fecthing data for Lesotho
Fecthing data for Liberia
Fecthing data for Madagascar
Fecthing data for Malawi
Fecthing data for Mali
Fecthing data for Mauritius
Fecthing data for Morocco
Fecthing data for Mozambique
Fecthing data for Namibia
Fecthing data for Niger
Fecthing data for Nigeria
Fecthing data for Sao Tome and Principe
Fecthing data for Senegal
Fecthing data for Sierra Leone
Fecthing data for South Africa
Fecthing data for Sudan
Fecthing data for Tanzania
Fecthing data for Togo
Fecthing data for Tunisia
Fecthing data for Uganda
Fecthing data for Zambia
Fecthing data for Zimbabwe


In [20]:
# Turn the scraped (country, election type, date) tuples into a table and save
# it as CSV. The row index is written too, as before.
# (The bare `dff["Country"].unique()` expression was removed: it sat in the
# middle of the cell, so its value was neither shown nor stored.)
dff = pd.DataFrame(elections, columns=["Country", "Election type", "date"])
dff.to_csv("NDI_election_dates.csv")

In [21]:
# Re-read the CSV (first column is the saved row index) and store it as Parquet.
# parse_dates restores the "date" column to datetimes; without it the values
# come back from the CSV as plain strings and the Parquet file would store
# text instead of timestamps.
df = pd.read_csv("NDI_election_dates.csv", index_col=[0], parse_dates=["date"])
df.to_parquet("election_dates.parquet")