In [27]:
import json
import logging
import re
import sys
import time
import multiprocessing
import requests
import pandas as pd
import concurrent.futures

from typing import List, Dict, Tuple
from concurrent.futures import ProcessPoolExecutor
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [28]:
# Every state page shares this prefix; only the numeric `state` query value differs.
_ELECTION_URL = 'https://www.inecelectionresults.ng//elections/63f8f25b594e164f8146a213?state='

# State names listed in the order of their 1-based id in the `state` query parameter.
_STATE_NAMES = (
    'ABIA', 'ADAMAWA', 'AKWA IBOM', 'ANAMBRA', 'BAUCHI', 'BAYELSA', 'BENUE',
    'BORNO', 'CROSS RIVER', 'DELTA', 'EBONYI', 'EDO', 'EKITI', 'ENUGU', 'FCT',
    'GOMBE', 'IMO', 'JIGAWA', 'KADUNA', 'KANO', 'KATSINA', 'KEBBI', 'KOGI',
    'KWARA', 'LAGOS', 'NASARAWA', 'NIGER', 'OGUN', 'ONDO', 'OSUN', 'OYO',
    'PLATEAU', 'RIVERS', 'SOKOTO', 'TARABA', 'YOBE', 'ZAMFARA',
)

# Maps each state name to the URL of its results page.
states = {
    name: _ELECTION_URL + str(state_id)
    for state_id, name in enumerate(_STATE_NAMES, start=1)
}



In [29]:
def get_lga_link(html: str) -> Dict[str, str]:
    """
    Extracts entry names and links from a rendered results page.

    Args:
    - html: HTML source of the page to parse.

    Returns:
    - A dictionary mapping each lga name (the anchor text) to its absolute link.
      Matching divs that have no anchor, or whose anchor has no href, are skipped.

    """

    soup = bs(html, 'lxml')
    lgas = {}
    # Entries are the divs whose class attribute matches 'm-2 p-2 bg-light'.
    for card in soup.find_all('div', {'class': re.compile('m-2 p-2 bg-light')}):
        anchor = card.find('a')
        if anchor is None or anchor.get('href') is None:
            # No usable link in this div: skip it rather than raising.
            continue
        lgas[anchor.text] = "https://www.inecelectionresults.ng/" + anchor['href']

    return lgas

def get_lgas_links(state_url: str) -> Dict[str,str]:
    """
    Retrieves all lgas links for the current state

    Opens the state page in a fresh Chrome session, waits (up to 100 seconds)
    for the text "View Wards" to appear in the page body, then parses the
    rendered HTML with get_lga_link. If the wait times out, whatever has been
    rendered so far is still parsed.

    Args:
    - state_url: state link

    Returns:
    - A dictionary containing lgas name and its corresponding link.

    """
    driver = webdriver.Chrome()
    try:
        driver.get(state_url)

        # wait for the lgas to be visible; a timeout is tolerated (best effort)
        try:
            WebDriverWait(driver, 100).until(
                EC.text_to_be_present_in_element((By.TAG_NAME, "body"), "View Wards")
            )
        except TimeoutException:
            logging.warning("Timed out waiting for lgas on %s", state_url)

        # gets all lgas
        return get_lga_link(driver.page_source)
    finally:
        # quit() ends the whole browser session even when an error occurred,
        # so no Chrome/chromedriver process is left behind.
        driver.quit()

In [30]:
def get_wards_links(lga_url: str,state: str, lga: str) -> Dict[Tuple[str, str, str], str]:
    """
    Retrieves all wards links for the current state and lga

    Opens the lga page in a fresh Chrome session, waits (up to 100 seconds)
    for the text "Polling units" to appear in the page body, then collects
    the ward anchors from the rendered HTML. If the wait times out, whatever
    has been rendered so far is still parsed.

    Args:
    - lga_url: lga link
    - state: value stored as the first element of every returned key
    - lga: value stored as the second element of every returned key

    Returns:
    - A dictionary mapping (state, lga, ward name) to the ward link.
      Matching divs that have no anchor, or whose anchor has no href, are skipped.

    """

    driver = webdriver.Chrome()
    try:
        driver.get(lga_url)

        # wait for the wards to be visible; a timeout is tolerated (best effort)
        try:
            WebDriverWait(driver, 100).until(
                EC.text_to_be_present_in_element((By.TAG_NAME, "body"), "Polling units")
            )
        except TimeoutException:
            logging.warning("Timed out waiting for wards on %s", lga_url)

        soup = bs(driver.page_source, 'lxml')
    finally:
        # quit() ends the whole browser session even when an error occurred.
        driver.quit()

    wards = {}
    # Entries are the divs whose class attribute matches 'm-2 p-2 bg-light'.
    for card in soup.find_all('div', {'class': re.compile('m-2 p-2 bg-light')}):
        anchor = card.find('a')
        if anchor is None or anchor.get('href') is None:
            # No usable link in this div: skip it rather than raising.
            continue
        wards[(state, lga, anchor.text)] = "https://www.inecelectionresults.ng/" + anchor['href']

    return wards

In [31]:

def len_pus(ward_url: str, keys: tuple) -> None:
    """
    Counts the polling units listed on a ward page and records the result.

    Appends the row [*keys, ward_url, len_pu] to the module-level shared_list.
    When the polling units cannot be counted, the row is still appended, with
    both ward_url and len_pu set to None.

    Args:
    - ward_url: ward link
    - keys: tuple of values placed at the start of the recorded row

    Returns:
    - None; the result is appended to shared_list.

    """
    # shared_list is only mutated through its methods, so no `global` is needed.
    len_pu = None  # stays None when counting fails, so the append below never hits an unbound name

    driver = webdriver.Chrome()
    try:
        driver.get(ward_url)

        # wait for the polling units to be visible; a timeout here is tolerated
        # because the next wait decides whether the page is usable
        try:
            WebDriverWait(driver, 100).until(
                EC.text_to_be_present_in_element((By.TAG_NAME, "body"), "PU Code")
            )
        except TimeoutException:
            pass

        # gets all PUs
        try:
            WebDriverWait(driver, 100).until(
                EC.text_to_be_present_in_element((By.TAG_NAME, "body"), "Polling Units")
            )
            # find the parent element that contains all PUs
            html_ward = driver.find_element(By.XPATH, '/html/body/app-root/div/app-activated/div/div/div/div/app-election-lga/div/div/div/div/div[2]/div/div[2]/div[2]/div').get_attribute('innerHTML')
            # find the container
            container = bs(html_ward, 'lxml').find('body')
            # count only the immediate child divs of the parent element
            len_pu = len(container.find_all("div", recursive=False))
        except Exception:
            # Mark the ward as failed: the row is recorded with no link and no count.
            logging.warning("Could not count polling units for %s", ward_url)
            ward_url = None
    finally:
        # Always end the browser session; one Chrome instance is started per ward.
        driver.quit()

    shared_list.append([*keys, ward_url, len_pu])
    time.sleep(4)

In [32]:

# Manager-backed list that collects the result rows: len_pus appends one row
# per ward to it, and the final cell converts it to a DataFrame for export.
# NOTE(review): the worker processes reach this proxy through the module-level
# name; presumably this relies on the "fork" start method — confirm on other platforms.
manager = multiprocessing.Manager()
shared_list = manager.list()

In [33]:
def main(state: str) -> None:
    """
    Scrapes one state: lgas, then wards per lga, then polling-unit counts per ward.

    The lga pages are fetched in a pool of 2 processes (get_wards_links). As
    each lga completes, its wards are processed in a pool of 5 threads
    (len_pus), which records its rows in shared_list.

    Args:
    - state: state link. It is passed to get_lgas_links and is also forwarded
      to get_wards_links as the `state` element of every ward key.

    Raises:
    - Whatever get_lgas_links or get_wards_links raised. Failures inside
      len_pus are logged and do not stop the remaining wards.

    """
    lgas = get_lgas_links(state)

    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as lga_pool:

        lga_futures = {
            lga_pool.submit(get_wards_links, lga_url, state, lga_name): lga_name
            for lga_name, lga_url in lgas.items()
        }

        for lga_future in concurrent.futures.as_completed(lga_futures):

            wards = lga_future.result()

            # Distinct names keep the thread pool from shadowing the process pool above.
            with concurrent.futures.ThreadPoolExecutor(max_workers=5) as ward_pool:

                ward_futures = {
                    ward_pool.submit(len_pus, ward_url, ward_key): ward_key
                    for ward_key, ward_url in wards.items()
                }

                for ward_future in concurrent.futures.as_completed(ward_futures):
                    # Surface worker failures instead of silently dropping them.
                    error = ward_future.exception()
                    if error is not None:
                        logging.error("len_pus failed for %s: %r", ward_futures[ward_future], error)


In [1]:
# Scrape every state through a single worker process (one state at a time) and
# report the outcome of each state by name as it completes.
with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:

    # Maps each submitted future back to the name of the state it scrapes.
    future_to_url = {executor.submit(main, state_url): state_name for state_name, state_url in states.items()}

    for future in concurrent.futures.as_completed(future_to_url):

        state_name = future_to_url[future]
        error = future.exception()
        if error is None:
            print(f"{state_name}: done")
        else:
            # Show the failure instead of leaving it hidden inside the Future.
            print(f"{state_name}: failed with {error!r}")


In [26]:
# Export the collected rows. Each row is [*keys, ward_url, len_pu], where keys is
# the (state, lga, ward) tuple built by get_wards_links; main passes the state
# link as the state element, hence the 'state_url' column name.
pd.DataFrame(
    data=list(shared_list),
    columns=['state_url', 'lga', 'ward', 'ward_url', 'pu_count'],
).to_csv('election.csv', index=False)