# Scraping

In [1]:
# import libraries

import json
import csv
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

In [3]:
# set up files for regional indicators

data_folder = '../../data/demographics'

# indicadores.json is iterated below as {indicator name: list of factor names}.
# Read it explicitly as UTF-8 so decoding does not depend on the platform's
# default locale encoding (the CSVs below are written as UTF-8 as well).
with open(f"{data_folder}/indicadores.json", 'r', encoding='utf-8') as file:
    indicators = json.load(file)

# Create (or truncate, because of mode 'w') one CSV per indicator that holds
# only the header row: the municipality column followed by one column per factor.
for indicator, factors in indicators.items():
    with open(f"{data_folder}/raw/{indicator}.csv", mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['município'] + factors)

In [None]:
def enter_country(driver : webdriver.Chrome):
    '''
    Navigates in the home page

    Clicks the search box (id ``search-txt``), waits for the results
    container (id ``resultados``) to become visible and then clicks the
    first list item inside it.

    Args:
        driver: Chrome driver that has already loaded the home page.

    Raises:
        selenium.common.exceptions.TimeoutException: if the search box, the
            results container or the first result is not available within
            10 seconds.
    '''
    wait = WebDriverWait(driver, 10)

    # until() returns the element found by the condition, so there is no need
    # to look the search box up a second time.
    search_input = wait.until(
        EC.visibility_of_element_located((By.ID, 'search-txt'))
    )
    search_input.click()

    wait.until(
        EC.visibility_of_element_located((By.ID, 'resultados'))
    )
    # The container can become visible before its items are rendered; wait for
    # the first item itself instead of fetching it immediately, which would
    # raise NoSuchElementException when the list is filled in asynchronously.
    first_result = wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, '#resultados li:first-child'))
    )
    first_result.click()

    # Fixed pause after the click; presumably gives the next page time to
    # load -- TODO confirm whether an explicit wait could replace it.
    time.sleep(3)

In [None]:
def enter_indicators(driver : webdriver.Chrome):
    '''
    Opens the first entry of the narrow menu in the indicators page

    Waits up to 30 seconds for the ``ams-narrow-menu`` element to be visible,
    then up to 10 seconds for the link inside its first list item to be
    clickable, and finally clicks that link.
    '''
    menu_locator = (By.CLASS_NAME, 'ams-narrow-menu')
    WebDriverWait(driver, 30).until(EC.visibility_of_element_located(menu_locator))

    menu_link = driver.find_element(By.CSS_SELECTOR, '.ams-narrow-menu li:first-child a')
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable(menu_link))

    # The click is dispatched through JavaScript rather than WebElement.click()
    driver.execute_script("arguments[0].click();", menu_link)

    # Fixed pause after the click before the caller continues
    time.sleep(3)

In [None]:
def get_municipality_indicators(driver : webdriver.Chrome, municipality : str):
    '''
    Reads the indicator values of one municipality and appends them to the CSVs

    For every indicator in the module-level ``indicators`` mapping, the link
    whose ``data-indicador`` attribute equals the indicator name is clicked.
    Then, for each of its factors, the ``data-munrede`` attribute of the
    ``div`` carrying the factor as CSS class is collected (``None`` when no
    such ``div`` exists). The resulting row, prefixed with the municipality
    name, is appended to ``<data_folder>/raw/<indicator>.csv``.
    '''
    for indicator, factors in indicators.items():
        selector = f'a[data-indicador="{indicator}"]'
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
        ).click()

        values = [municipality]
        for factor in factors:
            # short pause before each lookup
            time.sleep(0.05)
            try:
                chart = driver.find_element(By.CSS_SELECTOR, f'div.{factor}')
                values.append(chart.get_attribute('data-munrede'))
            except NoSuchElementException:
                # keep the column aligned with the header when the factor is missing
                values.append(None)

        target = f"{data_folder}/raw/{indicator}.csv"
        with open(target, mode='a', newline='', encoding='utf-8') as file:
            csv.writer(file).writerow(values)

In [None]:
def enter_municipality(driver : webdriver.Chrome, index : int) -> bool:
    '''
    Picks the dropdown entry at the given position and scrapes it

    ``index`` is used as a CSS ``nth-child`` position (1-based) inside
    ``ul.dropdown-menu``. Entries that contain no ``<a>`` element are skipped.

    Returns True when the entry was clicked and its indicators were fetched,
    False when the entry was skipped.
    '''
    entry = driver.find_element(By.CSS_SELECTOR, f'ul.dropdown-menu li:nth-child({index})')

    anchors = entry.find_elements(By.TAG_NAME, 'a')
    if not anchors:
        return False

    # The municipality name is the text of the last <span> inside the link
    name = anchors[0].find_elements(By.TAG_NAME, 'span')[-1].text

    entry.click()
    # Fixed pause after selecting the entry before reading the indicators
    time.sleep(5)
    get_municipality_indicators(driver, name)
    return True

In [None]:
def traverse_municipalities(driver : webdriver.Chrome):
    '''
    Visits every entry of the municipality dropdown in the indicators page

    The dropdown is opened once to count its ``li`` items. It is then opened
    again before each entry whenever the previous call to
    ``enter_municipality`` returned True (i.e. an entry was clicked).
    '''
    def open_dropdown():
        # Wait for the dropdown toggle to be clickable, then click it
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.dropdown'))
        ).click()

    open_dropdown()
    total = len(driver.find_elements(By.CSS_SELECTOR, 'ul.dropdown-menu li'))

    needs_reopen = False
    # enter_municipality expects 1-based positions
    for position in range(1, total + 1):
        if needs_reopen:
            open_dropdown()
        needs_reopen = enter_municipality(driver, position)

In [None]:
# web scraping

url = 'https://www.atlasmunicipiossaudaveis.pt/'

driver = webdriver.Chrome()
try:
    driver.get(url)

    # home page -> indicators page -> every municipality in the dropdown
    enter_country(driver)
    enter_indicators(driver)
    traverse_municipalities(driver)

    time.sleep(3)
finally:
    # Always shut the browser down, even when a step above raises (e.g. a
    # wait times out); otherwise the Chrome/chromedriver processes are left
    # running.
    driver.quit()