# Setup

In [12]:
import sys
sys.path.append("../geojson")

from selenium.webdriver import Firefox
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from webdriver_manager.firefox import GeckoDriverManager

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys

import numpy as np
import pandas as pd

import os
import ast
import json
import requests
import time
from tqdm import tqdm


# Getting All Suburbs from Rio Grande do Norte (RN)

In [2]:
# Remote JSON file; only its "RN" entry is used. The loops below treat that
# entry as a mapping of city name -> iterable of suburb names.
json_raw = "https://raw.githubusercontent.com/RonaldTheodoro/Estados-Cidades-e-Bairros/master/data.json"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# JSON decoding error or KeyError further down.
resp = requests.get(json_raw, timeout=30)
resp.raise_for_status()
RN_dict = resp.json()["RN"]

# Scraping

- https://nominatim.openstreetmap.org/ui/search.html
- https://polygons.openstreetmap.fr

In [3]:
print("# 1. Init and Login")

# Directory handed to Firefox as its download folder.
# NOTE(review): `data_path` was not defined in any visible cell, so this cell
# raised NameError on a fresh kernel. The export cell writes into "geojson/",
# so that folder is assumed here -- adjust if downloads belong elsewhere.
data_path = os.path.abspath("geojson")
os.makedirs(data_path, exist_ok=True)

# MIME types listed in the "neverAsk" preferences below. The value is kept
# verbatim; it used to be duplicated inline in both set_preference calls.
auto_handled_mime_types = "application/pdf;application/xls;text/plain; charset=utf-8; text/csv;application/json;charset=utf-8;application/pdf;text/plain;application/text;text/xml;application/xml;text/plain;text/x-csv;text/csv;application/vnd.ms-excel;application/csv;application/x-csv;text/csv;text/comma-separated-values;text/x-comma-separated-values;text/tab-separated-values;application/pdf"

# Browser profile: download-related preferences so files are saved into
# `data_path` without user interaction.
profile = FirefoxProfile()
profile.set_preference("browser.download.panel.shown", False)
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", auto_handled_mime_types)
profile.set_preference("browser.helperApps.neverAsk.openToDisk", auto_handled_mime_types)
# folderList = 2 tells Firefox to use the directory given in browser.download.dir.
profile.set_preference("browser.download.folderList", 2)
profile.set_preference("download.prompt_for_download", False)
profile.set_preference("pdfjs.disabled", True)
profile.set_preference("browser.download.dir", data_path)

options = FirefoxOptions()
options.headless = True

# `options` was previously built but never handed to the driver, so the
# headless flag was silently ignored; it is now passed explicitly.
driver = Firefox(executable_path=GeckoDriverManager().install(),
                 firefox_profile=profile,
                 options=options)



# 1. Init and Login




[WDM] - Driver [C:\Users\lukan\.wdm\drivers\geckodriver\win64\v0.32.0\geckodriver.exe] found in cache


## OSM codes

In [5]:
# Look up the OpenStreetMap id of every suburb through the Nominatim search UI.
# Result layout: {city: {suburb: OSM id as a string, or np.nan when the lookup failed}}.
nomination_url = "https://nominatim.openstreetmap.org/ui/search.html"

# Absolute XPaths into the Nominatim UI; they break if the page layout changes.
search_text_xpath = '//*[@id="q"]'
search_butn_xpath = '/html/body/section[2]/div/div[1]/form/div/div[2]/button'
details_xpath = '/html/body/div[2]/div[1]/div/div[1]/a'

osm_dict_city = {}
for city in tqdm(RN_dict.keys()):
    osm_dict_suburb = {}
    for suburb in RN_dict[city]:
        try:
            # 1. opening nominatim.openstreetmap.org
            driver.get(nomination_url)

            time.sleep(1)

            # 2. typing the suburb and submitting
            search_text_element = WebDriverWait(driver, 2).until(
                EC.presence_of_element_located((By.XPATH, search_text_xpath))
            )
            search_butn_element = WebDriverWait(driver, 2).until(
                EC.presence_of_element_located((By.XPATH, search_butn_xpath))
            )

            search_text_element.send_keys(f"{suburb}, {city}, Rio Grande do Norte")
            # Click through JavaScript rather than element.click().
            driver.execute_script("arguments[0].click();", search_butn_element)

            time.sleep(2)

            # 3. clicking for details
            details_element = WebDriverWait(driver, 2).until(
                EC.presence_of_element_located((By.XPATH, details_xpath))
            )

            driver.execute_script("arguments[0].click();", details_element)

            time.sleep(2)

            # 4. getting the osm code: the details page URL carries it as the
            # "osmid" query parameter (IndexError here means it was absent).
            osm_dict_suburb[suburb] = driver.current_url.split("osmid=")[1].split("&")[0]

        except Exception:
            # Best effort: any failure for one suburb (wait timeout, missing
            # element, URL without "osmid=") is recorded as a missing value and
            # the loop moves on. Catching Exception instead of using a bare
            # `except:` keeps KeyboardInterrupt working, so this long-running
            # loop can still be stopped.
            osm_dict_suburb[suburb] = np.nan

    osm_dict_city[city] = osm_dict_suburb

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [14:40<00:00, 48.94s/it]


## Polygons

In [9]:
# Download the GeoJSON boundary of every suburb from polygons.openstreetmap.fr.
# Result layout: {city: {suburb: parsed GeoJSON object, or None when unavailable}}.
# None (not np.nan) marks a missing polygon so that json.dump writes a valid
# `null` instead of the non-standard `NaN` token.
polygon_url = "https://polygons.openstreetmap.fr/?id={}"

# Absolute XPaths: the link clicked on the polygon page, and the <pre> element
# whose text is parsed as the GeoJSON payload after that click.
geojson_link_xpath = "/html/body/table/tbody/tr/td[6]/a"
geojson_body_xpath = "/html/body/pre"

geojson_city_polygons = {}
for city in tqdm(RN_dict.keys()):
    geojson_suburb_polygons = {}
    for suburb in RN_dict[city]:
        # Skip suburbs without an OSM id instead of loading "?id=nan" and
        # waiting for an element that will never appear.
        osm_id = osm_dict_city.get(city, {}).get(suburb)
        if pd.isna(osm_id):
            geojson_suburb_polygons[suburb] = None
            continue

        try:
            driver.get(polygon_url.format(osm_id))

            time.sleep(1)

            geojson_link = WebDriverWait(driver, 2).until(
                EC.presence_of_element_located((By.XPATH, geojson_link_xpath))
            )

            # Click through JavaScript rather than element.click().
            driver.execute_script("arguments[0].click();", geojson_link)

            time.sleep(1)

            geojson_body = WebDriverWait(driver, 2).until(
                EC.presence_of_element_located((By.XPATH, geojson_body_xpath))
            )

            # The payload is JSON, so parse it as JSON; ast.literal_eval would
            # reject valid JSON tokens such as true/false/null.
            geojson_suburb_polygons[suburb] = json.loads(geojson_body.text)

        except Exception:
            # Best effort: a failure for one suburb (wait timeout, missing
            # element, unparsable payload) is recorded as missing and the loop
            # continues. Not a bare `except:`, so KeyboardInterrupt still
            # stops the loop.
            geojson_suburb_polygons[suburb] = None

    geojson_city_polygons[city] = geojson_suburb_polygons

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [14:51<00:00, 49.53s/it]


# Exporting GeoJson

In [13]:
# Write the {city: {suburb: polygon}} mapping to disk as a single JSON file.
output_path = "geojson/suburbs_RN.json"

# Create the target folder first so a missing directory cannot throw away the
# results of the long scraping loops with a FileNotFoundError.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# NOTE(review): json.dump serializes any float NaN values as the non-standard
# token `NaN`, which strict JSON parsers reject.
with open(output_path, "w", encoding="utf-8") as outfile:
    json.dump(geojson_city_polygons, outfile)