# Google Scraping
This script scrapes data from Google Maps for the purpose of collecting basic information about regions and locations.
Before running it, download the most recent version of the Chrome driver: https://chromedriver.chromium.org/downloads

In [14]:
# IMPORT PACKAGES
import arcpy
import os

from selenium import webdriver
from selenium.webdriver.common.by import By

import logging
# Emit INFO-and-above log records, each prefixed with a timestamp
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

# ABOUT THE TARGET GEODATABASE
# Spatial reference built from factory code 4326 (WGS 84 geographic coordinates)
sr = arcpy.SpatialReference(4326)

# The file geodatabase is located at <fldr>\<name>.gdb
fldr = r"C:\Users\Laura\Documents\Keepsakes\Travel\TravelMaster"
name = r"Scratch"
gdb = os.path.join(fldr, name + ".gdb")

## Functions

In [15]:
# Get the element at a relative xpath and attribute, returns empty string on failure
def getElement(browser, xpath, attribute):
    """Return a stripped attribute value of the first element matching an XPath.

    Args:
        browser: Selenium WebDriver (or any object exposing
            ``find_element(by, value)``) used to search the page.
        xpath: XPath expression locating the element.
        attribute: Name of the attribute/property to read, e.g. "innerText".

    Returns:
        The attribute value with surrounding whitespace removed, or "" when
        the element cannot be found, the attribute is absent, or the lookup
        fails for any other reason.
    """
    try:
        # "xpath" is the value of selenium's By.XPATH.  find_element(by, value)
        # exists in both Selenium 3 and 4, whereas the find_element_by_xpath
        # shortcut was removed in Selenium 4.3 (and the old bare except would
        # have silently turned that AttributeError into "").
        value = browser.find_element("xpath", xpath).get_attribute(attribute)
    except Exception:
        # Best-effort lookup: a missing element or driver error means "no value".
        # Unlike a bare except, KeyboardInterrupt/SystemExit still propagate.
        return ""

    # get_attribute returns None when the attribute does not exist.
    if not isinstance(value, str):
        return ""
    return value.strip()

# Scrape a Google Maps URL to return latitude and longitude as a list
def findCoordinates(url):
    """Extract the latitude and longitude strings from a Google Maps URL.

    The URL is expected to contain a "/@<lat>,<lon>,..." segment; an
    IndexError is raised when no "/@" marker is present.

    Returns:
        A list of at most two strings: [latitude, longitude].
    """
    after_marker = url.split("/@")[1]
    # Drop the ",17z" zoom suffix and everything after it, when present.
    position = after_marker.partition(",17z")[0]
    return position.split(",")[:2]

def getName(browser):
    """Return the inner text of the element whose class contains "DUwDvf".

    The result is "" when getElement cannot read a value.  Presumably this
    is the place title on the Google Maps panel -- confirm against the page.
    """
    title_xpath = '//*[contains(@class, "DUwDvf")]'
    return getElement(browser, title_xpath, "innerText")

def getLocalName(browser):
    """Return the text of the "bwoZTb" element, or getName's result if empty.

    Presumably "bwoZTb" marks the local-language name on the Google Maps
    panel -- confirm against the page.
    """
    local_xpath = '//*[contains(@class, "bwoZTb")]'
    local_name = getElement(browser, local_xpath, "innerText")

    # No local name on the page: fall back to the regular name.
    if not local_name:
        return getName(browser)
    return local_name

def getImageLink(browser):
    """Return the "src" of an <img> inside the element with class "aoRNLd".

    The result is "" when getElement cannot read a value.
    """
    image_xpath = '//*[contains(@class, "aoRNLd")]//img[1]'
    return getElement(browser, image_xpath, "src")

def getSimpleDesc(browser):
    """Return the inner text of the element whose class contains "PYvSYb".

    The result is "" when getElement cannot read a value.  Presumably this
    is the short description of the place -- confirm against the page.
    """
    description_xpath = '//*[contains(@class, "PYvSYb")]'
    return getElement(browser, description_xpath, "innerText")

def getLocationType(browser):
    """Return the inner text of the element with class "DkEaL u6ijk".

    The XPath matches the two class names as one literal substring, so they
    must appear adjacent and in this order.  The result is "" when
    getElement cannot read a value.
    """
    type_xpath = '//*[contains(@class, "DkEaL u6ijk")]'
    return getElement(browser, type_xpath, "innerText")

def getAddress(browser):
    """Return the inner text of the element whose data-item-id contains "address".

    The result is "" when getElement cannot read a value.
    """
    address_xpath = '//*[contains(@data-item-id, "address")]'
    return getElement(browser, address_xpath, "innerText")

def getPhone(browser):
    """Return the inner text of the element whose data-item-id contains "phone".

    The result is "" when getElement cannot read a value.
    """
    phone_xpath = '//*[contains(@data-item-id, "phone")]'
    return getElement(browser, phone_xpath, "innerText")

In [16]:
# Fill in the local name and image link of every row in the Regions feature
# class by searching Google Maps for "<Reg_Name>, <Country_ISO>".
regions = os.path.join(gdb, 'Regions')
fields = ['Reg_Name', 'Reg_LocalName', 'Reg_ImgLink', 'Country_ISO']

# Create update cursor for feature class
with arcpy.da.UpdateCursor(regions, fields) as cursor:
    for row in cursor:

        # Search terms: region name plus country code.  Null/empty parts are
        # left out instead of raising TypeError on string concatenation.
        query = ', '.join(str(part) for part in (row[0], row[3]) if part)

        # Open web driver and navigate to the page.  The path is a raw string
        # so its backslashes are never read as escape sequences.
        # NOTE(review): passing the driver path positionally is only accepted
        # by older Selenium releases -- confirm the installed version.
        browser = webdriver.Chrome(r"C:\chromedriver-win32\chromedriver.exe")
        try:
            # Set the implicit wait first so it applies to every lookup below,
            # including the search box itself.
            browser.implicitly_wait(10)
            browser.get("https://www.google.com/maps")
            search_box = browser.find_element(By.XPATH, "//input[@id='searchboxinput']")
            search_box.send_keys(query)
            browser.find_element(By.ID, "searchbox-searchbutton").click()

            # Scrape everything needed while the browser is still open
            locName = getLocalName(browser)
            img = getImageLink(browser)
        finally:
            # quit() ends the session and the chromedriver process even when a
            # step above raised; close() only closed the window.
            browser.quit()

        # Get the local name: keep the region name when nothing was scraped
        # (the helpers return "" rather than None) or when the scraped text
        # contains a purely numeric token.
        if not locName or any(token.isdigit() for token in locName.split()):
            row[1] = row[0]
        else:
            row[1] = locName

        # Get image link: no image, or the default geocode placeholder, is
        # stored as null.  (The former `"default_geocode" in img ==True` was a
        # chained comparison that could never be true.)
        if not img or "default_geocode" in img:
            row[2] = None
        elif len(img) > 350:
            # Presumably longer than the Reg_ImgLink field allows -- confirm.
            row[2] = 'Error'
        else:
            row[2] = img

        # Update the cursor with the updated list
        cursor.updateRow(row)