Import necessary libraries.

In [5]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime, timedelta
import time

Open a Chrome window with the initial URL containing the name of the city, origin code, zone type and first date for which you want to download a dataset.

In [6]:
# Load Chrome driver and movement.uber.com/cities website
# Raw string: the backslashes of the Windows path must never be read as escape sequences
PATH = r'C:\Program Files (x86)\chromedriver.exe'
# NOTE(review): passing the driver path positionally is the older Selenium calling convention;
# confirm it is still accepted by the installed Selenium version.
driver = webdriver.Chrome(PATH)

# Attributing the city name and the center-most zone code (or origin) to variables so they can be inserted in the URL later
city = 'stockholm'
origin_code = '655'
coordinates = ''
zone_type = 'basomrade' # or 'taz' or 'hex' or others

# Open URL for the first day in the desired city (change coordinates depending on the city).
# The origin code is passed as the 'si' query parameter, so it needs the '=' ('?si=' + origin_code),
# exactly like the URLs generated for the remaining dates.
first_date = '2016-01-02'
driver.get('https://movement.uber.com/explore/' + city + '/travel-times/query?si=' + origin_code + '&ti=&ag=' + zone_type + '&dt[tpb]=ALL_DAY&dt[wd;]=1,2,3,4,5,6,7&dt[dr][sd]=' +
           first_date + '&dt[dr][ed]=' + first_date + '&cd=&sa;=&sdn=' + coordinates + '&lang=en-US')

Define functions for clicking on the privacy settings "opt-in" button, skipping the initial guide pop-up window, filling in personal info on a form that appears only the first time you try to download a dataset in that window, and clicking on the download dataset button.

In [7]:
# Defining the necessary functions for the first page
def choosePrivacy():
    """
    Function that clicks on the privacy settings button with id 'privacy_pref_optin'.

    Best-effort: if the button cannot be found or clicked (Selenium raises an error),
    nothing happens and the script simply carries on.
    """
    # Local import so this function stays self-contained
    from selenium.common.exceptions import WebDriverException

    try:
        driver.find_element(By.ID, 'privacy_pref_optin').click()
    except WebDriverException:
        # Only Selenium errors are ignored; KeyboardInterrupt and programming errors still surface
        pass

    
def pressSkip():
    """
    Function that presses skip on the "Understanding origins" pop-up window.

    Waits a fixed 5 seconds, then clicks the element matching 'div.af.bk'.
    Best-effort: if the element cannot be found or clicked (Selenium raises an error),
    nothing happens and the script simply carries on.
    """
    # Local import so this function stays self-contained
    from selenium.common.exceptions import WebDriverException

    # Fixed pause before looking for the pop-up (kept outside the try: sleeping cannot fail)
    time.sleep(5)
    try:
        driver.find_element(By.CSS_SELECTOR, 'div.af.bk').click()
    except WebDriverException:
        # Only Selenium errors are ignored; KeyboardInterrupt and programming errors still surface
        pass


def fillPersonalInfo():
    """
    Function that fills in the personal information before finally downloading the csv file.

    Types the first name, last name and email into the fields with ids 'firstName',
    'lastName' and 'email', opens the purpose dropdown and clicks its sixth list entry,
    and finally clicks the second newsletter option. Any Selenium error (for example a
    missing element) propagates to the caller.
    """
    # Text fields, located by their element ids
    driver.find_element(By.ID, 'firstName').send_keys('Luiz')
    driver.find_element(By.ID, 'lastName').send_keys('Scheuer')
    # NOTE(review): the domain reads 'gmali.com' - confirm whether 'gmail.com' was intended
    driver.find_element(By.ID, 'email').send_keys('luizgscheuer@gmali.com')

    # Open the purpose dropdown, then pick the sixth entry of the list that appears
    purpose_dropdown = driver.find_element(
        By.XPATH, '/html/body/div[1]/div[2]/div[2]/div/div[3]/div/div[2]/div/div/div/form/div[3]/div/div/div/div[1]')
    purpose_dropdown.click()

    purpose = driver.find_element(By.XPATH, '/html/body/div[1]/div[2]/div[3]/div/div/div/div/ul/li[6]')
    purpose.click()

    # Second label of the newsletter question (the opt-out choice)
    opt_out_newsletter = driver.find_element(
        By.XPATH, '/html/body/div[1]/div[2]/div[2]/div/div[3]/div/div[2]/div/div/div/form/div[5]/div/div/label[2]/div[2]')
    opt_out_newsletter.click()
    

# Start actions
# Agree to privacy preferences
choosePrivacy()


# Skip button only shows up the first time you open the Chrome browser
pressSkip()

time.sleep(1.5)
# Choosing correct data parameters (Traffic Analysis Zone) and opening date bar in preparation for the calendar loop
# Optional step, disabled because the zone type is already part of the URL ('ag=' parameter):
# Click on the zone type dropdown list
# zone_type_dropdown = WebDriverWait(driver, 8).until(
#     EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div[3]/div/div[2]/div/div/div/div/div[1]/div[2]/div')))
# zone_type_dropdown.click()


# Choose desired zoning type (different from city to city)
# zoning_type = WebDriverWait(driver, 10).until(
#     EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[2]/div/div/div/div/div/ul/li[2]')))
# zoning_type.click()


# Click on "Download data"
download_button = driver.find_element(By.CSS_SELECTOR, 'div.f5 button')
download_button.click()


# Click on the correct csv file
travel_times_download = driver.find_element(
    By.XPATH, '/html/body/div[1]/div[2]/div/div/div[3]/div/div[2]/div/div[1]/div/button[1]')
travel_times_download.click()
# Short pause before filling in the form that is opened by the click above
time.sleep(1)

fillPersonalInfo()

# Download csv file (submit the personal information form)
download_button2 = driver.find_element(By.XPATH, '//button[contains(@type, "submit")]')
download_button2.click()
time.sleep(2)


This is the main block of code of this download bot. The getURL function generates one URL per desired date (advancing one day at a time), while the second part opens each URL in turn and downloads its dataset, so that once the download for one page is done the loop moves on to the next one.

Also, print the number of generated URLs to see if all desired URLs (or dates) are included.

In [None]:
# Create URLs for the desired date range
def getURL(start=datetime(2016, 1, 3), end=datetime(2020, 3, 31)):
    """
    Function that creates one URL per date between the specified date range.

    Parameters:
        start: first date of the range (inclusive). Defaults to 2016-01-03.
        end: last date of the range (inclusive). Defaults to 2020-03-31.

    Yields:
        One URL string per day, in chronological order. The date is used both as start
        date and end date of the query, and the module-level variables city, origin_code,
        zone_type and coordinates are inserted into the URL. Nothing is yielded when
        start is later than end.
    """
    date = start
    while date <= end:
        # Format the date once; it is used for both the start and the end of the range
        day = date.strftime('%Y-%m-%d')
        yield ('https://movement.uber.com/explore/' + city + '/travel-times/query?si=' + origin_code + '&ti=&ag=' + zone_type + '&dt[tpb]=ALL_DAY&dt[wd;]=1,2,3,4,5,6,7&dt[dr][sd]=' +
               day + '&dt[dr][ed]=' + day + '&cd=&sa;=&sdn=' + coordinates + '&lang=en-US')
        date += timedelta(days=1)


# Perform iteration through URLs downloading the datasets for each URL
iterated_URLs = []
i = 0  # number of URLs processed so far (stays 0 when there is nothing to iterate)
# Build the list once: it is needed both for the count and for the loop
all_urls = list(getURL())
print('Number of generated URLs: ' + str(len(all_urls)))
for i, url in enumerate(all_urls, start=1):
    # Load the URL in the current tab
    driver.execute_script("window.open('"+url+"', '_self')")
    iterated_URLs.append(url)
    print(url)

    # zone_type_dropdown
    # zoning_type
    time.sleep(2)

    # Click on "Download data"
    download_button = driver.find_element(By.CSS_SELECTOR, 'div.f5 button') # //button[contains(@data-baseweb, "button")]')
    download_button.click()
    time.sleep(3.5)

    # Download csv file: wait (up to 20 s) until the button can actually be clicked, since it is clicked right away
    travel_times_download = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, '/html/body/div[1]/div[2]/div/div/div[3]/div/div[2]/div/div[1]/div/button[1]')))
    travel_times_download.click()
    time.sleep(2)

Number of generated URLs: 1550
https://movement.uber.com/explore/stockholm/travel-times/query?si655&ti=&ag=basomrade&dt[tpb]=ALL_DAY&dt[wd;]=1,2,3,4,5,6,7&dt[dr][sd]=2016-01-03&dt[dr][ed]=2016-01-03&cd=&sa;=&sdn=&lang=en-US
https://movement.uber.com/explore/stockholm/travel-times/query?si655&ti=&ag=basomrade&dt[tpb]=ALL_DAY&dt[wd;]=1,2,3,4,5,6,7&dt[dr][sd]=2016-01-04&dt[dr][ed]=2016-01-04&cd=&sa;=&sdn=&lang=en-US
https://movement.uber.com/explore/stockholm/travel-times/query?si655&ti=&ag=basomrade&dt[tpb]=ALL_DAY&dt[wd;]=1,2,3,4,5,6,7&dt[dr][sd]=2016-01-05&dt[dr][ed]=2016-01-05&cd=&sa;=&sdn=&lang=en-US
https://movement.uber.com/explore/stockholm/travel-times/query?si655&ti=&ag=basomrade&dt[tpb]=ALL_DAY&dt[wd;]=1,2,3,4,5,6,7&dt[dr][sd]=2016-01-06&dt[dr][ed]=2016-01-06&cd=&sa;=&sdn=&lang=en-US
https://movement.uber.com/explore/stockholm/travel-times/query?si655&ti=&ag=basomrade&dt[tpb]=ALL_DAY&dt[wd;]=1,2,3,4,5,6,7&dt[dr][sd]=2016-01-07&dt[dr][ed]=2016-01-07&cd=&sa;=&sdn=&lang=en-US
http