The purpose of this notebook is to provide a quick and easy way to automate the SDC 500m download process

In [10]:
#Load in the packages
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.alert import Alert
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pyproj import Proj
import pandas as pd
import os
import time

In [11]:
#Define a function to download the SDC file for the city of interest
def SDC_download_pipeline(chrome_driver_path, download_folder, link_to_SDC_txt, first_year, last_year, username, password, lon, lat):
    #chrome_driver_path is the path to the chromedriver
    #download_folder is the path to folder where you want the data to be downloaded
    #link_to_SDC_text is the link to the download_link.txt file
    #first_year and last_year are the earliest and latest years you want to pull the SDC data for respectively
    #username and password are your username and password for accessing the SDC website
    #Lon and lat are the latitude and longitude for the city you want the SDC data for

    #Set the path to our chromedriver
    chromedriver_path = chrome_driver_path

    #Create a Service object with the path to the chromedriver
    service = Service(chromedriver_path)

    #Define the download directory
    download_directory = download_folder

    #Set Chrome preferences
    chrome_prefs = {
        "download.default_directory": download_directory,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True
    }

    #Initialize the Chrome options and add the preferences to Chrome options
    chrome_options = Options()
    chrome_options.add_experimental_option("prefs", chrome_prefs)

    #Initialize the WebDriver with the service and options
    driver = webdriver.Chrome(service = service, options = chrome_options)

    #Now we can access the SDC website. First, load in the txt file to get the list of links corresponding to each of the years
    links = pd.read_csv(link_to_SDC_txt, sep = "\s+", header = None, names = ["Year", "URL"], skiprows = 1)

    #Filter the dataframe based on these years
    links = links.loc[(links["Year"] >= first_year) & (links["Year"] <= last_year), "URL"].tolist()

    #Now we will convert the latitude and longitude values into its corresponding MODIS tile by using pyproj
    #Define the upper-left corner X, upper-left corner Y, and tile size
    WORLD_ULC_X = -20015109.354
    WORLD_ULC_Y = 10007554.677
    TILE_SIZE = 1111950

    #Define the MODIS sinusoidal projection
    modis_proj = Proj("+proj=sinu +R=6371007.181 +lon_0=0")
    x, y = modis_proj(lon, lat)

    #Compute h and v
    h = int((x - WORLD_ULC_X) / TILE_SIZE)
    v = int((WORLD_ULC_Y - y) / TILE_SIZE)

    tile_name = f"h{h:02d}v{v:02d}"

    #Now we will loop through the links which will correspond to getting the data for all of the years we listed
    for data_link_num in range(len(links)):
        driver.get(links[data_link_num])
        if data_link_num == 0:
            driver.find_element(By.LINK_TEXT, "Login").click()
            element = driver.find_element(By.CSS_SELECTOR, "body > div:nth-child(1)")
            driver.execute_script("if(arguments[0].contentEditable === 'true') {arguments[0].innerText = '1 <p>1</p>'}", element)
            driver.find_element(By.ID, "horizontal_login_account").send_keys(username)
            driver.find_element(By.ID, "horizontal_login_password").send_keys(password)
            driver.find_element(By.CSS_SELECTOR, ".sub_btn").click()
            element = driver.find_element(By.CSS_SELECTOR, "body > div:nth-child(1)")
            driver.execute_script("if(arguments[0].contentEditable === 'true') {arguments[0].innerText = '1 <p>1</p>'}", element)

        #Wait for the page to load
        wait = WebDriverWait(driver, 10)

        #Locate all of the files of the different sinusoidal squares
        file_elements = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".ant-tree-node-content-wrapper-close .ant-tree-title")))

        #Now we will need to get the corresponding folder that matches our tile_name
        for file_num in range(len(file_elements)):
            #Get the file in Selenium
            file = file_elements[file_num]

            #Get all of the dropdown buttons
            dropdowns = driver.find_elements(By.CSS_SELECTOR, ".ant-tree-switcher_close")

            if tile_name in file.text:
                #Click the file
                file.click()

                #Click the corresponding dropdown arrow
                dropdown_arrow = dropdowns[file_num]
                dropdown_arrow.click()

                break
        
        driver.implicitly_wait(20)

        #Now we need to run through each band and download the data
        #Should do b01-b06 only
        #Should go up to 7 since we want b01 through b06
        for band in range(1, 7):
            #Prevent double-clicking
            time.sleep(5)
            #Click on the correct band
            driver.find_element(By.CSS_SELECTOR, f".ant-tree-treenode-switcher-close:nth-child({band}) .ant-tree-node-content-wrapper-normal").click()

            #Now get the pages we need to loop through to get all of the data
            page_buttons = driver.find_elements(By.CSS_SELECTOR, ".ant-pagination-item a")
            total_pages = len(page_buttons)

            #Now we loop through each page
            for page_num in range(1, total_pages + 1):
                time.sleep(2)
                #Click on the page button for the current page
                page_selector = f".ant-pagination-item-{page_num} a"
                page_button = driver.find_element(By.CSS_SELECTOR, page_selector)
                driver.execute_script("arguments[0].click();", page_button)
                #Wait for 4 seconds 
                time.sleep(4)

                #Let the page load
                driver.implicitly_wait(10)

                #Find all of the data files to download and get the total count
                data_files = driver.find_elements(By.CSS_SELECTOR, ".ant-table-row .checkbox-btn")
                total_files = len(data_files)

                #Now we run through all of the files
                for data_file in range(1, total_files + 1):
                    checkbox = driver.find_element(By.CSS_SELECTOR, f".ant-table-row:nth-child({data_file}) .checkbox-btn")
                    driver.execute_script("arguments[0].click();", checkbox)
                    #Wait for 2 seconds to avoid doubleclicking
                    time.sleep(2)
                    print(data_file)

  links = pd.read_csv(link_to_SDC_txt, sep = "\s+", header = None, names = ["Year", "URL"], skiprows = 1)


In [12]:
#Run this function to download the SDC file for whatever city is of interest
#Path to chrome driver
chrome_drive = "/Volumes/Seagate Portable Drive/Central_Park_Climate/Downloading_SDC_500/chromedriver-mac-arm64/chromedriver"

#Path to the folder you want your data files to be downloaded to
download_fold = "/Volumes/Seagate Portable Drive/Central_Park_Climate/Downloading_SDC_500/Data_Files"

#Path to the SDC 500 download link text file
SDC_text_link = "/Volumes/Seagate Portable Drive/Central_Park_Climate/Downloading_SDC_500/download_link.txt"

#Enter the first year you want SDC data for
first = 2016

#Enter the last year you want SDC data for
last = 2017

#Enter your username as a string
user = ""

#Enter your password as a string
password = ""

#Enter the latitude and longitude values of the city you want data for
latitude = 40.730610
longitude = -73.935242

#Note, you may need to click "Allow" when the webpage asks whether or not you want to download all of these files
SDC_download_pipeline(chrome_driver_path = chrome_drive, download_folder = download_fold, link_to_SDC_txt = SDC_text_link, 
                      first_year = first, last_year = last, username = user, password = password, lon = longitude, lat = latitude)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
1
2
3
4
5
6
7

In [40]:
#Check to make sure the files are all there
os.listdir(download_fold)

['SDC500_V10_h12v04_2016051_SR_b03.tif',
 'SDC500_V10_h12v04_2017002_SR_b03.tif',
 'SDC500_V10_h12v04_2017003_SR_b01.tif',
 'SDC500_V10_h12v04_2017003_SR_b02.tif',
 'SDC500_V10_h12v04_2017003_SR_b03.tif',
 'SDC500_V10_h12v04_2017051_SR_b01.tif',
 'SDC500_V10_h12v04_2017051_SR_b02.tif',
 'SDC500_V10_h12v04_2017051_SR_b03.tif',
 'SDC500_V10_h12v04_2017052_SR_b01.tif',
 'SDC500_V10_h12v04_2017052_SR_b02.tif',
 'SDC500_V10_h12v04_2017052_SR_b03.tif',
 'SDC500_V10_h12v04_2017053_SR_b01.tif',
 'SDC500_V10_h12v04_2017053_SR_b02.tif',
 'SDC500_V10_h12v04_2017053_SR_b03.tif',
 'SDC500_V10_h12v04_2016001_SR_b01.tif',
 'SDC500_V10_h12v04_2016001_SR_b02.tif',
 'SDC500_V10_h12v04_2016001_SR_b03.tif',
 'SDC500_V10_h12v04_2016002_SR_b01.tif',
 'SDC500_V10_h12v04_2016002_SR_b02.tif',
 'SDC500_V10_h12v04_2016002_SR_b03.tif',
 'SDC500_V10_h12v04_2016003_SR_b01.tif',
 'SDC500_V10_h12v04_2016003_SR_b02.tif',
 'SDC500_V10_h12v04_2016003_SR_b03.tif',
 'SDC500_V10_h12v04_2016051_SR_b01.tif',
 'SDC500_V10_h12