In [None]:
import pandas as pd
import re
import os
import requests
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By

def addzero(x):
    """Return str(x), prefixed with '0' when it is exactly one character long.

    Used to turn single-digit month/day numbers into two-character strings.
    """
    text = str(x)
    return '0' + text if len(text) == 1 else text

def get_URL (startdate, enddate, freq = '1M', metdata='reanalysis'):
    """Build the READY extract-page URL for every date in a range.

    Args:
        startdate: First date of the range. It is converted with str() and
            passed to pandas.date_range, e.g. 20110401.
        enddate: Last date of the range, handled like startdate.
        freq: pandas frequency string used to step through the range.
            NOTE(review): recent pandas releases spell the month-end alias
            'ME' instead of 'M' -- confirm against the installed version.
        metdata: Data product to request: 'reanalysis' (one file per month),
            'GDAS0P5' or 'NAM12' (one file per day).

    Returns:
        A two-item list [datelist, urllist]. datelist holds the date strings
        ('YYYYMM' for reanalysis, 'YYYYMMDD' otherwise) and urllist the
        matching extract-page URLs, in the same order.

    Raises:
        ValueError: If metdata is not one of the supported products. (A
            misspelled product name used to yield two empty lists silently.)
    """
    # Per product: (date format, metfile prefix, metfile suffix, metdatasm value).
    products = {
        'reanalysis': ('%Y%m', 'RP', '.gbl', 'reanalysis'),
        'GDAS0P5': ('%Y%m%d', '', '_gdas0p5', 'gdas0p5'),
        'NAM12': ('%Y%m%d', '', '_nam12', 'nam12'),
    }
    if metdata not in products:
        raise ValueError('Unsupported metdata ' + repr(metdata) + '. Supported values are: '
                         + ', '.join(sorted(products)) + '.')
    datefmt, prefix, suffix, metdatasm = products[metdata]

    dates = pd.date_range(start=str(startdate), end=str(enddate), freq=freq)
    dateprint = ''.join(str(item) + ', ' for item in dates)
    print ('Metdata is '+ metdata + '. The dates of files for downloading are generated: '+ dateprint)

    baseurl = 'https://www.ready.noaa.gov/ready2-bin/extract/extract1a.pl?' + 'xtype=1'
    datelist = [item.strftime(datefmt) for item in dates]
    urllist = [baseurl + '&metdata=' + metdata + '&metdatasm=' + metdatasm
               + '&metfile=' + prefix + date + suffix
               for date in datelist]
    return [datelist, urllist]

def post_param (url, latL, lonL, latR, lonR):
    """Drive the extract page in headless Chrome and collect the download links.

    The function opens url, fills in the four corner coordinates, submits the
    form, confirms on the following page and then reads the two result links.

    Args:
        url: Extract-page URL to open.
        latL: Lower-left latitude, typed into the 'latL' field.
        lonL: Lower-left longitude, typed into the 'lonL' field.
        latR: Upper-right latitude, typed into the 'latR' field.
        lonR: Upper-right longitude, typed into the 'lonR' field.

    Returns:
        A list [project, dlurl, dlfig, errornumber]. project is the 'proc'
        value taken from the URL after the first submit, dlurl and dlfig are
        the href values of the data and figure links, and errornumber is ''
        on success, 'Timeout!' when a wait expired, or 'Error: ...' for any
        other failure. Values that could not be obtained stay ''.
    """
    # Local import: selenium is already a dependency of this file, and this
    # keeps the function self-contained.
    from selenium.common.exceptions import TimeoutException

    errornumber = ''
    dlurl = ''
    dlfig = ''
    project = ''

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless') #if you want to see the process, please type a '#' before the chrome_option...
    browser = webdriver.Chrome(options=chrome_options)
    try:
        wait = WebDriverWait(browser, 120)
        browser.get(url)
        latLinput = wait.until (EC.presence_of_element_located((By.CSS_SELECTOR,'[name=latL]')))
        lonLinput = wait.until (EC.presence_of_element_located((By.CSS_SELECTOR,'[name=lonL]')))
        latRinput = wait.until (EC.presence_of_element_located((By.CSS_SELECTOR,'[name=latR]')))
        lonRinput = wait.until (EC.presence_of_element_located((By.CSS_SELECTOR,'[name=lonR]')))
        submitbutton = wait.until (EC.element_to_be_clickable((By.CSS_SELECTOR, '.center [type=submit]')))
        latLinput.send_keys(latL)
        lonLinput.send_keys(lonL)
        latRinput.send_keys(latR)
        lonRinput.send_keys(lonR)
        submitbutton.click()

        # The page reached after the first submit carries the project number
        # in its 'proc' query parameter.
        project = re.findall('https.*?proc=(.*?)&x=0', browser.current_url)[0]
        submitbutton2 = wait.until (EC.element_to_be_clickable((By.CSS_SELECTOR, '.center [type=submit]')))
        submitbutton2.click()

        dlfig = wait.until (EC.presence_of_element_located((By.CSS_SELECTOR,'div center a'))).get_attribute('href')
        dlurl = wait.until (EC.presence_of_element_located((By.CSS_SELECTOR,'div center h2 a'))).get_attribute('href')
    except TimeoutException:
        errornumber = 'Timeout!'
    except Exception as exc:
        # Not a timeout: record what actually went wrong instead of
        # mislabelling it, so the report stays useful for debugging.
        errornumber = 'Error: ' + type(exc).__name__ + ': ' + str(exc)
    finally:
        # quit() ends the whole driver session; close() only closes the
        # current window.
        browser.quit()

    # Reporting and the return live outside 'finally' so that interrupts such
    # as KeyboardInterrupt are not swallowed by a return statement.
    if dlurl != '':
        print ('Successfully get Download URL.' + ' Posted URL:' + str(url) +'. Project Number is '+ str(project) + ', Download URL:' + str(dlurl) +'.' )
    else:
        print ('CAN NOT get Download URL.' + ' Posted URL:' + str(url) +'. It may be timeout or the parameters are wrong.')
    return [project, dlurl, dlfig, errornumber]

def downloaddata (date, dataurl, figurl):
    """Download the data archive and its figure into a per-year folder.

    Both files are saved as '<year>/<date> <project>' with the extensions
    '.zip' and '.gif', where year is the first four characters of date and
    project is the part of dataurl between 'extract_' and '.zip'.

    Args:
        date: Date label used in the file names; converted with str().
        dataurl: URL of the zip archive. It must contain 'extract_<id>.zip'.
        figurl: URL of the accompanying figure.

    Returns:
        'Successfully downloaded!' when both files were written, otherwise
        'Fail to download.'. Failures are reported on stdout and never raised.
    """
    date = str(date)

    # Without a recognisable data URL (for example the '' left behind by a
    # failed page request) there is nothing to fetch, so fail before any
    # network or disk access.
    found = re.findall(r'htt.*?extract_(.*?)\.zip', str(dataurl))
    if not found:
        print ('Data of date:' + date + ', project number:unknown failed to download.' )
        return 'Fail to download.'
    project = found[0]

    year = date[0:4]
    basepath = os.path.join(year, date + ' ' + project)
    try:
        os.makedirs(year, exist_ok=True)
        for fileurl, extension in ((dataurl, '.zip'), (figurl, '.gif')):
            # The timeout keeps a stalled server from hanging the whole run.
            response = requests.get(fileurl, timeout=120)
            # Treat HTTP error pages as failures instead of saving them.
            response.raise_for_status()
            with open(basepath + extension, 'wb') as f:
                f.write(response.content)
    except (requests.exceptions.RequestException, OSError) as exc:
        print ('Data of date:' + date + ', project number:' + str(project) + ' failed to download.' + ' Reason: ' + str(exc))
        return 'Fail to download.'

    print ('Data of date:' + date + ', project number:' + str(project) + ' has successfully downloaded.' )
    return 'Successfully downloaded!'

def main(startdate, enddate, latL, lonL, latR, lonR, freq='1M', metdata='reanalysis'):
    """Run the whole workflow: build URLs, request extracts, download, report.

    For every generated date the extract page is driven with the given
    rectangle, the resulting data and figure links are downloaded, and an
    Excel report '<first date>-<last date> report.xlsx' with one row per date
    is written to the current directory.

    Args:
        startdate: First date of the range (year, month and day).
        enddate: Last date of the range (year, month and day).
        latL: Lower-left latitude of the rectangle.
        lonL: Lower-left longitude of the rectangle.
        latR: Upper-right latitude of the rectangle.
        lonR: Upper-right longitude of the rectangle.
        freq: Date-generation frequency passed on to get_URL.
        metdata: Data product passed on to get_URL.

    Returns:
        None.
    """
    datelist, URLlist = get_URL (startdate= startdate, enddate = enddate, freq=freq, metdata=metdata )
    if not datelist:
        # An empty range used to crash at datelist[0] further down; stop
        # early with an explanation instead.
        print ('No dates were generated for the given range, frequency and metdata. Nothing to download.')
        return

    print ('Start to get data URL...')
    postresults = [post_param (url=pageURL, latL=latL, lonL=lonL, latR=latR, lonR=lonR)
                   for pageURL in URLlist]
    project = [result[0] for result in postresults]
    dlurl = [result[1] for result in postresults]
    dlfig = [result[2] for result in postresults]
    errornumber = [result[3] for result in postresults]

    print ('Start to download data... (If this process fails, you can download manually.)')
    dlornotlist = [downloaddata(date = date, dataurl = dataurl, figurl= figurl)
                   for date, dataurl, figurl in zip(datelist, dlurl, dlfig)]

    resultreport = pd.DataFrame({'Date': datelist,
                                 'Project': project,
                                 'Downloaded': dlornotlist,
                                 'Error': errornumber,
                                 'DataDlURL': dlurl,
                                 'FigDlURL':dlfig})
    savepath = str(datelist[0]) + '-' + str(datelist [-1]) + ' ' + 'report.xlsx'
    resultreport.to_excel(savepath, sheet_name='report', index=False)

    print ('All work has done. The report has been created.')

In [None]:
# Example runs. Exactly one call is active; the commented-out calls are
# templates for the other supported data products.
#main(startdate = 19580401, enddate = 19580501,freq = '1M', metdata = 'reanalysis', latL = '-10', lonL = '10', latR = '90', lonR = '180') #for 'REANALYSIS' each month
main(
    startdate=20110401,
    enddate=20110401,
    latL='-10',
    lonL='10',
    latR='90',
    lonR='180',
    freq='1D',
    metdata='GDAS0P5',
) # for 'GDAS 0.5 Degree' each day
#main(startdate = 20110401, enddate = 20110401,freq = '1D', metdata = 'NAM12', latL = '10', lonL = '-100', latR = '90', lonR = '-60') #for 'NAM 12km' each day, this data is only for U.S.

#It needs pandas, requests, selenium and selenium webdriver.
#Download it by typing "pip install XX"(XX = pandas, requests, selenium.) in Anaconda Prompt (anaconda3) (if you use Anaconda) or CMD.
#For selenium webdriver, in this code, Chrome is used. Please download it in: https://sites.google.com/a/chromium.org/chromedriver/downloads

#startdate and enddate MUST include year, month and day, although the day is not necessary for reanalysis data.
#latL means Lower-left Latitude, lonL means Lower-left Longitude, 
#Similarly, latR means Upper-right Latitude, lonR means Upper-right Longitude.
#These 4 coordinates form a rectangle, and the data inside this rectangle is downloaded.
#If you are not familiar with other parameters, please DO NOT modify other parameters.
#-------------------------------------------------------------------------------------------------------------------------------------------------#
#Version 1.1
#Update document
#1.We found that only some parameters need to be declared, so the input of the other parameters has been removed.
#2.
#Now, in addition to REANALYSIS (global, 1948-present), GDAS 0.5 Degree (global, 2007-2019) and NAM 12km (since 05/2007)(only for U.S.) are supported.
#Please input the parameter : metdata = 'reanalysis' ; metdata = 'GDAS0P5' ; metdata = 'NAM12' to get the data you want.
#3.
#Now, you can input the frequency used for the date generation. For example, the parameters: freq = '1M' ; freq = '5M' ; freq = '5D' ; freq = '2Y' -
#-will generate dates at intervals of 1 month, 5 months, 5 days and 2 years.
#4.We added some output text while the code is running to make it easy to see the status.
#-------------------------------------------------------------------------------------------------------------------------------------------------#