This notebook will download ANU raster climate data from the NCI

In [14]:
import os
import urllib.error
import urllib.request
from pathlib import Path

from multiprocess.pool import ThreadPool
from tqdm.auto import tqdm

In [16]:
# Years you are interested in.
# They are restored from IPython's %store database below, so they must have been
# saved earlier with `%store min_year max_year` (presumably by another notebook
# in this project - confirm). To set them by hand instead, uncomment:
# min_year = 1980
# max_year = 2021
%store -r min_year max_year
# Directory that holds all working data (the raster downloads go in a sub-folder).
# Defaults to a 'working' folder under the current working directory; an absolute
# path can be used instead, e.g.:
# dam_forcast_working_dir = '/mnt/2TB Working/Projects/Dam forecast/v5'
dam_forcast_working_dir = os.path.join(os.getcwd(),'working')

In [24]:
# Folder inside the working directory that receives the downloaded climate rasters
ANU_cliamte_downlaod_dir = os.path.join(dam_forcast_working_dir, 'ANU climate download')
# Create it together with any missing parent folders; do nothing if it already exists
os.makedirs(ANU_cliamte_downlaod_dir, exist_ok=True)
# Climate variables to fetch; each name is substituted into the download URLs
climate_types = ['rain', 'tavg']

In [25]:
# Persist these variables with IPython's %store magic so that another notebook or
# kernel session can restore them with `%store -r <name>`.
# min_year / max_year are written back unchanged from the values restored above.
%store dam_forcast_working_dir
%store ANU_cliamte_downlaod_dir
%store min_year
%store max_year
%store climate_types

Stored 'dam_forcast_working_dir' (str)
Stored 'ANU_cliamte_downlaod_dir' (str)
Stored 'min_year' (int)
Stored 'max_year' (int)
Stored 'climate_types' (list)


In [18]:
# Both URLs point at the same dataset on the same THREDDS host; only the
# service segment ('fileServer' vs 'catalog') differs, so build them from shared parts.
_thredds_root = 'https://dapds00.nci.org.au/thredds'
_dataset_path = 'gh70/ANUClimate/v2-0/stable/month'

# Prefix under which the individual monthly NetCDF files are downloaded
base_url = f'{_thredds_root}/fileServer/{_dataset_path}'
# Catalog page, requested only to check that the server is reachable
check_avaliability_url = f'{_thredds_root}/catalog/{_dataset_path}/catalog.html'

In [19]:
# Make sure the NCI THREDDS server is reachable before queuing any downloads.
try:
    # The timeout stops the cell from hanging forever on a stalled connection;
    # the context manager closes the HTTP response once the status has been read.
    with urllib.request.urlopen(check_avaliability_url, timeout=30) as response:
        responce_code = response.getcode()
    if responce_code == 200:
        print('Server is up')
    else:
        print(f'The server is down, responce code {responce_code}')
except urllib.error.HTTPError as err:
    # urlopen raises HTTPError for error statuses (4xx/5xx). The server did
    # answer, so report its status code instead of blaming the network.
    responce_code = err.code
    print(f'The server is down, responce code {responce_code}')
except OSError as err:
    # URLError (DNS failure, refused connection, ...) and socket timeouts are all
    # OSError subclasses. Unlike a bare `except:`, this does not swallow
    # KeyboardInterrupt or programming errors such as NameError.
    print(f'No internet connection! ({err})')

Server is up


In [20]:
# Build the list of URLs to download: one file per (year, month, climate type).
download_urls = []
# NOTE(review): the range starts at min_year - 1, so the year before the requested
# period is downloaded as well - presumably intentional; confirm.
for year in range(min_year - 1, max_year + 1):
    for month in range(1, 13):
        for climate_type in climate_types:
            # File names carry the year followed by the month zero-padded to two digits
            file_name = f'ANUClimate_v2-0_{climate_type}_monthly_{year}{month:02d}.nc'
            dl_url = f'{base_url}/{climate_type}/{year}/{file_name}'
            download_urls.append(dl_url)

# Show one example; guarded so a list with fewer than two entries cannot raise IndexError
if len(download_urls) > 1:
    print(f'Sample url \n{download_urls[1]}\n')
print(f'Total url count = {len(download_urls)}')

Sample url 
https://dapds00.nci.org.au/thredds/fileServer/gh70/ANUClimate/v2-0/stable/month/tavg/1979/ANUClimate_v2-0_tavg_monthly_197901.nc

Total url count = 1032


In [21]:
# func to download a url into the local download dir
def downlaod(url, overwrite=True):
    """Download ``url`` into ``ANU_cliamte_downlaod_dir``, keeping the remote file name.

    The data is first written to a temporary ``<name>.part`` file and only moved
    to its final name once the transfer has completed, so an interrupted or
    failed download never leaves a truncated file under the final name.

    Parameters
    ----------
    url : str
        Address of the file to fetch; its last path component becomes the
        local file name.
    overwrite : bool, default True
        When False, the download is skipped if the target file already exists.
        The default re-downloads and replaces any existing file.

    Returns
    -------
    None

    Raises
    ------
    OSError
        Whatever ``urllib.request.urlretrieve`` raises (``URLError``,
        ``HTTPError``, ``ContentTooShortError``, ...) is propagated after the
        temporary file has been cleaned up.
    """
    file_name = os.path.basename(url)
    local_path = os.path.join(ANU_cliamte_downlaod_dir, file_name)
    if not overwrite and os.path.exists(local_path):
        return

    partial_path = f'{local_path}.part'
    try:
        urllib.request.urlretrieve(url, partial_path)
        # os.replace swaps the finished file into place in a single step
        os.replace(partial_path, local_path)
    finally:
        # Only still present when the download or the move failed
        if os.path.exists(partial_path):
            os.remove(partial_path)

In [22]:
# Download the climate files several at a time.
# imap_unordered hands back each result as soon as its download finishes, so the
# tqdm bar advances per file. pool.map blocks until every download is done, which
# left the bar at 0% for the whole run and then jumped straight to 100%.
with ThreadPool(40) as p:
    list(tqdm(p.imap_unordered(downlaod, download_urls), total=len(download_urls)))

  0%|          | 0/1032 [00:00<?, ?it/s]