In [8]:
from pyesgf.search import SearchConnection
import os
import pandas as pd
import requests
from tqdm import tqdm
import xarray as xr
import json
import requests
from urllib3.exceptions import InsecureRequestWarning
from urllib3 import disable_warnings
# download() below calls requests.get(..., verify=False); silence the
# InsecureRequestWarning that would otherwise be reported for those requests.
disable_warnings(InsecureRequestWarning)
# NOTE(review): presumably read by pyesgf to suppress its "facets=*" search
# warning -- confirm against the esgf-pyclient documentation.
os.environ["ESGF_PYCLIENT_NO_FACETS_STAR_WARNING"] = "on"

# Download CMIP5 Models

In [9]:
def download(url, path, filename):
    """Stream ``url`` to ``path/filename`` unless that file already exists.

    The payload is first written to ``<filename>.part`` and only renamed to
    its final name once the whole stream has been received, so an interrupted
    or failed transfer never leaves a truncated file that a later run would
    mistake for a finished download.

    Parameters
    ----------
    url : str
        Address of the file to fetch.
    path : str or None
        Destination directory (must already exist). ``None`` or ``""`` means
        the current working directory.
    filename : str
        Name of the file to create inside ``path``.

    Returns
    -------
    None

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status; nothing is written.
    requests.RequestException
        On connection problems or timeouts; the partial file is removed.
    """
    # os.path.join(None, ...) raises TypeError, so map a missing directory
    # onto the current working directory.
    total_filepath = os.path.join(path or "", filename)

    # Nothing to do if the file has already been downloaded
    if os.path.exists(total_filepath):
        return

    print(f"Downloading {filename}")
    block_size = 1024
    partial_filepath = total_filepath + ".part"

    # NOTE(review): verify=False turns off TLS certificate verification, so the
    # server's identity is not checked. Kept as in the original -- confirm that
    # this is still required for the data nodes in use.
    with requests.get(url, stream=True, timeout=10, verify=False) as r:
        # Do not store an HTML error page under the data file's name
        r.raise_for_status()
        total_size = int(r.headers.get('content-length', 0))
        status_code = r.status_code
        try:
            # Count bytes (not blocks) so the bar is correct even when chunks
            # are shorter than block_size; total=None hides the bar's length
            # when the server sent no content-length.
            with open(partial_filepath, 'wb') as f, \
                    tqdm(total=total_size or None, unit='B',
                         unit_scale=True, unit_divisor=1024) as progress:
                for data in r.iter_content(block_size):
                    f.write(data)
                    progress.update(len(data))
        except BaseException:
            # Interrupted transfer: drop the fragment so a re-run retries
            if os.path.exists(partial_filepath):
                os.remove(partial_filepath)
            raise

    # The stream completed: promote the temporary file to its final name
    os.replace(partial_filepath, total_filepath)

    if total_size != 0 and os.path.getsize(total_filepath) != total_size:
        print("Downloaded size does not match expected size!\n",
              "FYI, the status code was ", status_code)

In [10]:
# Open a search connection to the ESGF search service hosted at LLNL.
# NOTE(review): distrib=True presumably extends searches to the other
# federated ESGF nodes as well -- confirm in the esgf-pyclient documentation.
conn = SearchConnection('https://esgf-node.llnl.gov/esg-search', distrib=True)

In [19]:
# 'DATA_SUMMARY.json' is a hand-written guide, keyed by model name, listing
# the search constraints (project, experiment, data node, ...) for each model.
with open("../data/DATA_SUMMARY.json") as summary_file:
    DATA_TAB = json.loads(summary_file.read())

# Variables to look for in every model
variables = ["tas", "psl"]
print(DATA_TAB)

{'CanCM4': {'project': 'CMIP5', 'model': 'CanCM4', 'experiment': 'decadal1961', 'time_frequency': 'mon', 'realm': 'atmos', 'latest': True, 'data_node': 'crd-esgf-drc.ec.gc.ca'}, 'MIROC5': {'project': 'CMIP5', 'model': 'MIROC5', 'experiment': 'decadal1961', 'time_frequency': 'mon', 'realm': 'atmos', 'latest': True, 'data_node': 'aims3.llnl.gov'}, 'HadCM3': {'project': 'CMIP5', 'model': 'HadCM3', 'experiment': 'decadal1961', 'time_frequency': 'mon', 'realm': 'atmos', 'latest': True, 'data_node': 'esgf.ceda.ac.uk'}, 'MPI-ESM-LR': {'project': 'CMIP5', 'model': 'MPI-ESM-LR', 'experiment': 'decadal1961', 'time_frequency': 'mon', 'realm': 'atmos', 'latest': True, 'data_node': 'aims3.llnl.gov'}, 'GFDL-CM2.1': {'project': 'CMIP5', 'model': 'GFDL-CM2.1', 'experiment': 'decadal1961', 'time_frequency': 'mon', 'realm': 'atmos', 'latest': True, 'data_node': 'esgf.ceda.ac.uk'}}


In [20]:
# Collect the metadata (variable, model, filename, URL) of every file to fetch.
# Nothing is downloaded in this cell.
files_to_download = []
for model, constraints in DATA_TAB.items():
    print(constraints["data_node"])
    # Ask ESGF for everything matching this model's constraints and variables
    query = conn.new_context(**constraints, variable=",".join(variables))
    print(f"{model}: {query.hit_count} hits found.")
    results = query.search()

    # Walk through each hit and record the files holding a wanted variable
    for idx in tqdm(range(len(results)), desc=f"Getting files from {model}."):
        for file_hit in results[idx].file_context().search():
            for var in variables:
                if var not in file_hit.json["variable"]:
                    continue
                entry = {
                    "var": var,
                    "model": model,
                    'filename': file_hit.filename,
                    'url': file_hit.download_url,
                }
                files_to_download.append(entry)

esgf.ceda.ac.uk
GFDL-CM2.1: 10 hits found.


Getting files from GFDL-CM2.1.: 100%|██████████| 10/10 [01:42<00:00, 10.28s/it]


In [42]:
# Cache the collected file list on disk as JSON
with open('../data/FILES_TO_DOWNLOAD.json', 'w') as cache_file:
    cache_file.write(json.dumps(files_to_download))

In [3]:
# Restore the file list from the JSON cache instead of querying ESGF again
with open('../data/FILES_TO_DOWNLOAD.json') as cache_file:
    files_to_download = json.loads(cache_file.read())

In [21]:
# Fetch every listed file into ../data/models/<model>/<variable>/
for entry in files_to_download:
    # Dots in the model name are replaced by 'p' for the folder name
    model_dirname = entry['model'].replace('.', 'p')
    dest_dir = f"../data/models/{model_dirname}/{entry['var']}"

    # Create the destination folder on first use and report it
    if not os.path.isdir(dest_dir):
        print(dest_dir)
        os.makedirs(dest_dir)

    download(entry["url"], dest_dir, entry["filename"])

../data/models/GFDL-CM2p1/psl
Downloading psl_Amon_GFDL-CM2p1_decadal1961_r10i1p1_196101-197012.nc


6.09kKiB [00:02, 2.19kKiB/s]                          


../data/models/GFDL-CM2p1/tas
Downloading tas_Amon_GFDL-CM2p1_decadal1961_r10i1p1_196101-197012.nc


6.09kKiB [00:02, 2.34kKiB/s]                          


Downloading psl_Amon_GFDL-CM2p1_decadal1961_r1i1p1_196101-197012.nc


6.09kKiB [00:02, 2.47kKiB/s]                          


Downloading tas_Amon_GFDL-CM2p1_decadal1961_r1i1p1_196101-197012.nc


6.09kKiB [00:09, 636KiB/s]                            


Downloading psl_Amon_GFDL-CM2p1_decadal1961_r2i1p1_196101-197012.nc


6.09kKiB [00:03, 1.66kKiB/s]                          


Downloading tas_Amon_GFDL-CM2p1_decadal1961_r2i1p1_196101-197012.nc


6.09kKiB [00:02, 2.14kKiB/s]                          


Downloading psl_Amon_GFDL-CM2p1_decadal1961_r3i1p1_196101-197012.nc


6.09kKiB [00:02, 2.40kKiB/s]                          


Downloading tas_Amon_GFDL-CM2p1_decadal1961_r3i1p1_196101-197012.nc


6.09kKiB [00:02, 2.49kKiB/s]                          


Downloading psl_Amon_GFDL-CM2p1_decadal1961_r4i1p1_196101-197012.nc


6.09kKiB [00:02, 2.50kKiB/s]                          


Downloading tas_Amon_GFDL-CM2p1_decadal1961_r4i1p1_196101-197012.nc


6.09kKiB [00:02, 2.49kKiB/s]                          


Downloading psl_Amon_GFDL-CM2p1_decadal1961_r5i1p1_196101-197012.nc


6.09kKiB [00:02, 2.46kKiB/s]                          


Downloading tas_Amon_GFDL-CM2p1_decadal1961_r5i1p1_196101-197012.nc


6.09kKiB [00:02, 2.58kKiB/s]                          


Downloading psl_Amon_GFDL-CM2p1_decadal1961_r6i1p1_196101-197012.nc


6.09kKiB [00:02, 2.50kKiB/s]                          


Downloading tas_Amon_GFDL-CM2p1_decadal1961_r6i1p1_196101-197012.nc


6.09kKiB [00:02, 2.52kKiB/s]                          


Downloading psl_Amon_GFDL-CM2p1_decadal1961_r7i1p1_196101-197012.nc


6.09kKiB [00:02, 2.56kKiB/s]                          


Downloading tas_Amon_GFDL-CM2p1_decadal1961_r7i1p1_196101-197012.nc


6.09kKiB [00:09, 640KiB/s]                            


Downloading psl_Amon_GFDL-CM2p1_decadal1961_r8i1p1_196101-197012.nc


6.09kKiB [00:03, 1.72kKiB/s]                          


Downloading tas_Amon_GFDL-CM2p1_decadal1961_r8i1p1_196101-197012.nc


6.09kKiB [00:02, 2.16kKiB/s]                          


Downloading psl_Amon_GFDL-CM2p1_decadal1961_r9i1p1_196101-197012.nc


6.09kKiB [00:02, 2.39kKiB/s]                          


Downloading tas_Amon_GFDL-CM2p1_decadal1961_r9i1p1_196101-197012.nc


6.09kKiB [00:02, 2.47kKiB/s]                          


In [27]:
# Fetch one file into the current working directory.
# The directory is given as "." because download() builds the target with
# os.path.join(path, filename), which raises TypeError when path is None.
download(files_to_download[1]["url"], ".", files_to_download[1]["filename"])

Downloading tas_Amon_HadCM3_decadal1961_r10i2p1_196111-197112.nc


Request processed...


3.36kKiB [00:00, 4.15kKiB/s]                          
