<h1>MANDATORY PACKAGES</h1>

In [None]:
import ftputil #pip install ftputil
import os
import datetime
import numpy as np
from shapely.geometry import box #conda install Shapely
from collections import namedtuple

<H1>AUXILIARY FUNCTIONS </H1>

In [None]:
def bbox_check(netCDF, targeted_bounding_box):
    """Tell whether a file's bounding box lies inside the targeted one.

    Parameters
    ----------
    netCDF : mapping-like index record
        Must provide 'geospatial_lat_min', 'geospatial_lat_max',
        'geospatial_lon_min' and 'geospatial_lon_max', each convertible
        with float().
    targeted_bounding_box : geometry exposing ``contains``
        The area of interest (built with shapely's ``box`` in this notebook).

    Returns
    -------
    bool
        True when ``targeted_bounding_box.contains`` accepts the file's box,
        False otherwise.
    """
    # Read the four limits in one pass (same order as the index columns used here).
    lat_min, lat_max, lon_min, lon_max = (
        float(netCDF[field])
        for field in (
            'geospatial_lat_min',
            'geospatial_lat_max',
            'geospatial_lon_min',
            'geospatial_lon_max',
        )
    )
    # box() takes (minx, miny, maxx, maxy), i.e. longitude first.
    file_bounding_box = box(lon_min, lat_min, lon_max, lat_max)
    return True if targeted_bounding_box.contains(file_bounding_box) else False

In [None]:
def timerange_check(netCDF, targeted_ini, targted_end, time_format=None):
    """Tell whether a file's time coverage overlaps the targeted time range.

    Parameters
    ----------
    netCDF : mapping-like index record
        Must provide 'time_coverage_start' and 'time_coverage_end', either as
        UTF-8 bytes or as str.
    targeted_ini, targted_end : datetime.datetime
        Start and end of the targeted time range.
    time_format : str, optional
        strptime format of the two coverage fields. When omitted, the
        module-level ``date_format`` setting is used.

    Returns
    -------
    bool
        True when the two ranges share at least one instant (ranges that
        merely touch at an end point count as overlapping), False otherwise.

    Raises
    ------
    ValueError
        If a coverage field does not match the expected format.
    """
    if time_format is None:
        # Fall back on the notebook-wide setting from the selection-criteria cell.
        time_format = date_format

    def _parse(value):
        # The index reader yields bytes or str depending on the NumPy version.
        if isinstance(value, bytes):
            value = value.decode('utf-8')
        return datetime.datetime.strptime(value, time_format)

    time_start = _parse(netCDF['time_coverage_start'])
    time_end = _parse(netCDF['time_coverage_end'])

    # Two ranges overlap exactly when the later start does not pass the earlier end.
    latest_start = max(targeted_ini, time_start)
    earliest_end = min(targted_end, time_end)
    return latest_start <= earliest_end

In [None]:
def parameter_check(netCDF, targeted_parameters):
    """Tell whether a file offers at least one of the targeted parameters.

    Parameters
    ----------
    netCDF : mapping-like index record
        Must provide 'parameters': a whitespace-separated list of parameter
        names, either as UTF-8 bytes or as str.
    targeted_parameters : container of str
        Parameter names of interest.

    Returns
    -------
    bool
        True when any listed parameter is in ``targeted_parameters``.
    """
    listed = netCDF['parameters']
    # The index reader yields bytes or str depending on the NumPy version.
    if isinstance(listed, bytes):
        listed = listed.decode('utf-8')
    # split() without an argument ignores repeated/leading/trailing blanks,
    # so no empty pseudo-parameter is ever tested.
    return any(param in targeted_parameters for param in listed.split())

In [None]:
def platformtype_check(netCDF, targeted_platform_types):
    """Tell whether a file belongs to one of the targeted platform types.

    The platform type is looked up in the file link as the segment
    '_<type>_'.

    Parameters
    ----------
    netCDF : mapping-like index record
        Must provide 'file_name' (the file link), either as UTF-8 bytes or
        as str.
    targeted_platform_types : iterable of str
        Platform type codes of interest.

    Returns
    -------
    bool
        True when the link contains the segment of any targeted type.
    """
    ftplink = netCDF['file_name']
    # The index reader yields bytes or str depending on the NumPy version.
    if isinstance(ftplink, bytes):
        ftplink = ftplink.decode('utf-8')
    for platform_type in targeted_platform_types:
        # 'TS' is searched as 'TS_TS' — presumably because a bare '_TS_' also
        # shows up in the names of files of other platform types; confirm
        # against the product's file naming convention.
        code = 'TS_TS' if platform_type == 'TS' else platform_type
        if '_' + code + '_' in ftplink:
            return True
    return False

<h1>CUSTOM SETTINGS</h1>

In [None]:
# Local directory that the download cell switches to before fetching files.
output_directory = os.getcwd() #default to current working directory

In [None]:
# CMEMS credentials used to open the FTP connection.
# They are read from the CMEMS_USER / CMEMS_PASSWORD environment variables so
# that they do not have to be saved inside the notebook; when a variable is
# not set, the '' fallback below is used (replace it to type the value here).
user = os.environ.get('CMEMS_USER', '') #CMEMS user name
password = os.environ.get('CMEMS_PASSWORD', '') #CMEMS password

In [None]:
# Remote location of the index file: "Core/<product_name>/<index_file>" on <host>.
product_name = 'INSITU_MED_NRT_OBSERVATIONS_013_035' #name of the targeted In Situ product
host = 'nrt.cmems-du.eu' #targeted host (nrt.cmems-du.eu; the other host is presumably my.cmems-du.eu - confirm)
index_file = 'index_history.txt' #name of the targeted index file

<H1> SELECTION CRITERIA</H1>

In [None]:
# Selection criteria: spatial coverage.
# Edit the four limits below; they are combined into the box that
# bbox_check() compares each file's own bounding box against.
targeted_geospatial_lat_min = 42.9146  # min latitude of your bounding box
targeted_geospatial_lat_max = 44.8395  # max latitude of your bounding box
targeted_geospatial_lon_min = 7.3546   # min longitude of your bounding box
targeted_geospatial_lon_max = 12.3997  # max longitude of your bounding box
# box() takes (minx, miny, maxx, maxy), i.e. longitude first.
targeted_bounding_box = box(
    targeted_geospatial_lon_min,
    targeted_geospatial_lat_min,
    targeted_geospatial_lon_max,
    targeted_geospatial_lat_max,
)

In [None]:
# Selection criteria: time range.
# date_format is used here to parse the two limits below and is also read by
# timerange_check() to parse each file's time_coverage_start/time_coverage_end.
date_format = "%Y-%m-%dT%H:%M:%SZ" 
ini = datetime.datetime.strptime('2018-10-01T00:00:00Z', date_format)  # start of the targeted time range
end = datetime.datetime.strptime('2018-11-30T23:59:59Z', date_format)  # end of the targeted time range

In [None]:
# Selection criteria: parameters.
# A file is selected by parameter_check() when its 'parameters' index field
# lists at least one of these names.
targeted_parameters = ['TEMP', 'PSAL']

In [None]:
# Selection criteria: platform type.
# platformtype_check() selects a file when its link contains '_<code>_' for
# at least one of these codes ('TS' is looked up as '_TS_TS_').
targeted_platform_types = ['MO', 'RF']

<H1>FILES MATCHING</H1>

Scan the index file of the product selected above for files that match all the selection criteria specified above, and report the number of matching files:

In [None]:
matches = []
with ftputil.FTPHost(host, user, password) as ftp_host:
    # open the index file to read
    with ftp_host.open("Core"+'/'+product_name+'/'+index_file, "r") as indexfile:
        # read the index file as a comma-separated-value file
        # (the first 6 lines are skipped; one named field per column)
        index = np.genfromtxt(
            indexfile,
            skip_header=6,
            unpack=False,
            delimiter=',',
            dtype=None,
            names=[
                'catalog_id', 'file_name',
                'geospatial_lat_min', 'geospatial_lat_max',
                'geospatial_lon_min', 'geospatial_lon_max',
                'time_coverage_start', 'time_coverage_end',
                'provider', 'date_update', 'data_mode', 'parameters',
            ],
        )

# The index is fully loaded at this point, so the FTP connection is no longer
# needed while filtering.
# atleast_1d: an index holding a single record comes back as a 0-d array,
# which cannot be iterated.
for netCDF in np.atleast_1d(index):
    # keep the record only if it meets all the selection criteria
    if (bbox_check(netCDF, targeted_bounding_box)
            and timerange_check(netCDF, ini, end)
            and platformtype_check(netCDF, targeted_platform_types)
            and parameter_check(netCDF, targeted_parameters)):
        matches.append(netCDF)
print('Number of matches : '+str(len(matches)))

<H1>DOWNLOADING</H1>

In [None]:
os.chdir(output_directory) #change directory to the one designated for downloading
with ftputil.FTPHost(host, user, password) as ftp_host:
    for netCDF in matches:
        # getting ftplink, filepath and filename
        ftplink = netCDF['file_name']
        # the index reader yields bytes or str depending on the NumPy version
        if isinstance(ftplink, bytes):
            ftplink = ftplink.decode('utf-8')
        # drop the first three '/'-separated pieces of the link to get the
        # path used on the server
        filepath = '/'.join(ftplink.split('/')[3:])
        ncdf_file_name = ftplink[ftplink.rfind('/')+1:]
        # download the file if it is actually present on the server
        if ftp_host.path.isfile(filepath):
            ftp_host.download(filepath, ncdf_file_name)  # remote, local
            print(ncdf_file_name)
        else:
            # report instead of skipping silently
            print('Not found on server, skipped: '+filepath)