This notebook downloads [historic data derived from in-situ and satellite observations from the Copernicus website](https://cds.climate.copernicus.eu/cdsapp#!/dataset/insitu-gridded-observations-global-and-regional?tab=form). The code works, but it does not reformat or post-process the data; it only downloads and extracts the files.

In [None]:
## Import packages ..... 
import cdsapi
import os
import os.path
from netCDF4 import Dataset
#import shutil # to move folders


## ..... and function
from FunctionsAndClass import year_copernicus
from FunctionsAndClass import path_length

In [2]:
## Every value each request parameter can take; the download loop combines them
class obs_elements:
    """Namespace holding the candidate values of each download request parameter."""

    versions = (
        'v1.0',
        'v2.0',
        'v2020.0',
        'v2020.0-v6.0-fg',
        'v4.0',
        'v4.03',
        'v6.0',
    )
    origins = (
        'berkearth',
        'chirps',
        'cmorph',
        'cpc',
        'cpc-conus',
        'cru',
        'gistemp',
        'gpcc',
        'imerg',
    )
    # test only with chirps and cru at the beginning
    horizontal_aggregations = (
        '0_25_x_0_25',
        '0_2_x_0_2',
        '0_5_x_0_5',
        '1_x_1',
        '2_5_x_2_5',
        'horizontal_average',
    )
    time_aggregations = ('daily', 'monthly')
    regions = ('africa', 'global', 'conus', 'quasi_global')
    variables = ('precipitation', 'temperature', 'temperature_anomaly')
    # 'NaN' is the placeholder meaning "no statistic is sent with the request"
    statistics = ('NaN', 'maximum', 'mean', 'minimum')

In [10]:
# Build every combination of request parameters that the download loop iterates over.
# itertools.product walks its arguments like nested for-loops (first argument outermost,
# last argument innermost), so the order of the resulting list is the same as with
# seven hand-written nested loops.
from itertools import product

list_obs_elements = [
    # each entry is ordered the way the download loop unpacks it
    (version, origin, horizontal_aggregation, time_aggregation, region, variable, statistic)
    for (
        statistic,
        origin,
        horizontal_aggregation,
        time_aggregation,
        region,
        variable,
        version,
    ) in product(
        obs_elements.statistics,
        obs_elements.origins,
        obs_elements.horizontal_aggregations,
        obs_elements.time_aggregations,
        obs_elements.regions,
        obs_elements.variables,
        obs_elements.versions,
    )
]

In [13]:
def download_extract(path_for_file):
    """Extract ``download.zip`` from the current directory into ``path_for_file``.

    Args:
        path_for_file: destination folder; it is created (including parents)
            when it does not exist yet.

    Returns:
        None. The archive itself is not deleted here; callers remove it.

    Raises:
        OSError: if ``download.zip`` cannot be opened or the folder cannot be created.
        zipfile.BadZipFile: if ``download.zip`` is not a valid zip archive.
    """
    from zipfile import ZipFile  # kept local to the function, as in the original cell

    # exist_ok avoids the separate "does it exist?" check before creating the folder
    os.makedirs(path_for_file, exist_ok=True)

    # the context manager closes the archive even when extraction fails
    with ZipFile('download.zip', 'r') as archive:
        archive.extractall(path_for_file)

    print('\n ------------------------------------- The downloaded file is extracted -------------------------------------')
    return

In [14]:
# First year requested for each data origin.
_START_YEAR_BY_ORIGIN = {
    'berkearth': 1950,
    'chirps': 1981,
    'cmorph': 1998,
    'cpc': 1979,
    'cpc-conus': 1950,
    'cru': 1950,
    'gistemp': 1950,
    'gpcc': 1950,
    'imerg': 2000,
}


def f_start_date(origin):
    """Return the first year to request for the given data origin.

    Args:
        origin: name of the data origin, e.g. ``'chirps'`` or ``'cru'``.

    Returns:
        The start year as an int.

    Raises:
        ValueError: if ``origin`` is not one of the known origins. (Previously an
            unknown origin fell through every test and failed with an
            UnboundLocalError on the return statement.)
    """
    try:
        return _START_YEAR_BY_ORIGIN[origin]
    except KeyError:
        known = ', '.join(sorted(_START_YEAR_BY_ORIGIN))
        raise ValueError(
            'unknown origin {!r}; expected one of: {}'.format(origin, known)
        ) from None

# Build the '<start>-<stop>' label of the period covered by an origin
def f_period_str(origin):
    """Return the period of the data set as a ``'<start year>-<stop year>'`` string.

    The start year comes from ``f_start_date(origin)``; the stop year is fixed.
    """
    last_year = 2015  # same for every origin
    first_year = f_start_date(origin)  # depends on the origin
    return '{}-{}'.format(first_year, last_year)

## Years to request for an origin, as produced by year_copernicus
def year_temp_pr_obs(origin):
    """Return the second element of ``year_copernicus(start, stop)`` for ``origin``.

    ``start`` is ``f_start_date(origin)`` and ``stop`` is fixed to 2015.
    """
    last_year = 2015  # same for every origin
    first_year = f_start_date(origin)  # depends on the origin
    # year_copernicus yields three values; only the middle one is needed here
    _years, years_as_str, _index = year_copernicus(first_year, last_year)
    return years_as_str

In [15]:
## Download observation data of https://cds.climate.copernicus.eu/cdsapp#!/dataset/insitu-gridded-observations-global-and-regional?tab=form
def try_download_copernicus_temp_pr_obs(version,origin,horizontal_aggregation,time_aggregation,region,variable,statistic,path_for_file):
    """Try to download one parameter combination and extract it into ``path_for_file``.

    The request is sent to the 'insitu-gridded-observations-global-and-regional'
    dataset. When the request fails, a message is printed and nothing is extracted.

    Args:
        version, origin, horizontal_aggregation, time_aggregation, region, variable:
            values forwarded unchanged as request parameters.
        statistic: forwarded as the 'statistic' parameter, unless it is the
            placeholder ``'NaN'``, in which case no statistic is sent.
        path_for_file: folder into which the downloaded archive is extracted.

    Returns:
        None, both on success and on a failed request.
    """
    # basic dictionary giving c.retrieve the parameters asked by the user;
    # the years depend on the origin
    request = {
        'format': 'zip', # this function is only designed to download and unzip zip files
        'version': version,
        'origin': origin,
        'year': year_temp_pr_obs(origin),
        'horizontal_aggregation': horizontal_aggregation,
        'time_aggregation': time_aggregation,
        'region': region,
        'variable': variable,
    }
    if statistic != 'NaN':
        request['statistic'] = statistic

    client = cdsapi.Client()
    try:
        # the zip file is written to the current directory
        client.retrieve(
            'insitu-gridded-observations-global-and-regional',
            request,
            'download.zip')
    except Exception as err:
        # Deliberate best effort: a failed request only skips this combination.
        # Catching Exception (not a bare except) lets Ctrl-C / SystemExit stop the run.
        print('------------------------------- Some parameters are not matching -------------------------------')
        # show the actual reason, since a failure is not necessarily a parameter mismatch
        print('Request failed with: ' + repr(err))
        return

    download_extract(path_for_file)
    os.remove('download.zip') # delete 'download.zip' from current directory
    return

In [None]:
# Root folder under which the downloaded data is stored
out_path = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\Observations'
# Try every parameter combination; try_download_copernicus_temp_pr_obs downloads and
# extracts the data when the request succeeds
count = 0
for combination in list_obs_elements:
    (version, origin, horizontal_aggregation, time_aggregation, region, variable, statistic) = combination
    count += 1

    # report the combination being tried, each line preceded by blank lines
    labelled_values = (
        ('version: ', version),
        ('origin: ', origin),
        ('horizontal_aggregation: ', horizontal_aggregation),
        ('time_aggregation: ', time_aggregation),
        ('region: ', region),
        ('variable: ', variable),
        ('statistic: ', statistic),
    )
    for label, value in labelled_values:
        print('\n')
        print(label + value)
    print('\n')

    # the period string is part of the destination path
    period = f_period_str(origin)
    path_parts = [out_path, variable, region, period, time_aggregation, horizontal_aggregation, origin, version]
    if statistic != 'NaN':
        # a statistic is requested, so it gets its own sub-folder
        path_parts.append(statistic)
    path_for_file = os.path.join(*path_parts)

    if os.path.isdir(path_for_file):
        # the destination folder already exists, so this combination is skipped
        continue

    # NOTE(review): path_length is defined in FunctionsAndClass; presumably it guards
    # against over-long paths -- confirm. The existence check above runs before it.
    path_for_file = path_length(path_for_file)
    try_download_copernicus_temp_pr_obs(version,origin,horizontal_aggregation,time_aggregation,region,variable,statistic,path_for_file)

    # make sure no archive is left behind in the current directory
    if 'download.zip' in os.listdir():
        os.remove('download.zip')

In [None]:
## test download of chirps

# Request parameters, declared once and reused for both the path and the request
origin = 'chirps'
version = 'v2.0'
horizontal_aggregation = '0_25_x_0_25'
time_aggregation = 'daily'
region = 'africa'
variable = 'precipitation'

# NOTE(review): the folder name uses the period from f_period_str (which ends in 2015),
# while the request below asks for 1981-2021 -- confirm which period is intended.
period = f_period_str(origin) # produce string of the considered period, to integrate period in the path

out_path = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\Observations'
path_for_file = os.path.join(out_path,variable, region,period, time_aggregation, horizontal_aggregation, origin, version) # create string of the path

c = cdsapi.Client()

c.retrieve(
    'insitu-gridded-observations-global-and-regional',
    {
        'format': 'zip',
        'version': version,
        'origin': origin,
        # every year from 1981 to 2021 inclusive, as strings
        'year': [str(year) for year in range(1981, 2022)],
        'horizontal_aggregation': horizontal_aggregation,
        'time_aggregation': time_aggregation,
        'region': region,
        'variable': variable,
    },
    'download.zip')

download_extract(path_for_file)
# remove the archive once extracted, as the other download cells do
os.remove('download.zip') # delete 'download.zip' from current directory


In [16]:
# download for cru, precipitation

# Request parameters, shared by the destination path and the request
origin = 'cru'
version = 'v4.03'
variable = 'precipitation'
region = 'global'
time_aggregation = 'monthly'
horizontal_aggregation = '0_5_x_0_5'

# NOTE(review): the folder name uses the period from f_period_str (which ends in 2015),
# while the request below asks for 1950-2019 -- confirm which period is intended.
period = f_period_str('cru') # the period string is part of the destination path

out_path = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\Observations'
path_for_file = os.path.join(
    out_path, variable, region, period,
    time_aggregation, horizontal_aggregation, origin, version,
)

c = cdsapi.Client()

c.retrieve(
    'insitu-gridded-observations-global-and-regional',
    {
        'format': 'zip',
        'origin': origin,
        'region': region,
        'variable': variable,
        'time_aggregation': time_aggregation,
        'horizontal_aggregation': horizontal_aggregation,
        # every year from 1950 to 2019 inclusive, as strings
        'year': [str(year) for year in range(1950, 2020)],
        'version': version,
    },
    'download.zip')
download_extract(path_for_file)
os.remove('download.zip') # delete 'download.zip' from current directory

2023-05-25 11:44:49,290 INFO Welcome to the CDS
2023-05-25 11:44:49,291 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/insitu-gridded-observations-global-and-regional
2023-05-25 11:44:49,383 INFO Request is queued
2023-05-25 11:44:50,436 INFO Request is running
2023-05-25 11:45:38,998 INFO Request is completed
2023-05-25 11:45:38,999 INFO Downloading https://download-0000-clone.copernicus-climate.eu/cache-compute-0000/cache/data5/dataset-insitu-gridded-observations-global-and-regional-1f23f03f-1fdb-42d6-808b-5c1986d445ee.zip to download.zip (109.4M)
2023-05-25 11:46:07,776 INFO Download rate 3.8M/s                                                                      



 ------------------------------------- The downloaded file is extracted -------------------------------------


In [17]:
# download for cru, temperature

# Request parameters, shared by the destination path and the request
origin = 'cru'
version = 'v4.03'
variable = 'temperature'
region = 'global'
time_aggregation = 'monthly'
horizontal_aggregation = '0_5_x_0_5'

# NOTE(review): the folder name uses the period from f_period_str (which ends in 2015),
# while the request below asks for 1950-2019 -- confirm which period is intended.
period = f_period_str('cru') # the period string is part of the destination path

out_path = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\Observations'
path_for_file = os.path.join(
    out_path, variable, region, period,
    time_aggregation, horizontal_aggregation, origin, version,
)

c = cdsapi.Client()

c.retrieve(
    'insitu-gridded-observations-global-and-regional',
    {
        'format': 'zip',
        'origin': origin,
        'region': region,
        'variable': variable,
        'time_aggregation': time_aggregation,
        'horizontal_aggregation': horizontal_aggregation,
        # every year from 1950 to 2019 inclusive, as strings
        'year': [str(year) for year in range(1950, 2020)],
        'version': version,
        # all three statistics are requested together and extracted into the same folder
        'statistic': ['maximum', 'mean', 'minimum'],
    },
    'download.zip')

download_extract(path_for_file)
os.remove('download.zip') # delete 'download.zip' from current directory

2023-05-25 11:48:12,532 INFO Welcome to the CDS
2023-05-25 11:48:12,533 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/insitu-gridded-observations-global-and-regional
2023-05-25 11:48:12,584 INFO Request is queued
2023-05-25 11:48:13,622 INFO Request is running
2023-05-25 11:51:04,404 INFO Request is completed
2023-05-25 11:51:04,406 INFO Downloading https://download-0003-clone.copernicus-climate.eu/cache-compute-0003/cache/data6/dataset-insitu-gridded-observations-global-and-regional-8793e68f-31da-4cff-8305-cd9e509ddd38.zip to download.zip (246.5M)
2023-05-25 11:52:04,759 INFO Download rate 4.1M/s                                                                      



 ------------------------------------- The downloaded file is extracted -------------------------------------


In [23]:
# Display the destination folder currently held in path_for_file
path_for_file

'\\\\COWI.net\\projects\\A245000\\A248363\\CRVA\\Datasets\\Observations\\temperature\\global\\1950-2015\\monthly\\0_5_x_0_5\\cru\\v4.03'

In [19]:
# Open one CRU precipitation netCDF file to inspect its content.
# NOTE(review): this folder is named '1950-2019', whereas f_period_str('cru') yields a
# period ending in 2015 -- confirm the files are really stored under this folder.
path = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\Observations\precipitation\global\1950-2019\monthly\0_5_x_0_5\cru\v4.03'
name = 'CRU_total_precipitation_mon_0.5x0.5_global_1950_v4.03.nc'
path1 = os.path.join(path, name)  # full path of the file to open
df = Dataset(path1)  # netCDF4 Dataset handle, kept open for the next cell

In [20]:
# Display the variables stored in the dataset opened in the previous cell
df.variables

{'time': <class 'netCDF4._netCDF4.Variable'>
 float32 time(time)
     standard_name: time
     long_name: time
     units: days since 1900-1-1
     calendar: gregorian
     axis: T
 unlimited dimensions: time
 current shape = (12,)
 filling on, default _FillValue of 9.969209968386869e+36 used,
 'lon': <class 'netCDF4._netCDF4.Variable'>
 float32 lon(lon)
     standard_name: longitude
     long_name: longitude
     units: degrees_east
     axis: X
 unlimited dimensions: 
 current shape = (720,)
 filling on, default _FillValue of 9.969209968386869e+36 used,
 'lat': <class 'netCDF4._netCDF4.Variable'>
 float32 lat(lat)
     standard_name: latitude
     long_name: latitude
     units: degrees_north
     axis: Y
 unlimited dimensions: 
 current shape = (360,)
 filling on, default _FillValue of 9.969209968386869e+36 used,
 'pr': <class 'netCDF4._netCDF4.Variable'>
 float32 pr(time, lat, lon)
     standard_name: lwe_precipitation_rate
     long_name: total monthly precipitation
     units: mm

In [21]:
# Open one CRU maximum-temperature netCDF file to inspect its content.
# NOTE(review): this folder is named '1950-2019', whereas f_period_str('cru') yields a
# period ending in 2015 -- confirm the files are really stored under this folder.
path1 = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\Observations\temperature\global\1950-2019\monthly\0_5_x_0_5\cru\v4.03'
name1 = 'CRU_maximum_temperature_mon_0.5x0.5_global_1951_v4.03.nc'
path2 = os.path.join(path1, name1)  # full path of the file to open
df1 = Dataset(path2)  # netCDF4 Dataset handle, kept open for the next cell

In [22]:
# Display the variables stored in the dataset opened in the previous cell
df1.variables

{'time': <class 'netCDF4._netCDF4.Variable'>
 float32 time(time)
     standard_name: time
     long_name: time
     units: days since 1900-1-1
     calendar: gregorian
     axis: T
 unlimited dimensions: time
 current shape = (12,)
 filling on, default _FillValue of 9.969209968386869e+36 used,
 'lon': <class 'netCDF4._netCDF4.Variable'>
 float32 lon(lon)
     standard_name: longitude
     long_name: longitude
     units: degrees_east
     axis: X
 unlimited dimensions: 
 current shape = (720,)
 filling on, default _FillValue of 9.969209968386869e+36 used,
 'lat': <class 'netCDF4._netCDF4.Variable'>
 float32 lat(lat)
     standard_name: latitude
     long_name: latitude
     units: degrees_north
     axis: Y
 unlimited dimensions: 
 current shape = (360,)
 filling on, default _FillValue of 9.969209968386869e+36 used,
 'tasmax': <class 'netCDF4._netCDF4.Variable'>
 float32 tasmax(time, lat, lon)
     standard_name: air_temperature
     long_name: maximum monthly near-surface temperature
