# Pulls data from the Copernicus Data Provider (European Commission), available at cds.climate.copernicus.eu
The output is a csv file containing the requested time series.

Downloading the complete (daily) data set is currently not supported due to size limitations; only sampled data for every 10th day is available.

To use the data set legally it is required to create an account here: https://cds.climate.copernicus.eu/
Then obtain your API key from your user profile and provide it as a parameter to this component.

WARNING: This component currently only supports local execution (not Kubeflow/Airflow)

Future work  
[ ] Download the complete data set by creating multiple requests and then merging the results

In [None]:
!pip3 install xarray==0.17.0 netcdf4==1.5.6 cdsapi==0.5.1 wget==3.2

In [None]:
# @param api key in form UID:APIKey obtained from  https://cds.climate.copernicus.eu/
# @param data_dir temporary data storage for local execution
# @param file_name csv file name

In [1]:
import cdsapi
import xarray as xr
from claimed_utils import unzip
import pandas as pd
import os 

In [2]:
# Component parameters, read from environment variables.
#
# apikey    -- CDS API key in the form UID:APIKey. The original variable name
#              'api key' contains a space and therefore cannot be set with a
#              plain shell `export`; 'apikey' is accepted as a fallback while
#              the original name keeps precedence. None when neither is set.
# file_name -- name of the CSV file to write (default 'data.csv').
# data_dir  -- directory prefix used for downloaded and generated files
#              (default '../../data/'); it is concatenated directly with file
#              names, so it must end with a path separator.
apikey = os.environ.get('api key', os.environ.get('apikey'))
file_name = os.environ.get('file_name', 'data.csv')
data_dir = os.environ.get('data_dir', '../../data/')

In [None]:
import os

# Write the cdsapi client configuration file (~/.cdsapirc) containing the
# service URL and the user's API key. Any existing file is overwritten.

# Validate before opening: opening with "w" truncates the file, so failing
# afterwards would destroy an existing, working configuration.
if not apikey:
    raise ValueError(
        "apikey is not set; provide the CDS API key in the form UID:APIKey"
    )

with open(os.path.expanduser('~/.cdsapirc'), "w") as myfile:
    myfile.write("url: https://cds.climate.copernicus.eu/api/v2\n")
    myfile.write("key: " + apikey + "\n")
    # NOTE(review): 'verify: 0' presumably turns off TLS certificate
    # verification in the cdsapi client -- confirm this is intended.
    myfile.write("verify: 0")

In [None]:
import wget

# Download the claimed_utils.py helper module from the component library
# repository.
claimed_utils_url = 'https://raw.githubusercontent.com/elyra-ai/component-library/master/claimed_utils.py'
wget.download(claimed_utils_url)

In [3]:


# Request the 'satellite-soil-moisture' data set from the CDS: passive-sensor
# volumetric surface soil moisture, monthly averages, for every month of the
# years 1978-2019, delivered as a zip archive.
c = cdsapi.Client()

c.retrieve(
    'satellite-soil-moisture',
    {
        'variable': 'volumetric_surface_soil_moisture',
        'type_of_sensor': 'passive',
        'time_aggregation': 'month_average',
        # '1978' .. '2019' (range end is exclusive)
        'year': [str(year) for year in range(1978, 2020)],
        # '01' .. '12', zero-padded to two digits
        'month': ['{:02d}'.format(month) for month in range(1, 13)],
        'day': '01',
        'type_of_record': 'cdr',
        'version': 'v201912.0.0',
        'format': 'zip',
    },
    # Store the archive under data_dir so that the later cells, which read
    # data_dir + 'download.zip', find it even when data_dir is not the default.
    data_dir + 'download.zip')

2021-03-16 17:32:59,965 INFO Welcome to the CDS
2021-03-16 17:32:59,966 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/satellite-soil-moisture
2021-03-16 17:33:00,125 INFO Request is queued
2021-03-16 17:34:54,545 INFO Request is completed
2021-03-16 17:34:54,546 INFO Downloading https://download-0009.copernicus-climate.eu/cache-compute-0009/cache/data1/dataset-satellite-soil-moisture-c3ab14b4-4aaf-439c-8772-bada58da20b9.zip to ../../data/download.zip (296.3M)
2021-03-16 17:36:23,269 INFO Download rate 3.3M/s    


Result(content_length=310652299,content_type=application/zip,location=https://download-0009.copernicus-climate.eu/cache-compute-0009/cache/data1/dataset-satellite-soil-moisture-c3ab14b4-4aaf-439c-8772-bada58da20b9.zip)

In [6]:

import glob
import os

# Delete every file matching data_dir + '*.nc'.
nc_paths = glob.glob(data_dir + '*.nc')
for nc_path in nc_paths:
    os.remove(nc_path)

In [7]:
# Hand the downloaded archive to the claimed_utils unzip helper
# (presumably extracts it into data_dir -- confirm against claimed_utils).
archive_path = data_dir + 'download.zip'
unzip(data_dir, archive_path)

In [11]:
# Combine the 'sm' variable of every NetCDF (.nc) file in data_dir into a
# single DataFrame `df`, with the former index levels as ordinary columns.
# `df` stays None when data_dir contains no .nc file.
frames = []

# Sort the file names: os.listdir returns them in arbitrary order, which would
# make the row order of the result differ between runs/platforms.
for filename in sorted(os.listdir(data_dir)):
    if not filename.endswith(".nc"):
        continue
    # The context manager closes the file handle once the values have been
    # copied into pandas objects.
    with xr.open_dataset(os.path.join(data_dir, filename)) as dset:
        frame = pd.DataFrame(dset['sm'].to_series())
    frame.reset_index(inplace=True)
    frames.append(frame)

# One concat at the end instead of DataFrame.append per file: append copies
# the accumulated frame every iteration and was removed in pandas 2.0.
df = pd.concat(frames, ignore_index=True) if frames else None

In [12]:
# Write the combined DataFrame as CSV into data_dir, without the index column.
csv_path = data_dir + file_name
df.to_csv(csv_path, index=False)