# Pulls data from the Copernicus Data Provider (European Commission) available at cds.climate.copernicus.eu
The output is a csv file containing the requested time series.

Downloading the complete (daily) data set is currently not supported due to size limitations; only sampled data for every 10th day is available.

To use the data set legally it is required to create an account here: https://cds.climate.copernicus.eu/
Please then obtain your API key from your user profile and provide it as a parameter to this component

WARNING: This component currently only supports local execution (not Kubeflow/Airflow)

Future work  
[ ] Download the complete data set by creating multiple requests and then merging the results

In [1]:
# Install the pinned third-party packages this notebook depends on
# (xarray, netcdf4, cdsapi and wget).
!pip3 install xarray==0.17.0 netcdf4==1.5.6 cdsapi==0.5.1 wget==3.2

Collecting xarray==0.17.0
  Downloading xarray-0.17.0-py3-none-any.whl (759 kB)
[K     |████████████████████████████████| 759 kB 1.1 MB/s eta 0:00:01
[?25hCollecting netcdf4==1.5.6
  Downloading netCDF4-1.5.6-cp38-cp38-manylinux2014_x86_64.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 3.5 MB/s eta 0:00:01
[?25hCollecting cdsapi==0.5.1
  Downloading cdsapi-0.5.1.tar.gz (12 kB)
Collecting wget==3.2
  Downloading wget-3.2.zip (10 kB)
Collecting cftime
  Downloading cftime-1.4.1-cp38-cp38-manylinux2014_x86_64.whl (322 kB)
[K     |████████████████████████████████| 322 kB 3.2 MB/s eta 0:00:01
Building wheels for collected packages: cdsapi, wget
  Building wheel for cdsapi (setup.py) ... [?25ldone
[?25h  Created wheel for cdsapi: filename=cdsapi-0.5.1-py2.py3-none-any.whl size=11688 sha256=57e25a5a5291967839b2688a26df4b05e0ab60d014e799b286a1bea2277524fe
  Stored in directory: /home/jovyan/.cache/pip/wheels/5b/a2/af/7a52b8437e534f0b5f1b71232dd9534920579a142d9a191a63
  Bu

In [None]:
# @param api key in form UID:APIKey obtained from
# https://cds.climate.copernicus.eu/
# @param data_dir temporal data storage for local execution
# @param file_name csv file name

In [None]:
import wget

# Download the helper module `claimed_utils.py` so that it can be imported
# by a later cell (`from claimed_utils import unzip`).
claimed_utils_url = (
    'https://raw.githubusercontent.com/elyra-ai/' +
    'component-library/master/claimed_utils.py'
)
wget.download(claimed_utils_url)

In [None]:
import cdsapi
import xarray as xr
from claimed_utils import unzip
import pandas as pd
import os
import glob

In [None]:
# Component parameters are read from environment variables.
# NOTE(review): the variable name 'api key' contains a space, which most
# shells cannot export directly -- confirm that this is the intended name.
apikey = os.environ.get('api key')
file_name = os.environ.get('file_name', 'data.csv')
# Joining with an empty component guarantees a trailing path separator.
# The notebook builds every path by plain concatenation (data_dir + name),
# which silently yields a wrong location when data_dir is supplied
# without one (e.g. '/tmp/data' + 'data.csv' -> '/tmp/datadata.csv').
data_dir = os.path.join(os.environ.get('data_dir', '../../data/'), '')

In [None]:
# When the target CSV already exists, every later cell is skipped
# (each of them is guarded by `if not skip`).
skip = os.path.exists(data_dir+file_name)

In [None]:
if not skip:
    # Fail early with a clear message. Without this check a missing key
    # surfaces as an opaque TypeError from the string concatenation below,
    # after ~/.cdsapirc has already been truncated.
    if not apikey:
        raise ValueError(
            "CDS API key is missing; provide it in the form UID:APIKey "
            "via the 'api key' parameter"
        )

    # Write the client configuration file. Opening with "w" replaces any
    # existing ~/.cdsapirc of the current user.
    with open(os.path.expanduser('~/.cdsapirc'), "w") as myfile:
        myfile.write("url: https://cds.climate.copernicus.eu/api/v2\n")
        myfile.write("key: "+apikey+"\n")
        # NOTE(security): "verify: 0" appears to switch off TLS certificate
        # verification in cdsapi -- confirm this is really intended.
        myfile.write("verify: 0")

In [None]:
if not skip:
    # Build the request: the first day of every month for each year from
    # 1978 through 2019, as zero-padded strings.
    years = [str(year) for year in range(1978, 2020)]
    months = ['{:02d}'.format(month) for month in range(1, 13)]

    request = {
        'variable': 'volumetric_surface_soil_moisture',
        'type_of_sensor': 'passive',
        'time_aggregation': 'month_average',
        'year': years,
        'month': months,
        'day': '01',
        'type_of_record': 'cdr',
        'version': 'v201912.0.0',
        'format': 'zip',
    }

    # Download the 'satellite-soil-moisture' data set as a zip archive
    # into data_dir.
    c = cdsapi.Client()
    c.retrieve('satellite-soil-moisture', request, data_dir+'download.zip')

In [None]:
if not skip:
    # Delete every *.nc file currently in data_dir before the new archive
    # is unpacked, so files from earlier runs are not converted again.
    stale_nc_files = glob.glob(data_dir+'*.nc')
    for stale_nc_file in stale_nc_files:
        os.remove(stale_nc_file)

In [None]:
if not skip:
    # Unpack the downloaded archive. `unzip` comes from claimed_utils;
    # presumably it extracts the archive into data_dir -- verify against
    # that helper.
    archive_path = data_dir+'download.zip'
    unzip(data_dir, archive_path)

In [None]:
if not skip:
    # Convert the 'sm' variable of every NetCDF file in data_dir into one
    # flat CSV file.

    # Sorted so that the row order of the CSV is reproducible; os.listdir
    # returns directory entries in arbitrary order.
    nc_files = sorted(
        filename for filename in os.listdir(data_dir)
        if filename.endswith(".nc")
    )
    if not nc_files:
        # Previously this case ended in an AttributeError on None.
        raise FileNotFoundError("No .nc files found in " + data_dir)

    frames = []
    for filename in nc_files:
        # The context manager closes the file once the values have been
        # copied into a pandas object.
        with xr.open_dataset(os.path.join(data_dir, filename)) as dset:
            frame = pd.DataFrame(dset['sm'].to_series())
        # Turn the index levels into ordinary columns.
        frame.reset_index(inplace=True)
        frames.append(frame)

    # One concat replaces repeated DataFrame.append, which re-copied the
    # accumulated frame on every iteration and was removed in pandas 2.0.
    df = pd.concat(frames, ignore_index=True)

    df.to_csv(data_dir+file_name, index=False)