# Pulls data from the Copernicus Data Provider (European Commision) available at cds.climate.copernicus.eu
The output is a csv file containing the requested time series.

As currently downloading the complete (daily) data set is not supported due to size limitations - only sampled data for every 10th day is available

To use the data set legally it is required to create an account here: https://cds.climate.copernicus.eu/
Please then obtain your API key from your user provide and provide it as a parameter to this component

WARNING: This component currently only supports local execution (not Kubeflow/Airflow)

Future work  
[ ] Download complete data set by creating multile requests and then merge the results

In [24]:
!pip3 install xarray==0.17.0 netcdf4==1.5.6 cdsapi==0.5.1 wget==3.2

Collecting xarray==0.17.0
  Downloading xarray-0.17.0-py3-none-any.whl (759 kB)
[K     |████████████████████████████████| 759 kB 8.8 MB/s eta 0:00:01
[?25hCollecting netcdf4==1.5.6
  Downloading netCDF4-1.5.6-cp38-cp38-manylinux2014_x86_64.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 44.3 MB/s eta 0:00:01
[?25hCollecting cdsapi==0.5.1
  Downloading cdsapi-0.5.1.tar.gz (12 kB)
Collecting wget==3.2
  Downloading wget-3.2.zip (10 kB)
Collecting cftime
  Downloading cftime-1.4.1-cp38-cp38-manylinux2014_x86_64.whl (322 kB)
[K     |████████████████████████████████| 322 kB 49.8 MB/s eta 0:00:01
Building wheels for collected packages: cdsapi, wget
  Building wheel for cdsapi (setup.py) ... [?25ldone
[?25h  Created wheel for cdsapi: filename=cdsapi-0.5.1-py2.py3-none-any.whl size=11688 sha256=5ec336f25378ae2fda86edb96b8ee17c3b8fb8d31496291a555afb329b307c74
  Stored in directory: /home/jovyan/.cache/pip/wheels/5b/a2/af/7a52b8437e534f0b5f1b71232dd9534920579a142d9a191a63
  

In [2]:
# @param api key in form UID:APIKey obtained from
# https://cds.climate.copernicus.eu/
# @param data_dir temporal data storage for local execution
# @param file_name csv file name
# @param start_year of data
# @param end_year of data

In [3]:
import wget
wget.download(
    'https://raw.githubusercontent.com/elyra-ai/' +
    'component-library/master/claimed_utils.py'
)

'claimed_utils (8).py'

In [41]:
import cdsapi
import xarray as xr
from claimed_utils import unzip
import pandas as pd
import os
import glob

In [42]:
apikey = os.environ.get('api key')
file_name = os.environ.get('file_name', 'data.csv')
data_dir = os.environ.get('data_dir', '../../data/')
start_year = os.environ.get('file_name', '2017') #up to 1978
end_year = os.environ.get('file_name', '2019')

In [43]:
apikey = '80380:4b060692-2f02-446c-8c48-486987c39885'

In [44]:
skip = False

if os.path.exists(data_dir+file_name):
    skip = True

In [45]:
skip = False

In [46]:
if not skip:
    with open(os.path.expanduser('~/.cdsapirc'), "w") as myfile:
        myfile.write("url: https://cds.climate.copernicus.eu/api/v2\n")
        myfile.write("key: "+apikey+"\n")
        myfile.write("verify: 0")

In [47]:
year_range = []
for i in range(int(start_year),int(end_year)+1):
    year_range.append(str(i))


In [48]:
query = {
            'variable': 'volumetric_surface_soil_moisture',
            'type_of_sensor': 'passive',
            'time_aggregation': 'month_average',
            'year': [
                '2017', '2018', '2019'
            ],
            'month': [
                '01', '02', '03',
                '04', '05', '06',
                '07', '08', '09',
                '10', '11', '12',
            ],
            'day': '01',
            'type_of_record': 'cdr',
            'version': 'v201912.0.0',
            'format': 'zip',
        }
query['year'] = year_range

In [49]:
if not skip:
    
    c = cdsapi.Client()

    c.retrieve(
        'satellite-soil-moisture',
        query,
        data_dir+'download.zip')

2021-04-08 05:45:34,309 INFO Welcome to the CDS
2021-04-08 05:45:34,311 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/satellite-soil-moisture
2021-04-08 05:45:34,409 INFO Request is completed
2021-04-08 05:45:34,411 INFO Downloading https://download-0014.copernicus-climate.eu/cache-compute-0014/cache/data4/dataset-satellite-soil-moisture-adb430f3-7933-4cd8-9bad-e2b71242ae38.zip to ../../data/download.zip (25.9M)
2021-04-08 05:45:35,341 INFO Download rate 27.9M/s  


In [9]:
if not skip:
    for f in glob.glob(data_dir+'*.nc'):
        os.remove(f)

In [10]:
if not skip:
    unzip(data_dir, data_dir+'download.zip')

In [11]:
debug = False

if not skip:
    for filename in os.listdir(data_dir):
        if filename.endswith(".nc") :
            if debug:
                print('Starting to process {}...'.format(filename))
            else:
                print(".", end ="") 
            filename_csv = filename.split('.nc')[0]+'.csv'
            if not os.path.exists(data_dir+filename_csv):
                dset = xr.open_dataset(os.path.join(data_dir, filename))
                df = pd.DataFrame(dset['sm'].to_series())
                df.reset_index(inplace=True)
                df.to_csv(data_dir+filename_csv, index=False)
            else:
                if debug:
                    print('CSV file {} already present, skipping...'.format(filename_csv))
                else:
                    print("c", end ="") 
            os.remove(data_dir+filename)
        else:
            continue

In [12]:
!echo "time,lat,lon,sm" > ../../data/data.csv
!for file in `ls ../../data/C3S-SOILMOISTURE-*.csv`; do cat $file |tail -n +2 >> ../../data/data.csv; done