### Creating a CSV of testing scenarios for the CATE OpenSearch query

The code below automatically creates a CSV file with testing scenarios. Based on the metadata of each dataset collection, a short temporal subset, a subset of up to 3 variables and a spatial subset of 1° lat x 1° lon are defined. This code should only be run when changes to the OpenSearch portal have occurred; otherwise the existing testing-scenario CSV should always be reused, so that consecutive test runs are based on the same scenarios and the changes between test runs become visible.

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): nest_asyncio.apply() is presumably required because the
# notebook kernel already runs an asyncio event loop and the data store
# calls below need to run one of their own - confirm before removing.
import nest_asyncio
nest_asyncio.apply()

In [2]:
from cate.core.ds import DATA_STORE_REGISTRY
import cate.ops as ops
from cate.util.monitor import ConsoleMonitor
from cate.core import ds
import datetime
import random
import os
import csv
from csv import DictWriter

In [3]:
def append_dict_as_row(file_name, dict_of_elem, field_names):
    """Append a single record to a semicolon-delimited CSV file.

    Args:
        file_name: Path of the CSV file; it is opened in append mode, so it
            is created when it does not exist yet.
        dict_of_elem: Mapping from column name to the value to be written.
        field_names: Column names, in the order in which the values are
            written to the row.
    """
    with open(file_name, 'a+', newline='') as csv_file:
        # DictWriter orders the values of the dict according to field_names.
        row_writer = DictWriter(csv_file, fieldnames=field_names, delimiter=';')
        row_writer.writerow(dict_of_elem)


def update_csv(results_csv, header_row, results_for_dataset_collection):
    """Append one result row to a CSV file, creating the file on first use.

    Args:
        results_csv: Path of the semicolon-delimited CSV file.
        header_row: List of column names; written as the header line when
            the file is created and used to order the values of each row.
        results_for_dataset_collection: Mapping from column name to value
            for the row to append.
    """
    csv_is_missing = not os.path.isfile(results_csv)
    if csv_is_missing:
        # First call for this file: start it with the header line.
        with open(results_csv, 'w', newline='') as new_csv:
            csv.writer(new_csv, delimiter=';').writerow(header_row)
    append_dict_as_row(results_csv, results_for_dataset_collection, header_row)

In [4]:
# Fetch the data store registered under the id 'esa_cci_odp_os'; its dataset
# collections are the basis for the test scenarios generated below.
# NOTE(review): presumably this is the ESA CCI Open Data Portal OpenSearch
# store referred to in the introduction - confirm.
data_store = DATA_STORE_REGISTRY.get_data_store('esa_cci_odp_os')

In [5]:
# Query the store without any filter arguments; the result is iterated
# below, one entry per dataset collection.
datasets = data_store.query()

Data source esacci.AEROSOL.satellite-orbit-frequency.L2P.AER_PRODUCTS.ATSR-2.ERS-2.SU.4-21.r1 already included. Will omit this one.


In [6]:
# Number of dataset collections returned by the query (upper bound for the
# number of rows in the scenario CSV, since collections without a usable
# file list are skipped).
len(datasets)

234

In [23]:
# Name of the output CSV file holding the test scenarios. Rows are appended
# to this file if it already exists, so choose a new name (or delete the old
# file) before regenerating the scenarios.
test_scenarios_csv = 'test_scenarios_after_release_1.csv'

In [24]:
# Column names of the scenario CSV; they are written as the header line and
# double as the keys of each row dictionary.
test_scenarios_header_row = ['dataset_collection','temporal_subset','variables_subset','spatial_subset']

In [25]:
# Holds one CSV row (column name -> value) for the dataset collection that is
# currently being processed.
test_rows = {}

In [26]:
def _first_time_range(file_list):
    """Return a ('YYYY-MM-DD', 'YYYY-MM-DD') tuple taken from the file list.

    The range preferably runs from element [1] of the first entry to element
    [2] of the second entry; if that is not available it falls back to
    elements [1] and [2] of the first entry alone. (Presumably these are the
    start and end time of each file - confirm against the data store.)

    Returns None when neither variant can be built, e.g. for an empty list.
    """
    for end_index in (1, 0):
        try:
            return tuple(t.strftime('%Y-%m-%d')
                         for t in (file_list[0][1], file_list[end_index][2]))
        except Exception:
            # Best effort, as before, but without swallowing
            # KeyboardInterrupt/SystemExit like a bare ``except:`` would.
            continue
    return None


def _pick_variable_names(variables, max_count=3):
    """Return up to ``max_count`` distinct, randomly chosen variable names.

    Variables whose name contains 'longitude', 'latitude', 'bounds' or 'bnds'
    are never selected. If no more than ``max_count`` candidates remain, all
    of them are returned in their original order.
    """
    excluded_tokens = ('longitude', 'latitude', 'bounds', 'bnds')
    candidates = []
    for var in variables:
        name = var['name']
        if name not in candidates and not any(token in name for token in excluded_tokens):
            candidates.append(name)
    if len(candidates) <= max_count:
        return candidates
    # Sampling without replacement always terminates and always yields
    # exactly max_count distinct names.
    return random.sample(candidates, max_count)


def _random_interval(lower_bound, upper_bound, size=1.0):
    """Return a random (start, start + size) interval inside the given bounds.

    The start is drawn so that the whole interval lies within
    [lower_bound, upper_bound] whenever that extent is at least ``size``
    wide; for narrower extents the interval starts at the lower bound.
    """
    low, high = sorted((float(lower_bound), float(upper_bound)))
    start = random.uniform(low, max(low, high - size))
    return start, start + size


for dataset in datasets:
    dataset.update_file_list()

    time_range = _first_time_range(dataset._file_list)
    if time_range is None:
        print(f'{dataset.id} has no file list. dataset._file_list :')
        print(dataset._file_list)
        continue

    meta_info = dataset.meta_info
    x_min, x_max = _random_interval(meta_info['bbox_minx'], meta_info['bbox_maxx'])
    y_min, y_max = _random_interval(meta_info['bbox_miny'], meta_info['bbox_maxy'])

    # Build a fresh row per dataset so that no value of a previously
    # processed dataset can leak into this one.
    test_rows = {
        'dataset_collection': dataset.id,
        'temporal_subset': time_range,
        'variables_subset': _pick_variable_names(meta_info['variables']),
        # Order: min x, min y, max x, max y - a 1 degree x 1 degree box.
        'spatial_subset': f'{x_min}, {y_min}, {x_max}, {y_max}',
    }
    update_csv(test_scenarios_csv, test_scenarios_header_row, test_rows)

esacci.ICESHEETS.satellite-orbit-frequency.L4.GLL.multi-sensor.multi-platform.VARIOUS.v1-3.r1 has no file list. dataset._file_list :
[]
esacci.FIRE.mon.L3S.BA.MODIS.Terra.MODIS_TERRA.v5-1.pixel has no file list. dataset._file_list :
[]
esacci.ICESHEETS.mon.IND.GMB.GRACE-instrument.GRACE.VARIOUS.1-3.time_series has no file list. dataset._file_list :
[]
esacci.FIRE.mon.L3S.BA.MSI-(Sentinel-2).Sentinel-2A.MSI.v1-1.pixel has no file list. dataset._file_list :
[]
esacci.ICESHEETS.yr.L4.CFL.multi-sensor.multi-platform.VARIOUS.v3-0.r1 has no file list. dataset._file_list :
[]
