# PRE PROCESSING DATA PIPELINE

#### IMPORTS

In [1]:
import matplotlib.pyplot as plt
import xarray as xr

from reproduce.ose_data_pipeline import download_copernicus_data_for_sat, filt_daily_ssh_data, grid_input

  from .autonotebook import tqdm as notebook_tqdm


## REAL DATA TEST

### Pipeline must:

- #### **Download**, **concatenate** and **grid** the *input* nadir data
- #### **Download** and **concatenate** the *reference* nadir data

### VALUES TO BE DEFINED (run only ONE of the year cells below; each one overwrites the others):

#### 2023

In [None]:
# YEAR TO PROCESS
# Single source of truth: every date and path below is derived from it, so
# switching year cannot leave one of them pointing at another year's data.
year = 2023

# TIME VALUES (first and last day of the selected year)
min_time = f'{year}-01-01'
max_time = f'{year}-12-31'

# LAT LON VALUES
min_lon = -66
max_lon = -54
min_lat = 32
max_lat = 44

# DATASET ID ('{}' IS THE PLACEHOLDER FOR THE SATELLITE NAME)
copernicus_dataset_id = "cmems_obs-sl_glo_phy-ssh_nrt_{}-l3-duacs_PT1S"

# SATELLITES NAMES TO DOWNLOAD
satellites = ['al', 'c2n', 'h2b', 'j3n', 's3a', 's3b']
ref_satellites = ['s6a-hr', 'swon']

# DIRS TO STORE DOWNLOADED DATA ('{}' IS THE PLACEHOLDER FOR THE SATELLITE NAME)
# The doubled braces keep a literal '{}' in the resulting string so that it can
# still be filled in later with .format(<satellite>).
download_sat_input_dir = f'/DATASET/OCB_traces/dl/{year}/input/{{}}'
download_sat_ref_dir = f'/DATASET/OCB_traces/dl/{year}/ref/{{}}'

# FILES TO STORE CONCATENATED DATA
concatenated_input_path = f"/DATASET/OCB_traces/concat/concatenated_input_{year}.nc"
concatenated_ref_path = f"/DATASET/OCB_traces/concat/concatenated_ref_{year}.nc"

# GRIDDED INPUT FILEPATH
gridded_input_path = f"/DATASET/OCB_traces/input_test_6sat_{year}.nc"

#### 2022

In [None]:
# YEAR TO PROCESS
# Single source of truth: every date and path below is derived from it, so
# switching year cannot leave one of them pointing at another year's data.
year = 2022

# TIME VALUES (first and last day of the selected year)
min_time = f'{year}-01-01'
max_time = f'{year}-12-31'

# LAT LON VALUES
min_lon = -66
max_lon = -54
min_lat = 32
max_lat = 44

# DATASET ID ('{}' IS THE PLACEHOLDER FOR THE SATELLITE NAME)
copernicus_dataset_id = "cmems_obs-sl_glo_phy-ssh_nrt_{}-l3-duacs_PT1S"

# SATELLITES NAMES TO DOWNLOAD
satellites = ['al', 'c2n', 'h2b', 'j3n', 's3a', 's3b']
ref_satellites = ['s6a-hr', 'swon']

# DIRS TO STORE DOWNLOADED DATA ('{}' IS THE PLACEHOLDER FOR THE SATELLITE NAME)
# The doubled braces keep a literal '{}' in the resulting string so that it can
# still be filled in later with .format(<satellite>).
download_sat_input_dir = f'/DATASET/OCB_traces/dl/{year}/input/{{}}'
download_sat_ref_dir = f'/DATASET/OCB_traces/dl/{year}/ref/{{}}'

# FILES TO STORE CONCATENATED DATA
concatenated_input_path = f"/DATASET/OCB_traces/concat/concatenated_input_{year}.nc"
concatenated_ref_path = f"/DATASET/OCB_traces/concat/concatenated_ref_{year}.nc"

# GRIDDED INPUT FILEPATH
gridded_input_path = f"/DATASET/OCB_traces/input_test_6sat_{year}.nc"

#### 2017

In [None]:
# YEAR TO PROCESS
# Single source of truth: every date and path below is derived from it, so
# switching year cannot leave one of them pointing at another year's data.
year = 2017

# TIME VALUES (first and last day of the selected year)
min_time = f'{year}-01-01'
max_time = f'{year}-12-31'

# LAT LON VALUES
min_lon = -66
max_lon = -54
min_lat = 32
max_lat = 44

# DATASET ID ('{}' IS THE PLACEHOLDER FOR THE SATELLITE NAME)
copernicus_dataset_id = "cmems_obs-sl_glo_phy-ssh_my_{}-l3-duacs_PT1S"

# SATELLITES NAMES TO DOWNLOAD
satellites = ['alg', 'h2ag', 'j2g', 'j2n', 'j3', 's3a']
ref_satellites = ['c2']

# DIRS TO STORE DOWNLOADED DATA ('{}' IS THE PLACEHOLDER FOR THE SATELLITE NAME)
# The doubled braces keep a literal '{}' in the resulting string so that it can
# still be filled in later with .format(<satellite>).
download_sat_input_dir = f'/DATASET/OCB_traces/dl/{year}/input/{{}}'
download_sat_ref_dir = f'/DATASET/OCB_traces/dl/{year}/ref/{{}}'

# FILES TO STORE CONCATENATED DATA
concatenated_input_path = f"/DATASET/OCB_traces/concat/concatenated_input_{year}.nc"
concatenated_ref_path = f"/DATASET/OCB_traces/concat/concatenated_ref_{year}.nc"

# GRIDDED INPUT FILEPATH
gridded_input_path = f"/DATASET/OCB_traces/input_test_6sat_{year}.nc"

## DOWNLOAD

In [5]:
# INPUT DATA
# One call per input satellite; each satellite gets its own sub-directory,
# obtained by filling the '{}' placeholder of the directory template.
for sat in satellites:
    download_copernicus_data_for_sat(
        sat=sat,
        download_dir=download_sat_input_dir.format(sat),
        min_time=min_time,
        max_time=max_time,
        copernicus_dataset_id=copernicus_dataset_id,
    )

Starting


Fetching catalog: 100%|██████████| 3/3 [00:10<00:00,  3.64s/it]


INFO - 2024-07-09T08:34:03Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-07-09T08:34:03Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-07-09T08:34:03Z - Service was not specified, the default one was selected: "original-files"
INFO - 2024-07-09T08:34:03Z - Downloading using service original-files...


100%|██████████| 909/909 [06:06<00:00,  2.48it/s] 


Starting output validation
Succesfully validated output
Starting
INFO - 2024-07-09T08:40:14Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-07-09T08:40:14Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-07-09T08:40:14Z - Service was not specified, the default one was selected: "original-files"
INFO - 2024-07-09T08:40:14Z - Downloading using service original-files...


100%|██████████| 920/920 [06:28<00:00,  2.37it/s]


Starting output validation
Succesfully validated output
Starting
INFO - 2024-07-09T08:46:46Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-07-09T08:46:46Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-07-09T08:46:46Z - Service was not specified, the default one was selected: "original-files"
INFO - 2024-07-09T08:46:46Z - Downloading using service original-files...


100%|██████████| 896/896 [06:01<00:00,  2.48it/s]


Starting output validation
Succesfully validated output
Starting
INFO - 2024-07-09T08:52:50Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-07-09T08:52:50Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-07-09T08:52:50Z - Service was not specified, the default one was selected: "original-files"
INFO - 2024-07-09T08:52:50Z - Downloading using service original-files...


100%|██████████| 799/799 [05:25<00:00,  2.45it/s]


Starting output validation
Succesfully validated output
Starting
INFO - 2024-07-09T08:58:19Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-07-09T08:58:19Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-07-09T08:58:19Z - Service was not specified, the default one was selected: "original-files"
INFO - 2024-07-09T08:58:19Z - Downloading using service original-files...


100%|██████████| 921/921 [06:20<00:00,  2.42it/s]


Starting output validation
Succesfully validated output
Starting
INFO - 2024-07-09T09:04:43Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-07-09T09:04:43Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-07-09T09:04:43Z - Service was not specified, the default one was selected: "original-files"
INFO - 2024-07-09T09:04:43Z - Downloading using service original-files...


100%|██████████| 921/921 [06:20<00:00,  2.42it/s]


Starting output validation
Succesfully validated output


In [6]:
# REFERENCE DATA
# One call per reference satellite; each satellite gets its own sub-directory,
# obtained by filling the '{}' placeholder of the directory template.
for sat in ref_satellites:
    download_copernicus_data_for_sat(
        sat=sat,
        download_dir=download_sat_ref_dir.format(sat),
        min_time=min_time,
        max_time=max_time,
        copernicus_dataset_id=copernicus_dataset_id,
    )

Starting
INFO - 2024-07-09T09:11:06Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-07-09T09:11:06Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-07-09T09:11:06Z - Service was not specified, the default one was selected: "original-files"
INFO - 2024-07-09T09:11:06Z - Downloading using service original-files...


100%|██████████| 847/847 [06:20<00:00,  2.23it/s]


Starting output validation
Succesfully validated output
Starting
INFO - 2024-07-09T09:17:29Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-07-09T09:17:29Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-07-09T09:17:29Z - Service was not specified, the default one was selected: "original-files"
INFO - 2024-07-09T09:17:29Z - Downloading using service original-files...


100%|██████████| 244/244 [01:49<00:00,  2.24it/s]

Starting output validation
Succesfully validated output





## CONCATENATE

In [9]:
# INPUT DATA
# Filling the placeholder with an empty string yields the parent directory of
# the per-satellite download folders.
# NOTE(review): presumably filt_daily_ssh_data picks up every satellite found
# under that directory - confirm against its implementation.
input_download_root = download_sat_input_dir.format('')

filt_daily_ssh_data(
    input_dir=input_download_root,
    output_path=concatenated_input_path,
    min_time=min_time, max_time=max_time,
    min_lon=min_lon, max_lon=max_lon,
    min_lat=min_lat, max_lat=max_lat,
)

Starting
Starting input validation
Succesfully validated input
Starting output validation
Succesfully validated output


In [10]:
# REFERENCE DATA
# Filling the placeholder with an empty string yields the parent directory of
# the per-satellite download folders.
# NOTE(review): presumably filt_daily_ssh_data picks up every satellite found
# under that directory - confirm against its implementation.
ref_download_root = download_sat_ref_dir.format('')

filt_daily_ssh_data(
    input_dir=ref_download_root,
    output_path=concatenated_ref_path,
    min_time=min_time, max_time=max_time,
    min_lon=min_lon, max_lon=max_lon,
    min_lat=min_lat, max_lat=max_lat,
)

Starting
Starting input validation
Succesfully validated input
Starting output validation
Succesfully validated output


## GRIDDING

In [12]:
# INPUT DATA
# Grid the concatenated input file over the configured time span and lat/lon
# box, and write the result to the gridded input file path.
grid_input(
    input_path=concatenated_input_path,
    output_path=gridded_input_path,
    min_time=min_time, max_time=max_time,
    min_lon=min_lon, max_lon=max_lon,
    min_lat=min_lat, max_lat=max_lat,
    # NOTE(review): 0.083 looks like a ~1/12 degree grid step - confirm the
    # intended resolution and the unit expected by grid_input.
    degrees=0.083,
)

## TESTS

In [11]:
# Load both pipeline products from disk and report the time span each covers.
gridded_input_data = xr.load_dataset(gridded_input_path)
concat_ref_data = xr.load_dataset(concatenated_ref_path)

# Pull the raw time arrays out once instead of re-reading them for min and max.
gridded_times = gridded_input_data.time.values
ref_times = concat_ref_data.time.values

print('GRIDDED INPUT\nMIN TIME: {}\nMAX TIME: {}'.format(gridded_times.min(), gridded_times.max()))
print('\n\nCONCAT REF\nMIN TIME: {}\nMAX TIME: {}'.format(ref_times.min(), ref_times.max()))

GRIDDED INPUT
MIN TIME: 2023-01-01T00:00:00.000000000
MAX TIME: 2023-12-31T00:00:00.000000000


CONCAT REF
MIN TIME: 2023-01-01T00:02:36.999724800
MAX TIME: 2023-12-30T21:59:29.309960192
