In [211]:
import os
import pandas as pd
import rioxarray as rxr
import xarray as xr
from dataclasses import asdict
from dscreator.cfarray.attributes import DatasetAttrsGrid, VariableAttrs
from datetime import datetime
from dscreator import utils
from dscreator.storage import BaseHandler, get_storage_handler
from dscreator.sources.ferrybox.extractor import TrajectoryExtractor, NamedTrajectory, NamedArray
from sqlalchemy import create_engine
from dscreator.config import SETTINGS
from dscreator.datasets.trajectories.ferrybox import FerryboxTrajBuilder
import numpy as np

In [212]:
# --- Run configuration -------------------------------------------------------
# Ship code; also used as the filename prefix when selecting manual QC files.
boat = "FA"

# Parameters requested from the trajectory extractor.
measurement_parameters = [
    "Temperature",
    "Salinity",
    "Oxygen",
]

# Metadata handed to the dataset builder.
uuid = "29b7de62-e1fa-4dce-90e4-7ff8a0931397"
datasetname = "FA_ferrybox_2017_2022"
stationname = "Color Fantasy"
projectname = "NorSoop"

In [213]:
# Time window handed to the extractor for this slice.
slice_start = datetime(2017, 1, 1)
slice_end = datetime(2017, 1, 31)

# Connect to the database named in SETTINGS and fetch the configured
# parameters for the selected boat within the window above.
engine = create_engine(SETTINGS.database_url)
te = TrajectoryExtractor(engine, boat, measurement_parameters)
ts = te.fetch_slice(start_time=slice_start, end_time=slice_end)

In [214]:
# Manual QC step.
#  1. Read the manual QC CSV files for `boat`: files whose name contains
#     'ox_sat' list bad-oxygen timestamps, all other files list no-flow timestamps.
#  2. Set Oxygen values to None at the bad-oxygen timestamps.
#  3. Remove every sample (values, timestamps, locations) at the no-flow timestamps.
# Directory holding the QC files, resolved relative to the current working directory.
QC_DIR = 'Norsoop-manual-qc-files'


def _read_qc_timestamps(csv_paths):
    """Return the first column of the given header-less CSV files as an object array of datetimes.

    An empty ``csv_paths`` yields an empty array instead of letting ``pd.concat``
    raise ``ValueError`` on an empty list.
    """
    if not csv_paths:
        return np.array([], dtype=object)
    merged = pd.concat([pd.read_csv(path, header=None) for path in csv_paths], ignore_index=True)
    stamps = pd.to_datetime(merged[merged.columns[0]]).to_list()
    return np.array([stamp.to_pydatetime() for stamp in stamps], dtype=object)


def _matching_indices(timestamps, flagged):
    """Return the positions in ``timestamps`` whose value occurs in ``flagged``.

    Unlike ``np.intersect1d(..., return_indices=True)``, which reports only the
    first occurrence of each common value, every matching position is returned,
    so repeated timestamps are all caught.
    """
    # assumes the entries are hashable datetime objects that compare equal to the QC datetimes -- TODO confirm
    lookup = set(flagged)
    return np.array([i for i, stamp in enumerate(timestamps) if stamp in lookup], dtype=int)


def _without_indices(items, drop):
    """Return ``items`` as a list with the positions in ``drop`` removed; elements are kept as-is."""
    return [item for i, item in enumerate(items) if i not in drop]


print("Creating manual QC lookup list")
qc_names = [name for name in os.listdir(QC_DIR) if name.startswith(boat)]
bad_T = _read_qc_timestamps([os.path.join(QC_DIR, name) for name in qc_names if 'ox_sat' not in name])
bad_ox = _read_qc_timestamps([os.path.join(QC_DIR, name) for name in qc_names if 'ox_sat' in name])

i_bad_T = _matching_indices(ts.datetime_list, bad_T)
i_bad_ox = _matching_indices(ts.datetime_list, bad_ox)
print(f"Found {len(i_bad_T)} timestamp with no flow, and {len(i_bad_ox)} with bad oxygen")
print(f"{len(np.intersect1d(i_bad_T, i_bad_ox))} timestamps overlap")

print("About to set values for oxygen to None for bad oxygen timestamps")
# Sets give O(1) membership tests; `i in ndarray` would scan the array for every sample.
ox_positions = set(i_bad_ox.tolist())
ts = NamedTrajectory(
    array_list=[
        NamedArray(
            nta.variable_name,
            [None if i in ox_positions else val for i, val in enumerate(nta.values)],
        )
        if nta.variable_name == "Oxygen"
        else nta
        for nta in ts.array_list
    ],
    datetime_list=ts.datetime_list,
    locations=ts.locations,
)

print(f"About to remove bad flow data. Before removal size of data based on location is "
      f"{len(ts.locations)}")
# Blanking oxygen did not change any lengths, so the no-flow positions are still aligned.
flow_positions = set(i_bad_T.tolist())
ts = NamedTrajectory(
    array_list=[
        NamedArray(nta.variable_name, _without_indices(nta.values, flow_positions))
        for nta in ts.array_list
    ],
    datetime_list=_without_indices(ts.datetime_list, flow_positions),
    locations=_without_indices(ts.locations, flow_positions),
)
print(f"After removal size of data based on location is {len(ts.locations)}")

Creating manual QC lookup list
Found 6607 timestamp with no flow, and 5215 with bad oxygen
5210 timestamps overlap
About to set values for oxygen to None for bad oxygen timestamps
About to remove bad flow data. Before removal size of data based on location is 33320
After removal size of data based on location is 26713


In [215]:
# Collect the builder metadata in one place, then build the dataset from the
# quality-controlled trajectory.
builder_options = {
    "uuid": uuid,
    "dataset_name": datasetname,
    "station_name": stationname,
    "project_name": projectname,
    "is_acdd": True,
}
tb = FerryboxTrajBuilder(**builder_options)
ds = tb.create(ts)

In [216]:
# Persist the slice only when the builder produced at least one time step.
# Dataset.sizes is the name -> length mapping; indexing Dataset.dims by name is
# deprecated because Dataset.dims is changing to return a set of dimension names.
if ds.sizes["time"] > 0:
    print(f"Saving dataset slice {ds.time[0].values} --> {ds.time[-1].values}")

    # NOTE(review): project_name receives the dataset name and dataset_name receives
    # the station name, while `projectname` is not used here -- confirm this mapping
    # is intended before changing it, since it presumably determines the storage location.
    sh = get_storage_handler(
        project_name=str(datasetname),
        dataset_name=str(stationname),
        unlimited_dims=["time"],
        # Reuse the configured boat code instead of repeating the literal "FA".
        filename_prefix=boat,
    )
    sh.save_dataset(ds)
    print("Dataset was successfully saved")
else:
    print("Found no data for interval")

Saving dataset slice 2017-01-01T13:25:03.000000000 --> 2017-01-30T23:59:23.000000000
Dataset was successfully saved
