# Data Quality Control

In [9]:
# import modules
from pathlib import Path
import csv

import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import gsw

%matplotlib inline

In [14]:
workspace = Path.cwd()

In [None]:
# load data

nso1968_df = pd.read_csv(
    f"{workspace}/data/nso_1968.csv",
    header=0,
    delimiter="\t",
    usecols=[3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14],
    names=[
        "time",
        "lon",
        "lat",
        "depth",
        "temperature",
        "t_qc_flag",
        "salinity",
        "s_qc_flag",
        "dissolved_oxygen",
        "d_qc_flag",
        "qc_level",
    ],
    dtype={
        "Date(YYYY-MM-DD)": str,
        "Latitude": float,
        "Longitude": float,
        "Depth(m)": int,
        "Temperature(C)": float,
        "T QC Flag": int,
        "Salinity(psu)": float,
        "S QC Flag": int,
        "Dissolved Oxygen(mg/L)": float,
        "D QC Flag": int,
        "QC Level": int,
    },
)
nso1968_df['time'] = pd.to_datetime(nso1968_df['time'])


In [40]:
nso1968_df = nso1968_df.set_index(["time", "lon", "lat", "depth"])

In [42]:
nso1968_df.to_hdf(f"{workspace}/data/nso_1968.h5", key="nso1968", mode="w", format="table")

In [43]:
nso1968_ds = xr.Dataset.from_dataframe(nso1968_df, sparse=True)

In [44]:
nso1968_ds.info

<bound method Dataset.info of <xarray.Dataset> Size: 1MB
Dimensions:           (time: 1327, lon: 87, lat: 183, depth: 452)
Coordinates:
  * time              (time) datetime64[ns] 11kB 1968-01-11T12:30:00 ... 1968...
  * lon               (lon) float64 696B 32.0 32.5 32.93 ... 37.28 37.55 37.9
  * lat               (lat) float64 1kB 124.4 124.4 124.4 ... 133.1 133.7 134.3
  * depth             (depth) int64 4kB 0 10 12 15 18 ... 1032 1033 1037 1080
Data variables:
    temperature       (time, lon, lat, depth) float64 165kB <COO: nnz=10342, fill_value=nan>
    t_qc_flag         (time, lon, lat, depth) float64 165kB <COO: nnz=10342, fill_value=nan>
    salinity          (time, lon, lat, depth) float64 165kB <COO: nnz=10342, fill_value=nan>
    s_qc_flag         (time, lon, lat, depth) float64 165kB <COO: nnz=10342, fill_value=nan>
    dissolved_oxygen  (time, lon, lat, depth) float64 165kB <COO: nnz=10342, fill_value=nan>
    d_qc_flag         (time, lon, lat, depth) float64 165kB <COO: 