In [None]:
import pandas as pd
import plotly.express as px
import xarray as xr

# Route pandas' `.plot` accessor through plotly (global pandas option).
pd.set_option("plotting.backend", "plotly")

## Load dataset

---


In [2]:
# Read the three processed BATS files, in the same order as the names
# on the left-hand side.
bats_bottle, bats_primary_production, bats_zooplankton = (
    xr.load_dataset(nc_path)
    for nc_path in (
        "../../2_processed/bats_bottle.nc",
        "../../2_processed/bats_primary_production.nc",
        "../../2_processed/bats_zooplankton.nc",
    )
)

In [3]:
# Restrict the zooplankton dataset to the dry- and wet-weight ratio variables.
zooplankton_vars = ["dry_weight_vol_water_ratio", "wet_weight_vol_water_ratio"]
bats_zooplankton = bats_zooplankton[zooplankton_vars]
# Display the reduced dataset.
bats_zooplankton

In [4]:
# Display the primary-production dataset for a quick visual check.
bats_primary_production

In [5]:
# Display the bottle dataset for a quick visual check.
bats_bottle

## Manage index

---


In [6]:
# One column per dataset, each holding that dataset's `time` coordinate.
# Building the frame from Series aligns them on the union of their indexes,
# so a dataset has NaT wherever it has no sample at a given time.
times = pd.DataFrame(
    {
        "btl_time": bats_bottle.time.to_series(),
        "zpk_time": bats_zooplankton.time.to_series(),
        "pp_time": bats_primary_production.time.to_series(),
    }
)

# Overlaid histograms (log-scaled counts) compare the temporal coverage of
# the three datasets; the marginal box plots summarise each distribution.
fig = px.histogram(
    times,
    x=["btl_time", "zpk_time", "pp_time"],
    title="time distribution",
    opacity=0.5,
    log_y=True,
    nbins=30,
    barmode="overlay",
    marginal="box",
    # The x axis holds timestamps, so it carries no "(m)" unit suffix.
    labels={"value": "time", "variable": "Datasets"},
)

fig.show()

In [7]:
# One column per dataset, each holding that dataset's `depth` coordinate.
depths = pd.DataFrame(
    {
        "ctd_depth": bats_bottle.depth.to_series(),
        "zpk_depth": bats_zooplankton.depth.to_series(),
        "pp_depth": bats_primary_production.depth.to_series(),
    }
)

# Reshape to long format: stacking moves the column names into an index
# level, which `reset_index` exposes as `level_1`; rename it to `dataset`.
# NOTE(review): the `depth` column used below presumably comes from the index
# name carried over by `to_series()` — confirm.
depths_long = depths.stack().reset_index().rename(columns={"level_1": "dataset"})

# One box per dataset showing its depth distribution.
px.box(depths_long, x="dataset", y="depth")

## Migrant and resident


## Gathering

---


**TODO:** Select only the required variables from each dataset before merging.


In [8]:
# Temporary, need to select the temp to keep
# Temporary: `temp` is dropped from the bottle dataset before the merge until
# we decide which temperature variable to keep.
# `errors="ignore"` keeps this cell idempotent: re-running it after `temp` has
# already been removed no longer raises.
bats_bottle = bats_bottle.drop_vars("temp", errors="ignore")

In [9]:
# Combine the three BATS datasets into a single product.
datasets_to_merge = [bats_zooplankton, bats_primary_production, bats_bottle]
final_dataset = xr.merge(datasets_to_merge)
# `load()` is the last expression, so its return value is rendered as the cell output.
final_dataset.load()

In [10]:
# Write the merged product to disk; mode="w" overwrites any existing file.
product_path = "../../3_post_processed/bats_product.nc"
final_dataset.to_netcdf(product_path, mode="w")



