In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.figure_factory as ff
import xarray as xr
import matplotlib.pyplot as plt

# Route pandas' `.plot` accessor through plotly for the rest of the notebook.
pd.options.plotting.backend = "plotly"

## Load datasets

---


In [None]:
# Open the three processed BATS Zarr stores, in the same order as the names
# on the left-hand side.
bats_bottle, bats_primary_production, bats_zooplankton = (
    xr.open_zarr(store_path)
    for store_path in (
        "../../2_processed/bats_bottle.zarr",
        "../../2_processed/bats_primary_production.zarr",
        "../../2_processed/bats_zooplankton.zarr",
    )
)

In [None]:
# Display the zooplankton dataset as loaded from the Zarr store.
bats_zooplankton

In [None]:
# Display the primary production dataset as loaded from the Zarr store.
bats_primary_production

In [None]:
# Display the bottle dataset as loaded from the Zarr store.
bats_bottle

## Manage index

---


In [None]:
# Put the `time` coordinate of each dataset in its own column so that the
# temporal coverage of the three datasets can be compared on one figure.
# The columns are aligned on the union of all timestamps, so a column is NaN
# wherever its dataset has no sample at that time.
time_sources = {
    "btl_time": bats_bottle,
    "zpk_time": bats_zooplankton,
    "pp_time": bats_primary_production,
}
times = pd.DataFrame(
    {label: dataset.time.to_series() for label, dataset in time_sources.items()}
)

fig = px.histogram(
    times,
    x=list(time_sources),
    title="time distribution",
    opacity=0.5,
    log_y=True,  # sample counts differ a lot between datasets
    nbins=30,
    barmode="overlay",
    marginal="box",
    # The x axis holds timestamps, not a length, so the label carries no unit.
    labels={"value": "time", "variable": "Datasets"},
)

fig.show()

In [None]:
# One column per dataset holding its `depth` coordinate, aligned on the union
# of all depth values.
depth_sources = {
    "ctd_depth": bats_bottle,
    "zpk_depth": bats_zooplankton,
    "pp_depth": bats_primary_production,
}
depths = pd.DataFrame(
    {label: dataset.depth.to_series() for label, dataset in depth_sources.items()}
)

# Long format: one row per (depth, dataset) pair. `stack` names the former
# column level "level_1", which is renamed so it can be used as the x axis.
depths_long = depths.stack().reset_index().rename(columns={"level_1": "dataset"})

px.box(depths_long, x="dataset", y="depth")

## Migrant and resident zooplankton


In [None]:
def _daily_mean_where(dataset, keep_time):
    """Average the selected time steps of ``dataset`` per calendar day.

    Parameters
    ----------
    dataset : xr.Dataset
        Dataset with a ``time`` dimension.
    keep_time : numpy.ndarray of bool
        One flag per ``time`` step; only the steps flagged ``True`` are kept.

    Returns
    -------
    xr.Dataset
        Daily means (``resample(time="1D")``), without the days on which
        every value is NaN.
    """
    return (
        dataset.isel(time=keep_time)
        .resample(time="1D")
        .mean()
        .dropna("time", how="all")
    )


# Biomass variables carried over from the raw zooplankton dataset.
ZPK_WEIGHT_VARS = ["dry_weight_vol_water_ratio", "wet_weight_vol_water_ratio"]

with xr.set_options(keep_attrs=True):
    # Night = hours 19-23 and 0-5 of the `time` coordinate, day = hours 6-18.
    # NOTE(review): hour 18 is classified as day because the test is `> 18`,
    # and the hours are taken from `time` as stored — confirm both the
    # boundary and the time zone are the intended ones.
    sampling_hour = bats_zooplankton.time.dt.hour
    night_condition = (sampling_hour > 18) | (sampling_hour < 6)
    is_night = night_condition.values

    # Day samples are used as the resident biomass; the migrant biomass is the
    # night-time excess over it.
    zpk_resident = _daily_mean_where(bats_zooplankton, ~is_night)
    zpk_migrant = _daily_mean_where(bats_zooplankton, is_night)

    # xarray aligns both operands on `time` (inner join by default), so only
    # the days that have both a day and a night sample remain.
    zpk_migrant = zpk_migrant - zpk_resident

    # Negative differences are set to 0. `clip` leaves NaN untouched, whereas
    # `xr.where(x > 0, x, 0)` would turn missing values into 0 (NaN > 0 is
    # False) and make the `dropna` below a no-op.
    zpk_migrant = zpk_migrant.clip(min=0).dropna("time", how="all")

    zpk_migrant = zpk_migrant[ZPK_WEIGHT_VARS].rename(
        {
            "dry_weight_vol_water_ratio": "dry_weight_migrant",
            "wet_weight_vol_water_ratio": "wet_weight_migrant",
        }
    )
    zpk_resident = zpk_resident[ZPK_WEIGHT_VARS].rename(
        {
            "dry_weight_vol_water_ratio": "dry_weight_resident",
            "wet_weight_vol_water_ratio": "wet_weight_resident",
        }
    )

    # NOTE: this overwrites the raw dataset, so the cell cannot be re-run
    # without reloading `bats_zooplankton` first (the raw weight variables no
    # longer exist after the merge).
    bats_zooplankton = xr.merge([zpk_migrant, zpk_resident])
    # reorder dims as : sieve_size -> time -> lat -> lon -> depth
    bats_zooplankton = bats_zooplankton.transpose(
        "sieve_size", "time", "latitude", "longitude", "depth"
    )
bats_zooplankton

## Gathering

---


**TODO:** Select only the wanted variables in each dataset before merging.


In [None]:
# Combine the zooplankton, primary production and bottle datasets into a
# single dataset, then display the result.
datasets_to_merge = [bats_zooplankton, bats_primary_production, bats_bottle]
final_dataset = xr.merge(datasets_to_merge)
final_dataset