In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import xarray as xr
from plotly.subplots import make_subplots
from scipy.stats import t
from statsmodels.nonparametric.smoothers_lowess import lowess
import plotly.express as px

In [2]:
# Station identifier; it is interpolated into the input/output store names
# and into the figure titles below.
station = "Hot"
# Bounds of the analysis period: used to slice the dataset on its time axis,
# to draw the vertical markers, and to name the exported store.
time_start, time_end = "2002-01-01", "2015-01-01"

## Load


In [3]:
# Load the filtered observations of the selected station from its zarr store.
obs_store = f"../data/1_products/{station}_obs_filtered.zarr"
data = xr.load_dataset(obs_store, engine="zarr")
data

## Computing the trend


In [4]:
def _fit_trend(series, poly_order, confidence_level):
    """Fit a polynomial trend to one series and size its confidence band.

    Parameters
    ----------
    series : 1-D array-like with a datetime64 ``"time"`` coordinate
        Observations to fit; must not contain missing values.
    poly_order : int
        Degree of the fitted polynomial.
    confidence_level : float
        Two-sided confidence level used for the Student-t quantile.

    Returns
    -------
    tuple
        ``(trend_line, half_width)``: the fitted values at each time step and
        the constant half-width of the band drawn around them.
    """
    timestamps = series["time"].values
    # Elapsed time is expressed in (float) days rather than integer
    # nanoseconds so the abscissa passed to the polynomial fit stays small.
    elapsed_days = (timestamps - timestamps[0]) / np.timedelta64(1, "D")
    values = np.asarray(series, dtype=float)

    coefficients = np.polyfit(elapsed_days, values, poly_order)
    trend_line = np.polyval(coefficients, elapsed_days)

    n_samples = len(values)
    std_error = np.std(values - trend_line) / np.sqrt(n_samples)
    # A polynomial of degree `poly_order` has `poly_order + 1` fitted
    # coefficients; each one removes a degree of freedom.
    degrees_freedom = n_samples - (poly_order + 1)
    t_critical = t.ppf((1 + confidence_level) / 2, degrees_freedom)
    return trend_line, float(t_critical * std_error)


def _add_trend_traces(figure, series, trend_line, half_width, label, color, band_color):
    """Add the markers, trend line and confidence band of one series to `figure`."""
    times = series.indexes["time"]
    figure.add_trace(
        go.Scatter(
            x=times,
            y=series,
            mode="markers",
            name=f"{label} Data",
            marker={"color": color},
        )
    )
    figure.add_trace(
        go.Scatter(
            x=times,
            y=trend_line,
            mode="lines",
            name=f"{label} Trend Line",
            line={"color": color},
        )
    )
    # The band is a closed polygon: upper bound left-to-right, then the lower
    # bound right-to-left, filled with "toself".
    figure.add_trace(
        go.Scatter(
            x=list(times) + list(times)[::-1],
            y=list(trend_line + half_width) + list(trend_line - half_width)[::-1],
            fill="toself",
            fillcolor=band_color,
            line_color="rgba(0, 0, 0, 0)",
            name=f"{label} Confidence Interval",
        )
    )


def plot_trend(day_data, night_data, poly_order=1, confidence_level=0.95):
    """Plot day and night observations with a polynomial trend and its band.

    Each series is fitted independently with ``np.polyfit`` against the time
    elapsed since its first sample. The band is the trend line plus/minus
    ``t * std(residuals) / sqrt(n)``, where ``t`` is the two-sided Student-t
    quantile for ``confidence_level``.

    Parameters
    ----------
    day_data, night_data : 1-D series with a datetime64 ``"time"`` coordinate
        Observations without missing values (callers drop them beforehand).
    poly_order : int, default 1
        Degree of the fitted polynomial.
    confidence_level : float, default 0.95
        Confidence level of the band.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with six traces (data, trend line and band for day, then night).
    """
    figure = go.Figure()

    panels = (
        ("Day", day_data, "firebrick", "rgba(255, 0, 0, 0.2)"),
        ("Night", night_data, "royalblue", "rgba(0, 0, 255, 0.2)"),
    )
    for label, series, color, band_color in panels:
        trend_line, half_width = _fit_trend(series, poly_order, confidence_level)
        _add_trend_traces(figure, series, trend_line, half_width, label, color, band_color)

    figure.update_layout(height=600)
    return figure

In [5]:
confidence_level = 0.95
# Keep the first entry of every non-time dimension and drop the time steps
# with a missing value, so each series is 1-D and gap-free for the fit.
day_data = data["day"][:, 0, 0, 0].dropna(dim="time")
night_data = data["night"][:, 0, 0, 0].dropna(dim="time")

# Forward the confidence level explicitly so the value set above is the one
# actually used by the fit.
plot_trend(day_data, night_data, poly_order=1, confidence_level=confidence_level).update_layout(
    title=f"{station} Day and Night Trend (1st order polynomial fit)",
    xaxis_title="Time",
    yaxis_title="Biomass (mg/m3)",
).show()

In [6]:
confidence_level = 0.95
# Keep the first entry of every non-time dimension and drop the time steps
# with a missing value, so each series is 1-D and gap-free for the fit.
day_data = data["day"][:, 0, 0, 0].dropna(dim="time")
night_data = data["night"][:, 0, 0, 0].dropna(dim="time")

# Forward the confidence level explicitly so the value set above is the one
# actually used by the fit.
plot_trend(day_data, night_data, poly_order=2, confidence_level=confidence_level).update_layout(
    title=f"{station} Day and Night Trend (2nd order polynomial fit)",
    xaxis_title="Time",
    yaxis_title="Biomass (mg/m3)",
).show()

In [7]:
fig = go.Figure()

# Day and night series over the whole record, without the missing time steps.
for label, series in (("Day", data.day), ("Night", data.night)):
    valid = series.dropna("time")
    fig.add_trace(
        go.Scatter(
            x=valid.time.data.flatten(),
            y=valid.data.flatten(),
            mode="lines",
            name=label,
        )
    )

# Dashed vertical markers at both bounds of the analysis period. The shape
# name is derived from the bound itself so it cannot drift from the config.
for bound in (time_start, time_end):
    fig.add_shape(
        type="line",
        x0=bound,
        y0=0,
        x1=bound,
        y1=1,
        xref="x",
        yref="paper",  # y0/y1 are fractions of the plot height
        line=dict(color="black", width=2, dash="dash"),
        name=f"Year {pd.Timestamp(bound).year}",
    )

fig.update_layout(
    title=f"Day and Night Data at {station} : remove interannual variability",
    xaxis_title="Time",
    yaxis_title="Zooplankton biomass (mg/m3)",
    legend_title="Legend",
    height=600,
    showlegend=True,
)

fig.show()

In [8]:
# Restrict the dataset to the analysis period bounded by time_start and
# time_end. This rebinds `data`, so every cell below works on the restricted
# record only.
data = data.sel(time=slice(time_start, time_end))

In [9]:
confidence_level = 0.95
# Same extraction as before, now on the period-restricted dataset. These two
# series are reused by the final comparison figure further down.
day_data = data["day"][:, 0, 0, 0].dropna(dim="time")
night_data = data["night"][:, 0, 0, 0].dropna(dim="time")

# Forward the confidence level explicitly and label the figure so it can be
# told apart from the full-record fits above.
plot_trend(day_data, night_data, poly_order=2, confidence_level=confidence_level).update_layout(
    title=f"{station} Day and Night Trend (2nd order polynomial fit, {time_start} to {time_end})",
    xaxis_title="Time",
    yaxis_title="Biomass (mg/m3)",
).show()

## Number of samples


In [10]:
# Per-month and per-year counts of each variable, reduced to the first
# latitude/longitude/layer entry and flattened into one DataFrame per grouping.
counts_month, counts_year = (
    data.groupby(f"time.{period}")
    .count()
    .isel(latitude=0, longitude=0, layer=0)
    .drop_vars(["latitude", "longitude", "layer"])
    .to_dataframe()
    for period in ("month", "year")
)

In [11]:
# Grouped (side-by-side) bars: one bar per data variable for every month.
monthly_counts_fig = px.bar(
    counts_month,
    x=counts_month.index,
    y=counts_month.columns,
    barmode="group",
    title=f"Counts of {station} data per month",
    labels={"value": "Counts"},
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
monthly_counts_fig.update_layout(xaxis_title="Month", height=600)
monthly_counts_fig.show()

In [12]:
# Grouped (side-by-side) bars: one bar per data variable for every year.
yearly_counts_fig = px.bar(
    counts_year,
    x=counts_year.index,
    y=counts_year.columns,
    barmode="group",
    title=f"Counts of {station} data per year",
    labels={"value": "Counts"},
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
yearly_counts_fig.update_layout(xaxis_title="Year", height=600)
yearly_counts_fig.show()

## Group data by month


In [13]:
# Monthly climatology: average the day and night variables over every sample
# falling in the same calendar month, then reindex on months 1..12 so that a
# month without any sample appears as NaN instead of being absent.
grouped_data = (
    xr.Dataset({"day": data.day, "night": data.night})
    .groupby("time.month")
    .mean()
    .reindex(month=np.arange(1, 13))
)
grouped_data

## Raw data vs Smoothing


In [14]:
# Tile the 12-month climatology three times (months -11..0, 1..12, 13..24) so
# the smoother sees neighbours on both sides of January and December.
grouped_data_less = grouped_data.assign_coords(month=grouped_data.month - 12)
grouped_data_more = grouped_data.assign_coords(month=grouped_data.month + 12)
gathered_data = xr.concat([grouped_data_less, grouped_data, grouped_data_more], dim="month")

# (variable name, subplot row) of each panel, and the flattened inputs shared
# by the raw traces and the smoother.
panels = (("day", 1), ("night", 2))
month_axis = gathered_data.month.data.flatten()
raw_series = {period: gathered_data[period].data.flatten() for period, _ in panels}

fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    subplot_titles=("Day", "Night"),
    vertical_spacing=0.1,
    x_title="month of year",
    y_title="Zooplankton biomass (mg/m3)",
)

# Raw tiled climatology, one panel per period.
for period, row in panels:
    fig.add_trace(
        go.Scatter(x=month_axis, y=raw_series[period], mode="lines", name=period),
        row=row,
        col=1,
    )

# LOWESS-smoothed curves; only months 1..12 of the smoothed result are stored
# back in the dataset, under "<period>_lowess_<frac>".
for frac in [0.15]:
    for period, row in panels:
        smoothed = lowess(raw_series[period], month_axis, frac=frac)
        fig.add_trace(
            go.Scatter(x=smoothed[:, 0], y=smoothed[:, 1], mode="lines", name=f"{period} lowess {frac}"),
            row=row,
            col=1,
        )
        gathered_data[f"{period}_lowess_{frac}"] = xr.DataArray(
            smoothed[:, 1],
            dims=["month"],
            coords={"month": smoothed[:, 0]},
            attrs={"frac": frac, "units": "mg/m3"},
        ).sel(month=slice(1, 12))

fig.update_layout(
    title=f"Day and night zooplankton observations at {station}",
    height=800,
)

fig.show()

## Duplicating observations across the entire time period


In [15]:
# Table of raw and smoothed values indexed by month number.
value_by_month = gathered_data.to_dataframe().reset_index()
value_by_month = value_by_month.drop(columns=["latitude", "longitude", "layer"]).set_index("month")

# One timestamp per month between the first and last observation: each month
# start is shifted by 14 days, i.e. to the 15th of the month.
month_starts = pd.date_range(start=data.indexes["time"][0], end=data.indexes["time"][-1], freq="MS")
time = month_starts + pd.DateOffset(days=14)
month_by_day = pd.DataFrame({"month": time.month}, index=time)

# Repeat the monthly values at every timestamp sharing the same month number.
aggregated_smoothed = (
    month_by_day.join(value_by_month, on="month").drop(columns=["month"]).rename_axis("time")
)

climato_observations = xr.Dataset.from_dataframe(aggregated_smoothed)
for variable_name in climato_observations:
    climato_observations[variable_name].attrs = {"units": "mg/m3"}
climato_observations.attrs = {
    "desc": "Aggregated and smoothed observations using lowess filtre.",
}
climato_observations

## Plot final results


In [16]:
day_names = ["day", "day_lowess_0.15"]
night_names = ["night", "night_lowess_0.15"]

fig = make_subplots(
    rows=2,
    cols=1,
    subplot_titles=("Day Data", "Night Data"),
    shared_xaxes=True,
    vertical_spacing=0.1,
)
# Enlarge the figure.
fig.update_layout(height=800)

# -------------------------------------------------------------------------------------------------------------------- #

# Initial day observations (top panel).
fig.add_trace(
    go.Scatter(
        x=day_data["time"],
        y=day_data,
        mode="lines",
        name="Initial Day Data",
    ),
    row=1,
    col=1,
)

# Initial night observations (bottom panel).
fig.add_trace(
    go.Scatter(
        x=night_data["time"],
        y=night_data,
        mode="lines",
        name="Initial Night Data",
    ),
    row=2,
    col=1,
)

# -------------------------------------------------------------------------------------------------------------------- #

# Monthly climatology and its smoothed version, repeated over the period.
for day, night in zip(day_names, night_names):
    fig.add_trace(
        go.Scatter(
            x=aggregated_smoothed.index,
            y=aggregated_smoothed[day],
            mode="lines",
            name=day,
        ),
        row=1,
        col=1,
    )

    fig.add_trace(
        go.Scatter(
            x=aggregated_smoothed.index,
            y=aggregated_smoothed[night],
            mode="lines",
            name=night,
        ),
        row=2,
        col=1,
    )

# -------------------------------------------------------------------------------------------------------------------- #

# Layout. `xaxis_title` / `yaxis_title` in update_layout only address the
# first subplot's axes, so the labels are set per subplot instead: the time
# label under the bottom panel, the biomass label on both panels.
fig.update_layout(
    title=f"Comparison of Initial and Processed Data at {station} : With Trend",
)
fig.update_xaxes(title_text="Time", row=2, col=1)
fig.update_yaxes(title_text="Zooplankton biomass (mg/m3)")

fig.show()

In [17]:
# fig.write_html(f"Hot_climato_observations_filtered.html")

## Export


In [18]:
# Re-attach the station's latitude / longitude / layer as length-1 dimensions
# and order the dimensions as (time, latitude, longitude, layer).
station_coords = {name: [data[name][0]] for name in ("latitude", "longitude", "layer")}
climato_observations = climato_observations.expand_dims(station_coords).transpose(
    "time", "latitude", "longitude", "layer"
)

In [19]:
# Copy the metadata of every coordinate and of the two original variables
# from the source dataset onto the climatology.
for name in ("latitude", "longitude", "layer", "time", "day", "night"):
    climato_observations[name].attrs = data[name].attrs
climato_observations

In [20]:
# Write the climatology to a zarr store named after the station and the first
# and last year of the analysis period; mode="w" replaces an existing store.
first_year = pd.Timestamp(time_start).year
last_year = pd.Timestamp(time_end).year
output_store = f"../data/1_products/{station}_obs_zoo_climato_monthly_{first_year}_{last_year}.zarr"
climato_observations.to_zarr(output_store, mode="w")

<xarray.backends.zarr.ZarrStore at 0x17f9daec0>