# Pteropod Climatology at PAPA stations

## Set up

In [None]:
# imports
import xarray as xr
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np 

In [95]:
# Read the processed PAPA zooplankton Zarr store into memory.
zarr_path = "../2_processed/papa_zooplankton.zarr"
papa_data = xr.load_dataset(zarr_path, engine="zarr")
# Last expression of the cell: renders the dataset summary.
papa_data

In [96]:
# Pull the "pteropods" variable out of the dataset; all later cells work
# on this single variable.
data_pter = papa_data["pteropods"]
data_pter

In [97]:
# define seasons
def get_season(month):
    """Return the season name for a month number.

    December-February map to "Winter", March-May to "Spring" and
    June-August to "Summer". Every other value (September-November, but
    also anything that matches none of the groups) yields "Autumn".
    """
    named_seasons = (
        ("Winter", (12, 1, 2)),  # DJF
        ("Spring", (3, 4, 5)),  # MAM
        ("Summer", (6, 7, 8)),  # JJA
    )
    for label, months in named_seasons:
        if month in months:
            return label
    return "Autumn"  # SON, and the fallback for unmatched values


## Checking Data 

<div style="padding: 10px; border-radius: 5px; background-color: #cce5ff; color: #004085; border-left: 5px solid #007bff;">
    <strong>Possible bias:</strong> over-represented months/years, changes in the observation method, locations without enough data, missing data, ...
</div>


### Time

In [46]:
# Number of non-NaN pteropod values at each time step, collapsing every
# other dimension.
non_time_dims = ["latitude", "longitude", "depth", "is_day"]
counts = data_pter.count(dim=non_time_dims)

# Flatten to a pandas table (columns: time, count) for Plotly Express.
df_counts = counts.to_dataframe(name="count")
df_counts = df_counts.reset_index()

In [9]:
# Valid entries per calendar year.
df_counts["year"] = df_counts["time"].dt.year
count_by_year = df_counts.groupby("year")["count"].sum()
yearly_counts = count_by_year.reset_index()

yearly_bar_options = {
    "x": "year",
    "y": "count",
    "labels": {"count": "Total non-NaN entries"},
    "title": "Yearly valid data entries for pteropods",
}
fig = px.bar(yearly_counts, **yearly_bar_options)
fig.show()


<div style="padding: 10px; border-radius: 5px; background-color: #cce5ff; color: #004085; border-left: 5px solid #007bff;">
    <strong>Note :</strong> maybe do a 1997-2007 10 year climatology ?
</div>


In [10]:
# per month
# Tag every time step with its calendar month, then total the valid
# entries per month across all years.
df_counts["month"] = df_counts["time"].dt.month
monthly_counts = df_counts.groupby("month")["count"].sum().reset_index()

fig = px.bar(
    monthly_counts,
    x="month",
    y="count",
    labels={"count": "Total non-NaN entries"},
    title="Monthly valid data entries for pteropods"
)
fig.show()


<div class="alert alert-danger">
    <strong>Warning:</strong> Little data is available for January, March, April, October, November and December
</div>


In [None]:
# Valid entries per season (relies on the "month" column of df_counts).
df_counts["season"] = df_counts["month"].map(get_season)
count_by_season = df_counts.groupby("season")["count"].sum()
seasonal_counts = count_by_season.reset_index()

# Force the calendar order of the bars instead of the alphabetical one.
season_display_order = ["Winter", "Spring", "Summer", "Autumn"]
fig = px.bar(
    seasonal_counts,
    x="season",
    y="count",
    category_orders={"season": season_display_order},
    labels={"count": "Total non-NaN entries"},
    title="Valid pteropod data entries by season",
)
fig.show()


### Depth

In [50]:
# Valid entries per depth level, collapsing every other dimension.
non_depth_dims = ["latitude", "longitude", "time", "is_day"]
per_depth = data_pter.count(dim=non_depth_dims)
df_depth = per_depth.to_dataframe(name="count")
df_depth = df_depth.reset_index()

depth_bar_options = {
    "x": "depth",
    "y": "count",
    "labels": {"count": "Total non-NaN entries"},
    "title": "Valid pteropod data entries by depth",
}
fig = px.bar(df_depth, **depth_bar_options)
fig.show()


### Location 

In [20]:
# Valid entries per latitude, collapsing every other dimension.
non_lat_dims = ["time", "longitude", "depth", "is_day"]
per_lat = data_pter.count(dim=non_lat_dims)
df_lat = per_lat.to_dataframe(name="count")
df_lat = df_lat.reset_index()

lat_axis_labels = {"count": "Total non-NaN entries", "latitude": "Latitude"}
fig = px.bar(
    df_lat,
    x="latitude",
    y="count",
    labels=lat_axis_labels,
    title="Valid pteropod data entries by latitude",
)
fig.show()


In [22]:
# Valid entries per longitude, collapsing every other dimension.
non_lon_dims = ["time", "latitude", "depth", "is_day"]
per_lon = data_pter.count(dim=non_lon_dims)
df_lon = per_lon.to_dataframe(name="count")
df_lon = df_lon.reset_index()

lon_axis_labels = {"count": "Total non-NaN entries", "longitude": "Longitude"}
fig = px.bar(
    df_lon,
    x="longitude",
    y="count",
    labels=lon_axis_labels,
    title="Valid pteropod data entries by longitude",
)
fig.show()


In [41]:
# Valid entries per (latitude, longitude) cell, shown as a heatmap.
per_lat_lon = data_pter.count(dim=["time", "depth", "is_day"])
df_lat_lon = per_lat_lon.to_dataframe(name="non_nan_count")
df_lat_lon = df_lat_lon.reset_index()

# Rows = latitude, columns = longitude, cell = number of valid entries.
heatmap_data = df_lat_lon.pivot(
    index="latitude", columns="longitude", values="non_nan_count"
)

count_heatmap = go.Heatmap(
    z=heatmap_data.values,
    x=heatmap_data.columns,
    y=heatmap_data.index,
    colorscale='plasma',
    colorbar={"title": "Non-NaN entries"},
)
fig = go.Figure(data=count_heatmap)

# Axis labels and title
fig.update_layout(
    yaxis={"title": "Latitude"},
    xaxis={"title": "Longitude"},
    title="Heatmap of valid pteropod data entries by latitude and longitude",
)
fig.show()


<div style="padding: 10px; border-radius: 5px; background-color: #cce5ff; color: #004085; border-left: 5px solid #007bff;">
    <strong>Note :</strong> Locations to select : <br> lat : 50.5 lon : -129.5 <br> lat : 48.5 lon : -130.5 <br> lat : 48.5 lon : -128.5 <br> lat : 49.5 lon : -138.5 <br> lat : 49.5 lon : -134.5 <br> lat : 49.5 lon : -128.5 
</div>

#### Locations*time

In [45]:
# Station whose yearly coverage is inspected.
target_lat, target_lon = 48.5, -130.5

# Restrict to that station and tag each time step with its year.
subset = data_pter.sel(latitude=target_lat, longitude=target_lon)
subset = subset.assign_coords(year=subset["time"].dt.year)

# Non-NaN values per year (still split by depth and day/night) ...
per_year = subset.groupby("year").count(dim="time")

# ... then collapsed to a single number per year.
per_year_sum = per_year.sum(dim=["depth", "is_day"])
per_year_total = per_year_sum.to_dataframe(name="non_nan_count").reset_index()

# Bar chart of the yearly counts
station_title = f"Histogram of Valid Pteropod Data at lat={target_lat}, lon={target_lon}"
fig = px.bar(
    per_year_total,
    x="year",
    y="non_nan_count",
    labels={"year": "Year", "non_nan_count": "Non-NaN Entries"},
    title=station_title,
)
fig.show()



In [143]:
# Candidate stations listed in the note under the location heatmap
# (kept for reference, not plotted):
#   (50.5, -129.5), (48.5, -130.5), (48.5, -128.5),
#   (49.5, -138.5), (49.5, -134.5), (49.5, -128.5)

# Stations actually plotted, as (lat, lon): a full 3 x 3 grid.
locations = [
    (48.5, -130.5),
    (48.5, -129.5),
    (48.5, -128.5),
    (49.5, -130.5),
    (49.5, -129.5),
    (49.5, -128.5),
    (50.5, -130.5),
    (50.5, -129.5),
    (50.5, -128.5),
]
depth = data_pter["depth"]

# One DataFrame per (station, depth, day/night) combination.
all_data = []

for lat, lon in locations:
    for d in depth:
        # The loop variable must not be called `bool`: at notebook top level
        # that rebinds the builtin for every cell executed afterwards.
        for day_flag in (True, False):
            subset = data_pter.sel(
                latitude=lat, longitude=lon, depth=d, is_day=day_flag
            )
            df = subset.to_dataframe(name="biomass").reset_index()
            df["location"] = f"lat={lat}, lon={lon}"
            all_data.append(df)

# Concatenate all DataFrames into one
df_all = pd.concat(all_data, ignore_index=True)

# Scatter (dots only), one colour per station
fig = px.scatter(
    df_all,
    x="time",
    y="biomass",
    color="location",
    title="Pteropod Biomass Time Series at Selected Locations (dots only)",
    labels={"time": "Time", "biomass": "Biomass", "location": "Location", "depth": "Depth"},
)

fig.show()


### Zero Values

In [None]:
# Attach a "year" coordinate (aligned with the time dimension) to the
# pteropod array so later cells can group by year.
time_index = pd.to_datetime(data_pter["time"].values)
years = time_index.year
data_pter.coords["year"] = ("time", years)


In [None]:
# Keep only exact zeros; every other value becomes NaN and is therefore
# ignored by count().
zero_data = data_pter.where(data_pter == 0.0)

# Zeros per time step, then summed within each year.
zeros_per_time = zero_data.count(dim=["latitude", "longitude", "depth", "is_day"])
zero_counts = zeros_per_time.groupby("year").sum()

# Tidy table (columns: year, zero_count) for plotting.
df_zeros = zero_counts.to_dataframe(name="zero_count")
df_zeros = df_zeros.reset_index()

In [18]:
# Bar chart of the number of exact-zero values per year.
zero_bar_options = {
    "x": "year",
    "y": "zero_count",
    "labels": {"zero_count": "Number of zeros"},
    "title": "Yearly zero values",
}
fig = px.bar(df_zeros, **zero_bar_options)
fig.show()


In [38]:
# Zero values per depth level
# Keep only exact zeros; every other value becomes NaN and is therefore
# ignored by count().
zero_data = data_pter.where(data_pter == 0.)

# Counting over every dimension except depth already yields one value per
# depth level, so no additional groupby is needed.
zero_counts = zero_data.count(dim=["latitude", "longitude", "time", "is_day"])

# Convert to DataFrame
df_zeros = zero_counts.to_dataframe(name="zero_count").reset_index()

fig = px.bar(
    df_zeros,
    x="depth",
    y="zero_count",
    labels={"zero_count": "Number of zeros"},
    title="Zero values by depth"
)

fig.show()

## Climatology 2° (1995-2020)

<div style="padding: 10px; border-radius: 5px; background-color: #cce5ff; color: #004085; border-left: 5px solid #007bff;">
    <strong>Note:</strong> Based on the heatmap in Checking Data &gt; Location, a 2° climatology is computed as a first step. <br> All available times are used, giving a 1995-2020 climatology (26 years).
</div>


<div style="padding: 10px; border-radius: 5px; background-color: #d4edda; color: #155724; border-left: 5px solid #28a745;">
    <strong>Question:</strong> Do we keep 1995 (8 data)? same for 1996 (3 data) ?  and 1997 (9 data) to be consistent with forcings ? 
</div>


In [98]:
# Restrict the data to the 2-degree box used for the climatology
# (label-based slices on the latitude and longitude coordinates).
lat_window = slice(48.5, 50.5)
lon_window = slice(-130.5, -128.5)
ds_pter_2deg = data_pter.sel(latitude=lat_window, longitude=lon_window)
ds_pter_2deg

In [131]:
# Strip plot of every valid observation in the 2-degree box, by month and
# coloured by year.
ds_pter_2deg['year'] = ds_pter_2deg['time.year']

# Long-format table for Plotly; rows without a biomass value are dropped
# once here (the original cell repeated this dropna a second time).
df_all = ds_pter_2deg.to_dataframe().reset_index()
df_all = df_all.dropna(subset=["pteropods"])

# Extract month and year
df_all['month'] = df_all['time'].dt.month
df_all['year'] = df_all['time'].dt.year

fig = px.strip(
    df_all,
    x='month',
    y='pteropods',
    color='year',
    title='Pteropod Biomass Observations',
    labels={'month': 'Month', 'pteropods': 'Biomass mg / m3'},
)

# Show month abbreviations instead of month numbers on the x axis.
fig.update_xaxes(
    tickmode='array',
    tickvals=list(range(1, 13)),
    ticktext=[
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
    ]
)

fig.show()


In [57]:
# Valid entries per depth level, restricted to the 2-degree box.
box_non_depth_dims = ["latitude", "longitude", "time", "is_day"]
per_depth = ds_pter_2deg.count(dim=box_non_depth_dims)
df_depth = per_depth.to_dataframe(name="count")
df_depth = df_depth.reset_index()

box_depth_bar_options = {
    "x": "depth",
    "y": "count",
    "labels": {"count": "Total non-NaN entries"},
    "title": "Valid pteropod data entries by depth",
}
fig = px.bar(df_depth, **box_depth_bar_options)
fig.show()


In [48]:
# Non-NaN values per time step inside the 2-degree box. This overwrites
# the `counts` / `df_counts` computed earlier on the full domain.
box_non_time_dims = ["latitude", "longitude", "depth", "is_day"]
counts = ds_pter_2deg.count(dim=box_non_time_dims)

# Tidy table (columns: time, count, plus any remaining coordinates).
df_counts = counts.to_dataframe(name="count")
df_counts = df_counts.reset_index()

In [9]:
# Valid entries per calendar year inside the 2-degree box.
df_counts["year"] = df_counts["time"].dt.year
box_count_by_year = df_counts.groupby("year")["count"].sum()
yearly_counts = box_count_by_year.reset_index()

box_yearly_bar_options = {
    "x": "year",
    "y": "count",
    "labels": {"count": "Total non-NaN entries"},
    "title": "Yearly valid data entries for pteropods in the selected 2° area",
}
fig = px.bar(yearly_counts, **box_yearly_bar_options)
fig.show()

### Monthly

In [50]:
# Monthly climatology over the 2-degree box.
# Tag each time step with its calendar month.
ds_pter_2deg['month'] = ds_pter_2deg['time.month']

# Within each month, every statistic pools all time steps, day/night
# flags, grid cells and depths together.
pooled_dims = ['time', 'is_day', 'longitude', 'latitude', 'depth']

monthly_mean = ds_pter_2deg.groupby('month').mean(dim=pooled_dims)      # mean
monthly_std = ds_pter_2deg.groupby('month').std(dim=pooled_dims)        # standard deviation
monthly_count = ds_pter_2deg.groupby('month').count(dim=pooled_dims)    # non-NaN count
monthly_median = ds_pter_2deg.groupby('month').median(dim=pooled_dims)  # median

# One DataFrame per statistic, keyed by month.
df_mean = monthly_mean.to_dataframe().reset_index()
df_std = monthly_std.to_dataframe().reset_index()
df_count = monthly_count.to_dataframe(name='non_nan_count').reset_index()
df_median = monthly_median.to_dataframe(name='pteropods_median').reset_index()

# Join them on month; mean and std share a column name, hence the suffixes.
df_clim = (
    df_mean.merge(df_std, on='month', suffixes=('_mean', '_std'))
    .merge(df_median, on='month')
    .merge(df_count, on='month')
)

print(df_clim)


    month  pteropods_mean  pteropods_std  pteropods_median  non_nan_count
0       1        0.463927       0.217691          0.455370              5
1       2        0.305677       0.380133          0.158055             42
2       3        0.353286       0.470167          0.110440              7
3       4        1.299415       1.208655          0.699350              6
4       5        8.603429      18.885279          2.042530             61
5       6        5.775160      14.995129          1.295875             52
6       7        1.056929       1.929591          0.352710             27
7       8        2.143262       6.147183          0.295250             47
8       9        2.060499       9.401260          0.284280             60
9      10        0.425594       0.379251          0.364525             19
10     12        0.112745       0.067135          0.112745              2


In [51]:
# November interpolation
# The monthly table has no November row, so one is synthesised as the
# midpoint between October and December.
df_monthly_interp = df_clim.copy()

# Neighbouring months used for the interpolation
row_oct = df_monthly_interp[df_monthly_interp["month"] == 10]
row_dec = df_monthly_interp[df_monthly_interp["month"] == 12]

# Start from a copy of October, then overwrite every field.
row_nov = row_oct.copy()
row_nov["month"] = 11
# No observations back this row: no spread and a zero sample count.
# (np.nan, not np.NaN: the latter alias was removed in NumPy 2.0.)
row_nov["pteropods_std"] = np.nan
row_nov["non_nan_count"] = 0
for col in ["pteropods_mean", "pteropods_median"]:
    # .values strips the index so the two single-row Series add positionally.
    row_nov[col] = (row_oct[col].values + row_dec[col].values) / 2

# Add the November row to the table
df_monthly_interp = pd.concat([df_monthly_interp, row_nov], ignore_index=True)

# Sort by month
df_monthly_interp = df_monthly_interp.sort_values("month").reset_index(drop=True)


In [52]:
# Display the monthly table built in the previous cell.
df_monthly_interp

Unnamed: 0,month,pteropods_mean,pteropods_std,pteropods_median,non_nan_count
0,1,0.463927,0.217691,0.45537,5
1,2,0.305677,0.380133,0.158055,42
2,3,0.353286,0.470167,0.11044,7
3,4,1.299415,1.208655,0.69935,6
4,5,8.603429,18.885279,2.04253,61
5,6,5.77516,14.995129,1.295875,52
6,7,1.056929,1.929591,0.35271,27
7,8,2.143262,6.147183,0.29525,47
8,9,2.060499,9.40126,0.28428,60
9,10,0.425594,0.379251,0.364525,19


In [53]:
# Monthly climatology figure: mean with standard-deviation error bars,
# median markers, and the sample size as bars on a secondary axis.
clim_months = df_clim['month']

# Mean + standard deviation (line with error bars)
mean_trace = go.Scatter(
    x=clim_months,
    y=df_clim['pteropods_mean'],
    mode='lines+markers',
    name='Mean biomass',
    error_y={'type': 'data', 'array': df_clim['pteropods_std'], 'visible': True},
    line={'color': 'royalblue'},
    marker={'size': 6},
)

# Median (dots)
median_trace = go.Scatter(
    x=clim_months,
    y=df_clim['pteropods_median'],
    mode='markers',
    name='Median biomass',
    marker={'color': 'orange', 'symbol': 'circle', 'size': 8},
)

# Non-NaN count (bars, drawn against the second y axis)
count_trace = go.Bar(
    x=clim_months,
    y=df_clim['non_nan_count'],
    name='Valid data count',
    yaxis='y2',
    opacity=0.4,
    marker_color='lightgrey',
)

fig = go.Figure()
for trace in (mean_trace, median_trace, count_trace):
    fig.add_trace(trace)

# Layout: log biomass axis on the left, count axis overlaid on the right.
fig.update_layout(
    title='Monthly Climatology of Pteropod Biomass',
    xaxis={'title': 'Month'},
    yaxis={'type': 'log', 'title': 'Mean Biomass [mg m-3]'},
    yaxis2={
        'title': 'Non-NaN count',
        'overlaying': 'y',
        'side': 'right',
        'showgrid': False,
    },
    legend={'x': 0.01, 'y': 0.99},
    template='plotly_white',
)

fig.show()


In [65]:
import plotly.graph_objects as go

# Same monthly figure, built from the table that includes the synthetic
# November row; that row is drawn with its own marker.

# Row masks separating November from the other months
is_nov = df_monthly_interp["month"] == 11
is_not_nov = df_monthly_interp["month"] != 11
observed_rows = df_monthly_interp[is_not_nov]
november_rows = df_monthly_interp[is_nov]

# Mean biomass, all months except November (line with error bars)
observed_mean_trace = go.Scatter(
    x=observed_rows['month'],
    y=observed_rows['pteropods_mean'],
    mode='lines+markers',
    name='Mean biomass',
    error_y={'type': 'data', 'array': observed_rows['pteropods_std'], 'visible': True},
    line={'color': 'royalblue'},
    marker={'size': 6},
)

# Mean biomass, November only (cross marker, no connecting line)
november_mean_trace = go.Scatter(
    x=november_rows['month'],
    y=november_rows['pteropods_mean'],
    mode='markers',
    name='Mean biomass (Nov, interpolated)',
    error_y={'type': 'data', 'array': november_rows['pteropods_std'], 'visible': True},
    marker={'color': 'royalblue', 'symbol': 'x', 'size': 10},
    showlegend=True,
)

# Median biomass, every month
interp_median_trace = go.Scatter(
    x=df_monthly_interp['month'],
    y=df_monthly_interp['pteropods_median'],
    mode='markers',
    name='Median biomass',
    marker={'color': 'orange', 'symbol': 'circle', 'size': 8},
)

# Non-NaN count (bars, second y axis)
interp_count_trace = go.Bar(
    x=df_monthly_interp['month'],
    y=df_monthly_interp['non_nan_count'],
    name='Valid data count',
    yaxis='y2',
    opacity=0.4,
    marker_color='lightgrey',
)

fig = go.Figure()
for trace in (
    observed_mean_trace,
    november_mean_trace,
    interp_median_trace,
    interp_count_trace,
):
    fig.add_trace(trace)

# Layout
fig.update_layout(
    title='Monthly Climatology of Pteropod Biomass (November interpolated)',
    xaxis={'title': 'Month', 'tickmode': 'linear'},
    yaxis={'type': 'log', 'title': 'Biomass [mg m-3]'},
    yaxis2={
        'title': 'Non-NaN count',
        'overlaying': 'y',
        'side': 'right',
        'showgrid': False,
    },
    legend={'x': 0.01, 'y': 0.99},
    template='plotly_white',
)

fig.show()


In [55]:
# Distribution of the May observations: box plot, then histogram.
may_mask = ds_pter_2deg["month"] == 5
ds_may = ds_pter_2deg[may_mask]
df_may = ds_may.to_dataframe()

biomass_label = {"pteropods": "Biomass"}

# Box plot
fig = px.box(
    df_may,
    x="pteropods",
    title="Distribution of Pteropod Biomass in May",
    labels=biomass_label,
)
fig.show()

# Histogram with a rug of small marks for the individual values
fig = px.histogram(
    df_may,
    x="pteropods",
    nbins=50,
    marginal="rug",
    title="Histogram of Pteropod Biomass in May",
    labels=biomass_label,
)
fig.update_layout(bargap=0.1)
fig.show()


In [64]:
# Box plot of biomass for a subset of months.
months_to_plot = [5, 6, 7, 8, 9]

# Long-format table restricted to those months and to valid values.
box_table = ds_pter_2deg.to_dataframe().reset_index()
in_selected_months = box_table["month"].isin(months_to_plot)
df_selected = box_table[in_selected_months]
# reset_index() without drop keeps the previous row labels in an
# "index" column.
df_selected = df_selected.dropna(subset=["pteropods"]).reset_index()

fig = px.box(
    df_selected,
    x="month",
    y="pteropods",
    title="Pteropod Biomass Distribution by Month ",
    labels={"pteropods": "Biomass", "month": "Month"},
)

# Group the boxes per month
fig.update_layout(boxmode="group")
fig.show()


In [165]:
# Display the table of valid observations for the selected months.
df_selected

Unnamed: 0,index,time,is_day,latitude,longitude,depth,year,month,pteropods
0,40,1995-09-24,False,49.5,-128.5,50,1995,9,0.00000
1,116,1995-09-24,True,49.5,-128.5,250,1995,9,1.18244
2,184,1995-09-25,False,49.5,-128.5,50,1995,9,3.37130
3,188,1995-09-25,False,49.5,-128.5,250,1995,9,0.62439
4,260,1995-09-25,True,49.5,-128.5,250,1995,9,0.07184
...,...,...,...,...,...,...,...,...,...
242,88402,2020-07-05,True,50.5,-129.5,150,2020,7,0.30357
243,88796,2020-08-14,True,48.5,-128.5,250,2020,8,0.47320
244,88924,2020-08-15,True,48.5,-130.5,250,2020,8,0.00371
245,89556,2020-08-30,True,50.5,-129.5,250,2020,8,0.13172


In [175]:
# Print every observation of the selected months whose biomass exceeds the
# threshold, one line per observation.
outlier_threshold = 20
report_columns = ["month", "year", "latitude", "longitude", "pteropods"]

# Select the rows with a boolean mask instead of indexing row by row.
outliers = df_selected.loc[df_selected["pteropods"] > outlier_threshold, report_columns]

for month, year, lat, lon, biomass in outliers.itertuples(index=False, name=None):
    print('date :', month, year, ' Location :', lat, 'N', lon, 'E', ' Pteropod Biomass :', round(biomass, 2))

date : 5 1999  Location : 50.5 N -129.5 E  Pteropod Biomass : 29.74
date : 5 1999  Location : 50.5 N -129.5 E  Pteropod Biomass : 93.47
date : 9 2001  Location : 50.5 N -129.5 E  Pteropod Biomass : 70.44
date : 9 2001  Location : 50.5 N -129.5 E  Pteropod Biomass : 23.44
date : 5 2003  Location : 50.5 N -129.5 E  Pteropod Biomass : 86.33
date : 5 2003  Location : 50.5 N -129.5 E  Pteropod Biomass : 37.19
date : 5 2003  Location : 50.5 N -129.5 E  Pteropod Biomass : 79.01
date : 8 2009  Location : 48.5 N -130.5 E  Pteropod Biomass : 41.34
date : 6 2011  Location : 48.5 N -128.5 E  Pteropod Biomass : 26.69
date : 6 2013  Location : 49.5 N -128.5 E  Pteropod Biomass : 42.32
date : 6 2013  Location : 50.5 N -129.5 E  Pteropod Biomass : 32.02
date : 6 2013  Location : 48.5 N -130.5 E  Pteropod Biomass : 95.87


In [None]:
# One biomass histogram per selected month, laid out on a grid.
import calendar

months_to_plot = [5, 6, 7, 8, 9]

# Size the grid from the number of months: a fixed 2 x 2 grid cannot hold
# five panels (the fifth would target a row that does not exist).
n_cols = 2
n_rows = -(-len(months_to_plot) // n_cols)  # ceiling division

fig = make_subplots(
    rows=n_rows,
    cols=n_cols,
    subplot_titles=[f"Month {m}" for m in months_to_plot],
)

for i, month in enumerate(months_to_plot):
    df_month = df_selected[df_selected["month"] == month]

    # Fill the grid row by row; plotly rows/cols are 1-based.
    grid_row, grid_col = divmod(i, n_cols)

    fig.add_trace(
        go.Histogram(
            x=df_month["pteropods"],
            name=f"Month {month}",
            showlegend=False,
            opacity=0.75
        ),
        row=grid_row + 1,
        col=grid_col + 1
    )

# Build the title from the plotted months so the two cannot drift apart.
month_names = ", ".join(calendar.month_name[m] for m in months_to_plot)

fig.update_layout(
    title=f"Histograms of Pteropod Biomass in Months {month_names}",
    height=800,
    bargap=0.1
)

fig.show()


### Seasonal

In [69]:
#define Seasons
# Attach the calendar month of each time step as a coordinate.
ds_pter_2deg = ds_pter_2deg.assign_coords(month=ds_pter_2deg['time'].dt.month)

# Map every month to its season name. `vectorize=True` already wraps the
# scalar function with numpy.vectorize, so it is passed unwrapped.
season_labels = xr.apply_ufunc(
    get_season,
    ds_pter_2deg['month'],
    vectorize=True
)

# Add the season labels as a coordinate along time.
ds_pter_2deg = ds_pter_2deg.assign_coords(season=("time", season_labels.data))


In [70]:
# Seasonal climatology over the 2-degree box.
# Within each season, every statistic pools all time steps, grid cells,
# depths and day/night flags together.
season_pooled_dims = ['time', 'longitude', 'latitude', 'depth', 'is_day']

seasonal_mean = ds_pter_2deg.groupby('season').mean(dim=season_pooled_dims, skipna=True)
seasonal_std = ds_pter_2deg.groupby('season').std(dim=season_pooled_dims, skipna=True)
seasonal_median = ds_pter_2deg.groupby('season').median(dim=season_pooled_dims, skipna=True)
seasonal_count = ds_pter_2deg.groupby('season').count(dim=season_pooled_dims)

# One DataFrame per statistic, keyed by season.
df_seasonal_mean = seasonal_mean.to_dataframe().reset_index()
df_seasonal_std = seasonal_std.to_dataframe().reset_index()
df_seasonal_median = seasonal_median.to_dataframe(name="pteropods_median").reset_index()
df_seasonal_count = seasonal_count.to_dataframe(name="non_nan_count").reset_index()

# Join them on season; mean and std share a column name, hence the suffixes.
df_seasonal = (
    df_seasonal_mean.merge(df_seasonal_std, on="season", suffixes=("_mean", "_std"))
    .merge(df_seasonal_median, on="season")
    .merge(df_seasonal_count, on="season")
)

In [71]:
# Put the seasons in calendar order instead of alphabetical order.
from pandas.api.types import CategoricalDtype

# Ordered categorical type that defines the sort order.
season_order = CategoricalDtype(
    categories=["Winter", "Spring", "Summer", "Autumn"],
    ordered=True,
)

# Cast the season column to that type, then sort on it with a fresh index.
df_seasonal["season"] = df_seasonal["season"].astype(season_order)
df_seasonal = df_seasonal.sort_values("season", ignore_index=True)


In [72]:
# Display the seasonal climatology table.
df_seasonal

Unnamed: 0,season,pteropods_mean,pteropods_std,pteropods_median,non_nan_count
0,Winter,0.31395,0.364533,0.1638,49
1,Spring,7.230793,17.407405,1.78432,74
2,Summer,3.409355,10.572787,0.52449,126
3,Autumn,1.667294,8.224943,0.299075,79


In [74]:
# Seasonal climatology figure: mean with standard-deviation error bars,
# median markers, and the sample size as bars on a secondary axis.
season_axis = df_seasonal['season']

# Mean + standard deviation (line with error bars)
seasonal_mean_trace = go.Scatter(
    x=season_axis,
    y=df_seasonal['pteropods_mean'],
    mode='lines+markers',
    name='Mean biomass',
    error_y={'type': 'data', 'array': df_seasonal['pteropods_std'], 'visible': True},
    line={'color': 'royalblue'},
    marker={'size': 6},
)

# Median (dots)
seasonal_median_trace = go.Scatter(
    x=season_axis,
    y=df_seasonal['pteropods_median'],
    mode='markers',
    name='Median biomass',
    marker={'color': 'orange', 'symbol': 'circle', 'size': 8},
)

# Non-NaN count (bars, second y axis)
seasonal_count_trace = go.Bar(
    x=season_axis,
    y=df_seasonal['non_nan_count'],
    name='Valid data count',
    yaxis='y2',
    opacity=0.4,
    marker_color='lightgrey',
)

fig = go.Figure()
for trace in (seasonal_mean_trace, seasonal_median_trace, seasonal_count_trace):
    fig.add_trace(trace)

# Layout: log biomass axis on the left, count axis overlaid on the right.
fig.update_layout(
    title='Seasonal Climatology of Pteropod Biomass',
    xaxis={'title': 'Season'},
    yaxis={'title': 'Biomass [mg m-3]', 'type': 'log'},
    yaxis2={
        'title': 'Non-NaN count',
        'overlaying': 'y',
        'side': 'right',
        'showgrid': False,
    },
    legend={'x': 0.01, 'y': 0.99},
    template='plotly_white',
)

fig.show()


### Comparison with Mackas and Galbraith (2012)

In [99]:
def get_group_month(month):
    """Return the two-month period label used for the paper comparison.

    February/March map to "Feb-Mar", May/June to "May-Jun" and
    August/September to "Aug-Sep". Any other value yields "Others",
    i.e. months not taken into account in the paper.
    """
    paper_periods = {
        2: "Feb-Mar", 3: "Feb-Mar",  # February, March
        5: "May-Jun", 6: "May-Jun",  # May, June
        8: "Aug-Sep", 9: "Aug-Sep",  # August, September
    }
    return paper_periods.get(month, "Others")

In [101]:
#define Seasons (two-month periods used for the paper comparison)
# Attach the calendar month of each time step as a coordinate.
ds_pter_2deg = ds_pter_2deg.assign_coords(month=ds_pter_2deg['time'].dt.month)

# Map every month to its period label. `vectorize=True` already wraps the
# scalar function with numpy.vectorize, so it is passed unwrapped.
paperseason_labels = xr.apply_ufunc(
    get_group_month,
    ds_pter_2deg['month'],
    vectorize=True
)

# NOTE: this replaces the `season` coordinate with the paper's period
# labels; any cell grouping by `season` after this point uses these labels.
ds_pter_2deg = ds_pter_2deg.assign_coords(season=("time", paperseason_labels.data))

In [137]:
# Display the 2-degree array with its current coordinates.
ds_pter_2deg

In [138]:
# Climatology per paper period (grouping on the current `season` labels).
# Within each period, every statistic pools all time steps, grid cells,
# depths and day/night flags together.
paper_pooled_dims = ['time', 'longitude', 'latitude', 'depth', 'is_day']

paperseasonal_mean = ds_pter_2deg.groupby('season').mean(dim=paper_pooled_dims, skipna=True)
paperseasonal_std = ds_pter_2deg.groupby('season').std(dim=paper_pooled_dims, skipna=True)
paperseasonal_median = ds_pter_2deg.groupby('season').median(dim=paper_pooled_dims, skipna=True)
paperseasonal_count = ds_pter_2deg.groupby('season').count(dim=paper_pooled_dims)

# One DataFrame per statistic. These reuse (and overwrite) the
# df_seasonal_* names.
df_seasonal_mean = paperseasonal_mean.to_dataframe().reset_index()
df_seasonal_std = paperseasonal_std.to_dataframe().reset_index()
df_seasonal_median = paperseasonal_median.to_dataframe(name="pteropods_median").reset_index()
df_seasonal_count = paperseasonal_count.to_dataframe(name="non_nan_count").reset_index()

# Join them on season; mean and std share a column name, hence the suffixes.
df_paperseasonal = (
    df_seasonal_mean.merge(df_seasonal_std, on="season", suffixes=("_mean", "_std"))
    .merge(df_seasonal_median, on="season")
    .merge(df_seasonal_count, on="season")
)

In [139]:
# Display the per-period climatology table.
df_paperseasonal

Unnamed: 0,season,pteropods_mean,pteropods_std,pteropods_median,non_nan_count
0,Aug-Sep,2.096852,8.133943,0.28642,107
1,Feb-Mar,0.312479,0.394608,0.14394,49
2,May-Jun,7.301925,17.262375,1.68828,113
3,Others,0.796017,1.424509,0.364525,59


In [140]:
# Put the paper periods in calendar order, with "Others" last.
from pandas.api.types import CategoricalDtype

# Ordered categorical type that defines the sort order.
paperseason_order = CategoricalDtype(
    categories=["Feb-Mar", "May-Jun", "Aug-Sep", "Others"],
    ordered=True,
)

# Cast the season column to that type, then sort on it with a fresh index.
df_paperseasonal["season"] = df_paperseasonal["season"].astype(paperseason_order)
df_paperseasonal = df_paperseasonal.sort_values("season", ignore_index=True)

In [141]:
# Drop the "Others" row (months not taken into account in the paper).
# Filtering on the label, rather than slicing the first three rows, does
# not depend on the row order or on how many periods are present.
df_paperseasonal = df_paperseasonal[df_paperseasonal["season"] != "Others"].reset_index(drop=True)
df_paperseasonal

Unnamed: 0,season,pteropods_mean,pteropods_std,pteropods_median,non_nan_count
0,Feb-Mar,0.312479,0.394608,0.14394,49
1,May-Jun,7.301925,17.262375,1.68828,113
2,Aug-Sep,2.096852,8.133943,0.28642,107


In [108]:
# Paper-period figure: mean with standard-deviation error bars, median
# markers, and the sample size as bars on a secondary axis.
period_axis = df_paperseasonal['season']

# Mean + standard deviation (line with error bars)
period_mean_trace = go.Scatter(
    x=period_axis,
    y=df_paperseasonal['pteropods_mean'],
    mode='lines+markers',
    name='Mean biomass',
    error_y={'type': 'data', 'array': df_paperseasonal['pteropods_std'], 'visible': True},
    line={'color': 'royalblue'},
    marker={'size': 6},
)

# Median (dots)
period_median_trace = go.Scatter(
    x=period_axis,
    y=df_paperseasonal['pteropods_median'],
    mode='markers',
    name='Median biomass',
    marker={'color': 'orange', 'symbol': 'circle', 'size': 8},
)

# Non-NaN count (bars, second y axis)
period_count_trace = go.Bar(
    x=period_axis,
    y=df_paperseasonal['non_nan_count'],
    name='Valid data count',
    yaxis='y2',
    opacity=0.4,
    marker_color='lightgrey',
)

fig = go.Figure()
for trace in (period_mean_trace, period_median_trace, period_count_trace):
    fig.add_trace(trace)

# Layout: log biomass axis on the left, count axis overlaid on the right.
fig.update_layout(
    title='Average Seasonal Cycles of Pteropod Biomass',
    xaxis={'title': 'Season'},
    yaxis={'title': 'Biomass [mg m-3]', 'type': 'log'},
    yaxis2={
        'title': 'Non-NaN count',
        'overlaying': 'y',
        'side': 'right',
        'showgrid': False,
    },
    legend={'x': 0.01, 'y': 0.99},
    template='plotly_white',
)

fig.show()


## Climatology with obs epipelagic layer depth

### Yearly mean of epipelagic layer depth

In [176]:
# Open the daily pelagic-layer-depth file and keep the variable of interest.
path_pld = "/data/rd_exchange/sroyer/SEAPOPYM/daily_pelagiclayerdepth_papa_1998_2020.nc"
ds_pld = xr.open_dataset(path_pld, engine='netcdf4')
pld = ds_pld['pelagic_layer_depth']

In [177]:
# Mean pelagic layer depth over every value in `pld` (no dimension is
# given, so the mean runs over the whole array), as a plain float.
pld_overall_mean = pld.mean()
yearly_pld = float(pld_overall_mean)
print(yearly_pld)

81.71123059583354


#### Seasons (like paper)

In [178]:
# Paper-period figure with depth-integrated biomass: every biomass
# statistic is multiplied by the mean layer depth and divided by 1000
# (axis labelled g m-2).
period_axis = df_paperseasonal['season']

# Mean + standard deviation (line with error bars), both rescaled
integrated_mean_trace = go.Scatter(
    x=period_axis,
    y=df_paperseasonal['pteropods_mean'] * yearly_pld / 1000,
    mode='lines+markers',
    name='Mean biomass',
    error_y={
        'type': 'data',
        'array': df_paperseasonal['pteropods_std'] * yearly_pld / 1000,
        'visible': True,
    },
    line={'color': 'royalblue'},
    marker={'size': 6},
)

# Median (dots), rescaled the same way
integrated_median_trace = go.Scatter(
    x=period_axis,
    y=df_paperseasonal['pteropods_median'] * yearly_pld / 1000,
    mode='markers',
    name='Median biomass',
    marker={'color': 'orange', 'symbol': 'circle', 'size': 8},
)

# Non-NaN count (bars, second y axis)
integrated_count_trace = go.Bar(
    x=period_axis,
    y=df_paperseasonal['non_nan_count'],
    name='Valid data count',
    yaxis='y2',
    opacity=0.4,
    marker_color='lightgrey',
)

fig = go.Figure()
for trace in (integrated_mean_trace, integrated_median_trace, integrated_count_trace):
    fig.add_trace(trace)

# Layout: log biomass axis on the left, count axis overlaid on the right.
fig.update_layout(
    title='Average Seasonal Cycles of Pteropod Biomass',
    xaxis={'title': 'Season'},
    yaxis={'title': 'Biomass [g m-2]', 'type': 'log'},
    yaxis2={
        'title': 'Non-NaN count',
        'overlaying': 'y',
        'side': 'right',
        'showgrid': False,
    },
    legend={'x': 0.01, 'y': 0.99},
    template='plotly_white',
)

fig.show()


#### Monthly 

In [179]:
# Monthly climatology with depth-integrated biomass: every biomass statistic
# is multiplied by the mean layer depth and divided by 1000 (axis labelled
# g m-2). The error bars are rescaled with the same factor as the means;
# leaving them in mg m-3 would draw them in a different unit than the axis.
fig = go.Figure()

# Mask for November
is_nov = df_monthly_interp["month"] == 11
is_not_nov = df_monthly_interp["month"] != 11

# === Mean Biomass ===
# Non-November
fig.add_trace(go.Scatter(
    x=df_monthly_interp[is_not_nov]['month'],
    y=df_monthly_interp[is_not_nov]['pteropods_mean']*yearly_pld/1000,
    mode='lines+markers',
    name='Mean biomass',
    error_y=dict(
        type='data',
        array=df_monthly_interp[is_not_nov]['pteropods_std']*yearly_pld/1000,
        visible=True
    ),
    line=dict(color='royalblue'),
    marker=dict(size=6)
))

# November (special marker)
fig.add_trace(go.Scatter(
    x=df_monthly_interp[is_nov]['month'],
    y=df_monthly_interp[is_nov]['pteropods_mean']*yearly_pld/1000,
    mode='markers',
    name='Mean biomass (Nov, interpolated)',
    error_y=dict(
        type='data',
        array=df_monthly_interp[is_nov]['pteropods_std']*yearly_pld/1000,
        visible=True
    ),
    marker=dict(color='royalblue', symbol='x', size=10),
    showlegend=True
))

# === Median Biomass ===
fig.add_trace(go.Scatter(
    x=df_monthly_interp['month'],
    y=df_monthly_interp['pteropods_median']*yearly_pld/1000,
    mode='markers',
    name='Median biomass',
    marker=dict(color='orange', symbol='circle', size=8)
))

# === Non-NaN count (bar, second y axis) ===
fig.add_trace(go.Bar(
    x=df_monthly_interp['month'],
    y=df_monthly_interp['non_nan_count'],
    name='Valid data count',
    yaxis='y2',
    opacity=0.4,
    marker_color='lightgrey'
))

# === Layout ===
fig.update_layout(
    title='Monthly Climatology of Pteropod Biomass (November interpolated)',
    xaxis=dict(title='Month', tickmode='linear'),
    yaxis=dict(
        type='log',
        title='Biomass [g m-2]',
    ),
    yaxis2=dict(
        title='Non-NaN count',
        overlaying='y',
        side='right',
        showgrid=False
    ),
    legend=dict(x=0.01, y=0.99),
    template='plotly_white'
)

fig.show()


### Monthly epipelagic layer depth

In [180]:
# Display the pelagic-layer-depth array.
pld

In [181]:
# Mean pelagic layer depth per calendar month.
# Tag each time step with its month ...
pld['month'] = pld['time'].dt.month
# ... then average over time within each month.
pld_by_month = pld.groupby('month')
monthly_pld = pld_by_month.mean(dim='time')

#### Seasons

In [182]:
#define Seasons (two-month periods used for the paper comparison)
# Derive the month directly from time so this cell does not depend on a
# `month` coordinate having been added by another cell.
pld = pld.assign_coords(month=pld['time'].dt.month)

# Map every month to its period label. `vectorize=True` already wraps the
# scalar function with numpy.vectorize, so it is passed unwrapped.
paperseason_labels = xr.apply_ufunc(
    get_group_month,
    pld['month'],
    vectorize=True
)

# Add the period labels as a coordinate along time.
pld = pld.assign_coords(season=("time", paperseason_labels.data))

#-------------------
# Mean layer depth per period
seasonal_pld = pld.groupby('season').mean(dim=['time'])
# Convert to DataFrame
df_seasonal_pld = seasonal_pld.to_dataframe().reset_index()
#-------------------

# Put the periods in calendar order, with "Others" last.
from pandas.api.types import CategoricalDtype

paperseason_order = CategoricalDtype(
    categories=["Feb-Mar", "May-Jun", "Aug-Sep", "Others"],
    ordered=True
)

df_seasonal_pld["season"] = df_seasonal_pld["season"].astype(paperseason_order)
df_seasonal_pld = df_seasonal_pld.sort_values("season").reset_index(drop=True)

# Drop the "Others" row by label rather than by position, and renumber the
# rows from 0 so the table lines up row by row with the biomass table.
df_seasonal_pld = df_seasonal_pld[df_seasonal_pld["season"] != "Others"].reset_index(drop=True)


In [183]:
# Show the seasonal pelagic-layer-depth table as the cell output
df_seasonal_pld

Unnamed: 0,season,pelagic_layer_depth
0,Feb-Mar,82.442908
1,May-Jun,81.90519
2,Aug-Sep,82.507974


In [184]:
# Seasonal cycle of pteropod biomass: each statistic is multiplied by the
# seasonal mean pelagic layer depth and divided by 1000 (axis labelled g m-2).
# NOTE(review): the two tables are combined row by row through their index,
# not through the season label — confirm both are in the same season order.
season_axis = df_paperseasonal['season']
layer_depth = df_seasonal_pld['pelagic_layer_depth']

# Mean as a line, with the standard deviation as error bars
mean_trace = go.Scatter(
    x=season_axis,
    y=df_paperseasonal['pteropods_mean'] * layer_depth / 1000,
    mode='lines+markers',
    name='Mean biomass',
    error_y={
        'type': 'data',
        'array': df_paperseasonal['pteropods_std'] * layer_depth / 1000,
        'visible': True,
    },
    line={'color': 'royalblue'},
    marker={'size': 6},
)

# Median as standalone dots
median_trace = go.Scatter(
    x=season_axis,
    y=df_paperseasonal['pteropods_median'] * layer_depth / 1000,
    mode='markers',
    name='Median biomass',
    marker={'color': 'orange', 'symbol': 'circle', 'size': 8},
)

# Number of valid values, as bars on the secondary axis
count_trace = go.Bar(
    x=season_axis,
    y=df_paperseasonal['non_nan_count'],
    name='Valid data count',
    yaxis='y2',
    opacity=0.4,
    marker_color='lightgrey',
)

fig = go.Figure()
for trace in (mean_trace, median_trace, count_trace):
    fig.add_trace(trace)

# Log biomass axis on the left, count axis overlaid on the right
fig.update_layout(
    title='Average Seasonal Cycles of Pteropod Biomass',
    xaxis={'title': 'Season'},
    yaxis={'title': 'Biomass [g m-2]', 'type': 'log'},
    yaxis2={
        'title': 'Non-NaN count',
        'overlaying': 'y',
        'side': 'right',
        'showgrid': False,
    },
    legend={'x': 0.01, 'y': 0.99},
    template='plotly_white',
)

fig.show()


#### Monthly

In [185]:
fig = go.Figure()

# Work on a copy so the statistics in df_monthly_interp keep their original units
df_m_interp_gm2 = df_monthly_interp.copy()

# Convert units with the monthly pelagic layer depth (value * depth / 1000).
# The standard deviation is converted as well: it is drawn as error bars on
# the same g m-2 axis as the mean, so it must carry the same scaling.
# NOTE(review): assumes monthly_pld lines up with the monthly rows of
# df_monthly_interp — confirm.
for stat_column in ('pteropods_mean', 'pteropods_median', 'pteropods_std'):
    df_m_interp_gm2[stat_column] = df_monthly_interp[stat_column]*monthly_pld/1000

# Masks separating November (flagged as interpolated in the legend) from the other months
is_nov = df_m_interp_gm2["month"] == 11
is_not_nov = df_m_interp_gm2["month"] != 11

# === Mean Biomass ===
# Non-November: connected line with standard-deviation error bars
fig.add_trace(go.Scatter(
    x=df_m_interp_gm2[is_not_nov]['month'],
    y=df_m_interp_gm2[is_not_nov]['pteropods_mean'],
    mode='lines+markers',
    name='Mean biomass',
    error_y=dict(
        type='data',
        array=df_m_interp_gm2[is_not_nov]['pteropods_std'],
        visible=True
    ),
    line=dict(color='royalblue'),
    marker=dict(size=6)
))

# November: special marker so the interpolated month stands out
fig.add_trace(go.Scatter(
    x=df_m_interp_gm2[is_nov]['month'],
    y=df_m_interp_gm2[is_nov]['pteropods_mean'],
    mode='markers',
    name='Mean biomass (Nov, interpolated)',
    error_y=dict(
        type='data',
        array=df_m_interp_gm2[is_nov]['pteropods_std'],
        visible=True
    ),
    marker=dict(color='royalblue', symbol='x', size=10),
    showlegend=True
))

# === Median Biomass ===
fig.add_trace(go.Scatter(
    x=df_m_interp_gm2['month'],
    y=df_m_interp_gm2['pteropods_median'],
    mode='markers',
    name='Median biomass',
    marker=dict(color='orange', symbol='circle', size=8)
))

# === Non-NaN count (bar, secondary axis) ===
fig.add_trace(go.Bar(
    x=df_m_interp_gm2['month'],
    y=df_m_interp_gm2['non_nan_count'],
    name='Valid data count',
    yaxis='y2',
    opacity=0.4,
    marker_color='lightgrey'
))

# === Layout ===
# Log biomass axis on the left, count axis overlaid on the right
fig.update_layout(
    title='Monthly Climatology of Pteropod Biomass (November interpolated)',
    xaxis=dict(title='Month', tickmode='linear'),
    yaxis=dict(
        type='log',
        title='Biomass [g m-2]',
    ),
    yaxis2=dict(
        title='Non-NaN count',
        overlaying='y',
        side='right',
        showgrid=False
    ),
    legend=dict(x=0.01, y=0.99),
    template='plotly_white'
)

fig.show()


### Daily observed epipelagic layer depth

In [10]:
# Read the second processed zarr store fully into memory, then display it
papa_data_v2 = xr.load_dataset("../2_bis_processed_pteropods/papa_pteropod.zarr", engine="zarr")
papa_data_v2

In [9]:
# Pull the 'pteropods' variable out of the v2 dataset and display it
ds_v2_pter = papa_data_v2['pteropods']
ds_v2_pter

In [4]:
# Pull the 'epipelagic_depth' variable out of the v2 dataset and display it
ds_v2_pld = papa_data_v2['epipelagic_depth']
ds_v2_pld

In [5]:
# Integrate the concentration over the epipelagic layer:
#   [mg/m3] * [m] = [mg/m2], and dividing by 1000 gives [g/m2]
ds_v2_pter_gm2 = ds_v2_pter * ds_v2_pld / 1000


In [6]:
# Label the converted array so exports and plots carry the right name and units
ds_v2_pter_gm2.name = 'pteropod_biomass_gm2'
ds_v2_pter_gm2.attrs.update({
    'units': 'g m-2',
    'long_name': 'Pteropod integrated biomass in epipelagic layer',
})

ds_v2_pter_gm2


#### Seasons 

In [191]:
# Tag every time step of the g m-2 array with its month and its season group
# (group labels come from the notebook's get_group_month helper).
ds_v2_pter_gm2 = ds_v2_pter_gm2.assign_coords(
    {"month": ds_v2_pter_gm2['time'].dt.month}
)

# One label per time step
paperseason_labels = xr.apply_ufunc(
    np.vectorize(get_group_month), ds_v2_pter_gm2['month'], vectorize=True
)

# Store the labels as a coordinate along time so groupby('season') works
ds_v2_pter_gm2 = ds_v2_pter_gm2.assign_coords(
    {"season": ("time", paperseason_labels.data)}
)

In [192]:
# Seasonal climatology of the depth-integrated biomass: within each season
# group, collapse every dimension into a single statistic.
reduce_dims = ['time', 'longitude', 'latitude', 'depth', 'is_day']
by_paperseason = ds_v2_pter_gm2.groupby('season')

paperseasonal_mean = by_paperseason.mean(dim=reduce_dims, skipna=True)
paperseasonal_std = by_paperseason.std(dim=reduce_dims, skipna=True)
paperseasonal_median = by_paperseason.median(dim=reduce_dims, skipna=True)
paperseasonal_count = by_paperseason.count(dim=reduce_dims)

# One DataFrame per statistic (for plotly); median and count get explicit
# column names, mean and std are told apart by the merge suffixes below
df_seasonal_mean = paperseasonal_mean.to_dataframe().reset_index()
df_seasonal_std = paperseasonal_std.to_dataframe().reset_index()
df_seasonal_median = paperseasonal_median.to_dataframe(name="pteropods_median").reset_index()
df_seasonal_count = paperseasonal_count.to_dataframe(name="non_nan_count").reset_index()

# Join all statistics on the season label
df_paperseasonal_v2 = (
    df_seasonal_mean
    .merge(df_seasonal_std, on="season", suffixes=("_mean", "_std"))
    .merge(df_seasonal_median, on="season")
    .merge(df_seasonal_count, on="season")
)

In [193]:
# Put the season groups in chronological order instead of alphabetical order
from pandas.api.types import CategoricalDtype

# Ordered categorical: the sort order follows the list below
paperseason_order = CategoricalDtype(
    categories=["Feb-Mar", "May-Jun", "Aug-Sep", "Others"], ordered=True
)

# Cast the label column, then sort on it and renumber the rows from 0
df_paperseasonal_v2["season"] = df_paperseasonal_v2["season"].astype(paperseason_order)
df_paperseasonal_v2 = df_paperseasonal_v2.sort_values(by="season").reset_index(drop=True)

In [194]:
# Keep only the three paper seasons. Selecting by label (rather than the
# positional [:3] slice) stays correct even if one of the groups is absent,
# in which case a positional slice would silently keep the "Others" row.
df_paperseasonal_v2 = df_paperseasonal_v2[
    df_paperseasonal_v2["season"].isin(["Feb-Mar", "May-Jun", "Aug-Sep"])
].reset_index(drop=True)
df_paperseasonal_v2

Unnamed: 0,season,pteropod_biomass_gm2_mean,pteropod_biomass_gm2_std,pteropods_median,non_nan_count
0,Feb-Mar,0.082276,0.301507,0.027342,139
1,May-Jun,0.523198,0.98853,0.199549,253
2,Aug-Sep,0.348961,1.230008,0.049654,225


In [196]:
# Seasonal cycle of the biomass converted with the epipelagic depth from the
# v2 dataset (values in df_paperseasonal_v2 are already in g m-2).
season_axis_v2 = df_paperseasonal_v2['season']

# Mean as a line, with the standard deviation as error bars
mean_trace = go.Scatter(
    x=season_axis_v2,
    y=df_paperseasonal_v2['pteropod_biomass_gm2_mean'],
    mode='lines+markers',
    name='Mean biomass',
    error_y={
        'type': 'data',
        'array': df_paperseasonal_v2['pteropod_biomass_gm2_std'],
        'visible': True,
    },
    line={'color': 'royalblue'},
    marker={'size': 6},
)

# Median as standalone dots
median_trace = go.Scatter(
    x=season_axis_v2,
    y=df_paperseasonal_v2['pteropods_median'],
    mode='markers',
    name='Median biomass',
    marker={'color': 'orange', 'symbol': 'circle', 'size': 8},
)

# Number of valid values, as bars on the secondary axis
count_trace = go.Bar(
    x=season_axis_v2,
    y=df_paperseasonal_v2['non_nan_count'],
    name='Valid data count',
    yaxis='y2',
    opacity=0.4,
    marker_color='lightgrey',
)

fig = go.Figure()
for trace in (mean_trace, median_trace, count_trace):
    fig.add_trace(trace)

# Log biomass axis on the left, count axis overlaid on the right
fig.update_layout(
    title='Average Seasonal Cycles of Pteropod Biomass',
    xaxis={'title': 'Season'},
    yaxis={'title': 'Biomass [g m-2]', 'type': 'log'},
    yaxis2={
        'title': 'Non-NaN count',
        'overlaying': 'y',
        'side': 'right',
        'showgrid': False,
    },
    legend={'x': 0.01, 'y': 0.99},
    template='plotly_white',
)

fig.show()


#### Monthly

In [7]:
# Monthly climatology of the depth-integrated biomass
# Label every time step with its calendar month
ds_v2_pter_gm2['month'] = ds_v2_pter_gm2['time.month']

# Within each month, collapse every dimension into a single statistic
reduce_dims = ['time', 'is_day', 'longitude', 'latitude', 'depth']
by_month = ds_v2_pter_gm2.groupby('month')

monthly_mean = by_month.mean(dim=reduce_dims)
monthly_std = by_month.std(dim=reduce_dims)
monthly_count = by_month.count(dim=reduce_dims)      # number of non-NaN values
monthly_median = by_month.median(dim=reduce_dims)

# One DataFrame per statistic; median and count get explicit column names,
# mean and std are told apart by the merge suffixes below
df_mean = monthly_mean.to_dataframe().reset_index()
df_std = monthly_std.to_dataframe().reset_index()
df_count = monthly_count.to_dataframe(name='non_nan_count').reset_index()
df_median = monthly_median.to_dataframe(name='pteropods_median').reset_index()

# Join all statistics on the month number
df_v2_clim = (
    df_mean
    .merge(df_std, on='month', suffixes=('_mean', '_std'))
    .merge(df_median, on='month')
    .merge(df_count, on='month')
)

print(df_v2_clim)


   month  pteropod_biomass_gm2_mean  pteropod_biomass_gm2_std  \
0      1                   0.038890                  0.019303   
1      2                   0.087806                  0.316006   
2      3                   0.028681                  0.030936   
3      4                   0.121461                  0.098869   
4      5                   0.472203                  1.027126   
5      6                   0.544279                  0.971344   
6      7                   0.310690                  0.682278   
7      8                   0.241556                  0.702364   
8      9                   0.461248                  1.598187   
9     10                   0.052767                  0.129830   

   pteropods_median  non_nan_count  
0          0.037669              5  
1          0.028861            126  
2          0.010842             13  
3          0.057290              5  
4          0.138274             74  
5          0.233931            179  
6          0.117704      

In [8]:
# Export the monthly climatology table, without the row index
df_v2_clim.to_csv(
    "/data/rd_exchange/sroyer/SEAPOPYM/monthly_clim_pteropod_1998_2020.csv",
    index=False,
)

In [None]:
# Monthly climatology of the biomass converted with the epipelagic depth from
# the v2 dataset (values in df_v2_clim are already in g m-2).
month_axis = df_v2_clim['month']

# Mean as a line, with the standard deviation as error bars
mean_trace = go.Scatter(
    x=month_axis,
    y=df_v2_clim['pteropod_biomass_gm2_mean'],
    mode='lines+markers',
    name='Mean biomass',
    error_y={
        'type': 'data',
        'array': df_v2_clim['pteropod_biomass_gm2_std'],
        'visible': True,
    },
    line={'color': 'royalblue'},
    marker={'size': 6},
)

# Median as standalone dots
median_trace = go.Scatter(
    x=month_axis,
    y=df_v2_clim['pteropods_median'],
    mode='markers',
    name='Median biomass',
    marker={'color': 'orange', 'symbol': 'circle', 'size': 8},
)

# Number of valid values, as bars on the secondary axis
count_trace = go.Bar(
    x=month_axis,
    y=df_v2_clim['non_nan_count'],
    name='Valid data count',
    yaxis='y2',
    opacity=0.4,
    marker_color='lightgrey',
)

fig = go.Figure()
for trace in (mean_trace, median_trace, count_trace):
    fig.add_trace(trace)

# Log biomass axis on the left, count axis overlaid on the right
fig.update_layout(
    title='Monthly Climatology of Pteropod Biomass ',
    xaxis={'title': 'Month'},
    yaxis={'type': 'log', 'title': 'Mean Biomass [g m-2]'},
    yaxis2={
        'title': 'Non-NaN count',
        'overlaying': 'y',
        'side': 'right',
        'showgrid': False,
    },
    legend={'x': 0.01, 'y': 0.99},
    template='plotly_white',
)

fig.show()


### Comparison

#### Season

In [201]:
# === Data Mackas et Galbraith 2012 ===
data = [
    ["Feb-Mar", 0.01147, 1],
    ["May-Jun", 0.089615, 1],
    ["Aug-Sep", 0.032502, 1],
    ["Feb-Mar", 0.78137, 0],
    ["May-Jun", 4.90316, 0],
    ["Aug-Sep", 0.427516, 0]
]

# === DataFrame creation ===
df_mackasgalbraith = pd.DataFrame(data, columns=["season", "biomass", "species"])
df_mackasgalbraith["species_name"] = df_mackasgalbraith["species"].map({1: "Clione", 0: "Limacina"})


In [None]:
# Seasonal medians converted to g m-2 in three ways (yearly, seasonal-mean and
# daily layer depth), compared with the values taken from the paper.
fig = go.Figure()

# (x values, y values, legend label, marker colour) for each conversion method
median_series = [
    (
        df_paperseasonal['season'],
        df_paperseasonal['pteropods_median']*yearly_pld/1000,
        'Median biomass x yearly pld',
        'orange',
    ),
    (
        df_paperseasonal['season'],
        df_paperseasonal['pteropods_median']*df_seasonal_pld['pelagic_layer_depth']/1000,
        'Median biomass x monthly pld',
        'royalblue',
    ),
    (
        df_paperseasonal_v2['season'],
        df_paperseasonal_v2['pteropods_median'],
        'Median biomass x daily pld',
        'green',
    ),
]
for x_values, y_values, label, colour in median_series:
    fig.add_trace(go.Scatter(
        x=x_values,
        y=y_values,
        mode='markers',
        name=label,
        marker={'color': colour, 'symbol': 'circle', 'size': 8},
    ))

# Values from the paper: one trace per species, each with its own triangle marker
for species in df_mackasgalbraith["species_name"].unique():
    sub_df_mackasgalbraith = df_mackasgalbraith[df_mackasgalbraith["species_name"] == species]
    if species == "Clione":
        paper_marker = {'size': 10, 'color': 'red', 'symbol': "triangle-up"}
    else:
        paper_marker = {'size': 10, 'color': "pink", 'symbol': "triangle-down"}
    fig.add_trace(go.Scatter(
        x=sub_df_mackasgalbraith["season"],
        y=sub_df_mackasgalbraith["biomass"],
        mode="markers",
        name=f'{species} (from paper)',
        marker=paper_marker,
    ))

# Log biomass axis; a secondary axis is configured although no trace uses it here
fig.update_layout(
    title='Average Seasonal Cycles of Pteropod Biomass - comparison btw unit conversion methods',
    xaxis={'title': 'Season'},
    yaxis={'title': 'Biomass [g m-2]', 'type': 'log'},
    yaxis2={
        'title': 'Non-NaN count',
        'overlaying': 'y',
        'side': 'right',
        'showgrid': False,
    },
    legend={'x': 0.01, 'y': 0.99},
    template='plotly_white',
)

fig.show()


#### Monthly

In [198]:
# Monthly medians converted to g m-2 in three ways (yearly, monthly and daily
# layer depth). November is drawn with an 'x' for the first two methods
# because that month is interpolated.
fig = go.Figure()

# === Median Biomass === yearly epipelagic layer depth (orange)
fig.add_trace(go.Scatter(
    x=df_monthly_interp[is_not_nov]['month'],
    y=df_monthly_interp[is_not_nov]['pteropods_median']*yearly_pld/1000,
    mode='markers',
    name='Median biomass x yearly pld',
    marker=dict(color='orange', symbol='circle', size=8)
))
# November (special marker)
# Uses the yearly-pld conversion like the other orange points; it previously
# read df_m_interp_gm2, i.e. the monthly-pld value, duplicating the blue marker.
fig.add_trace(go.Scatter(
    x=df_monthly_interp[is_nov]['month'],
    y=df_monthly_interp[is_nov]['pteropods_median']*yearly_pld/1000,
    mode='markers',
    name='Nov, interpolated',
    marker=dict(color='orange', symbol='x', size=10),
    showlegend=True
))

# === Median Biomass === monthly epipelagic layer depth (blue)
# df_m_interp_gm2 already holds the values converted with monthly_pld
fig.add_trace(go.Scatter(
    x=df_m_interp_gm2[is_not_nov]['month'],
    y=df_m_interp_gm2[is_not_nov]['pteropods_median'],
    mode='markers',
    name='Median biomass x monthly pld',
    marker=dict(color='royalblue', symbol='circle', size=8)
))
# November (special marker)
fig.add_trace(go.Scatter(
    x=df_m_interp_gm2[is_nov]['month'],
    y=df_m_interp_gm2[is_nov]['pteropods_median'],
    mode='markers',
    name='Nov, interpolated',
    marker=dict(color='royalblue', symbol='x', size=10),
    showlegend=True
))

# === Median Biomass === daily epipelagic layer depth (green)
# df_v2_clim is already in g m-2
fig.add_trace(go.Scatter(
    x=df_v2_clim['month'],
    y=df_v2_clim['pteropods_median'],
    mode='markers',
    name='Median biomass x daily pld',
    marker=dict(color='green', symbol='circle', size=8)
))

# === Layout ===
fig.update_layout(
    title='Monthly Climatology of Pteropod Biomass - comparison btw unit conversion methods',
    xaxis=dict(title='Month', tickmode='linear'),
    yaxis=dict(
        type='log',
        title='Biomass [g m-2]',
    ),
    yaxis2=dict(
        title='Non-NaN count',
        overlaying='y',
        side='right',
        showgrid=False
    ),
    legend=dict(x=0.01, y=0.99),
    template='plotly_white'
)

fig.show()
