# Climatology of pteropods at PAPA

Conversion from mg/m3 to g/m2 by multiplying by the epipelagic layer depth on the day of the measurement (and dividing by 1000).

All pteropods observed at PAPA station : Clio, Gymno, Lim

Cleaned Notebook 

## Set Up

In [1]:
# imports
import xarray as xr
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np 

<div style="padding: 10px; border-radius: 5px; background-color: #cce5ff; color: #004085; border-left: 5px solid #007bff;">
    <strong>Note :</strong> Let's use the most widespread method to deal with leap years: exclude 29 February.
</div>

In [12]:
# Lookup table "MM-DD" -> day of year (1..365) built on a non-leap calendar.
# 2001 is not a leap year, so "02-29" has no entry in the table.
days = pd.date_range('2001-01-01', '2001-12-31')
mmdd_to_day = dict(zip(days.strftime('%m-%d'), range(1, len(days) + 1)))

In [2]:
# Read the processed PAPA pteropod dataset from its Zarr store
papa_zarr_path = "../2_bis_processed_pteropods/papa_pteropod.zarr"
papa_data = xr.load_dataset(papa_zarr_path, engine="zarr")
papa_data

In [3]:
# Pteropod variable of the dataset (converted from mg/m3 to g/m2 further down)
ds_pter = papa_data["pteropods"]
ds_pter

In [4]:
# Epipelagic layer depth, used as the vertical integration thickness
ds_pld = papa_data["epipelagic_depth"]
ds_pld

In [6]:
# Depth-integrate the biomass over the epipelagic layer:
#   (mg/m3) * (m) / 1000 = g/m2
ds_pter_gm2 = ((ds_pter * ds_pld) / 1000).rename('pteropod_biomass_gm2')

# Describe the converted variable
ds_pter_gm2.attrs.update({
    'units': 'g m-2',
    'long_name': 'Pteropod integrated biomass in epipelagic layer',
})

ds_pter_gm2

## Select Area


In [7]:
# Number of valid (non-NaN) biomass entries in each latitude/longitude cell
per_lat_lon = ds_pter_gm2.count(dim=["time", "depth", "is_day"])
df_lat_lon = per_lat_lon.to_dataframe(name="non_nan_count").reset_index()

# Latitude rows x longitude columns, ready for a heatmap
heatmap_data = df_lat_lon.pivot(index="latitude", columns="longitude", values="non_nan_count")

count_map = go.Heatmap(
    x=heatmap_data.columns,
    y=heatmap_data.index,
    z=heatmap_data.values,
    colorscale='plasma',
    colorbar={"title": "Non-NaN entries"},
)
fig = go.Figure(data=count_map)

# Axis labels and title
fig.update_layout(
    title="Heatmap of valid pteropod data entries by latitude and longitude",
    xaxis={"title": "Longitude"},
    yaxis={"title": "Latitude"},
)

fig.show()


<div style="padding: 10px; border-radius: 5px; background-color: #cce5ff; color: #004085; border-left: 5px solid #007bff;">
    <strong>Note :</strong> Selected 2° Area : 48.5N-50.5N, 128.5W-130.5W
</div>

In [113]:
# Restrict the data to the selected 2-degree box: 48.5N-50.5N, 130.5W-128.5W
study_box = {
    "latitude": slice(48.5, 50.5),
    "longitude": slice(-130.5, -128.5),
}
ds_pter_2deg = ds_pter_gm2.sel(**study_box)
ds_pter_2deg

In [118]:
# Collapse day/night, space and depth (NaN ignored) so that only the time axis is left
ds_time_serie = ds_pter_2deg.mean(
    dim=["is_day", "latitude", "longitude", "depth"],
    skipna=True,
)
ds_time_serie

In [120]:
# Turn zeros into NaN (a log transformation cannot handle 0)
ds_time_serie = ds_time_serie.where(ds_time_serie != 0, np.nan)
# Keep only the entries that still hold a value
mask = ds_time_serie.notnull()
ds_clean = ds_time_serie[mask]
ds_clean

In [None]:
#ds_clean.to_netcdf('/data/rd_exchange/sroyer/SEAPOPYM/time_serie_pter_papa_1998_2020.nc')

## Time Series

In [114]:
# One biomass value per sampling date: mean over day/night, space and depth
collapsed_dims = ["is_day", "latitude", "longitude", "depth"]
mean_timeseries = ds_pter_2deg.mean(dim=collapsed_dims, skipna=True)

# Day-of-year coordinate on a 365-day calendar: each date's MM-DD string is
# looked up in mmdd_to_day, so dates absent from the table (29 February) get NaN
sample_dates = pd.to_datetime(mean_timeseries.time.values)
mmdd = sample_dates.strftime('%m-%d')
day_noleap = pd.Series(mmdd).map(mmdd_to_day).values
mean_timeseries = mean_timeseries.assign_coords(day=("time", day_noleap))

# Years present in the series, each drawn as its own trace (one colour per year)
years = sample_dates.year.unique()

fig = go.Figure()
for year in years:
    yearly_data = mean_timeseries.sel(time=str(year))
    year_trace = go.Scatter(
        x=yearly_data['day'],
        y=yearly_data.values,
        mode='markers',
        name=str(year),
        showlegend=True,
    )
    fig.add_trace(year_trace)

# Linear y axis with automatic range (switch to type='log' / a fixed range if needed)
fig.update_layout(
    title='Pteropod Biomass Time Serie at PAPA (1998-2020)',
    xaxis_title='Day of Year',
    yaxis={'title': 'Biomass [g m-2]'},
    template='plotly_white',
)

fig.show()


<div style="padding: 10px; border-radius: 5px; background-color: #cce5ff; color: #004085; border-left: 5px solid #007bff;">
    <strong>Note :</strong> The biomass looks log-normally distributed (roughly Gaussian after a log10 transform), so it is better to do the statistics and the optimisation on the log10 values. <br>
    Quentin (which one ?) is doing that also, ask him if he has some papers
</div>

<div style="padding: 10px; border-radius: 5px; background-color: #cce5ff; color: #004085; border-left: 5px solid #007bff;">
    <strong>Note :</strong> Remove outliers : <br>
    - June 2013 ; 9.044 g.m-2 <br>
    - Sep 2009 ; 4.57 g.m-2 (the day before : 0.16 g.m-2) <br> 
    

</div>

In [35]:
# Mask out the outliers: every value above 4 g/m2 becomes NaN
outlier_threshold = 4
ds_pter_2deg = ds_pter_2deg.where(ds_pter_2deg <= outlier_threshold)


In [28]:
# One biomass value per sampling date: mean over day/night, space and depth
collapsed_dims = ["is_day", "latitude", "longitude", "depth"]
mean_timeseries = ds_pter_2deg.mean(dim=collapsed_dims, skipna=True)

# Day-of-year coordinate on a 365-day calendar: each date's MM-DD string is
# looked up in mmdd_to_day, so dates absent from the table (29 February) get NaN
sample_dates = pd.to_datetime(mean_timeseries.time.values)
mmdd = sample_dates.strftime('%m-%d')
day_noleap = pd.Series(mmdd).map(mmdd_to_day).values
mean_timeseries = mean_timeseries.assign_coords(day=("time", day_noleap))

# Years present in the series, each drawn as its own trace (one colour per year)
years = sample_dates.year.unique()

fig = go.Figure()
for year in years:
    yearly_data = mean_timeseries.sel(time=str(year))
    year_trace = go.Scatter(
        x=yearly_data['day'],
        y=yearly_data.values,
        mode='markers',
        name=str(year),
        showlegend=True,
    )
    fig.add_trace(year_trace)

# Linear y axis fixed to 0-5 (switch to type='log' if needed)
fig.update_layout(
    title='Cleaned Pteropod Biomass Time Serie at PAPA (1998-2020)',
    xaxis_title='Day of Year',
    yaxis={
        'title': 'Biomass [g m-2]',
        'range': [0, 5],
    },
    template='plotly_white',
)

fig.show()


## Histogram

In [122]:
# Zeros are masked, then every remaining non-NaN entry goes into one flat vector
ds_pter_2deg_no_0 = ds_pter_2deg.where(ds_pter_2deg != 0, np.nan)
flat_data = ds_pter_2deg_no_0.values.flatten()
is_missing = np.isnan(flat_data)
valid_data = flat_data[np.logical_not(is_missing)]

biomass_label = "pteropod biomass (g/m2)"
df = pd.DataFrame({biomass_label: valid_data})

# Distribution of the individual biomass entries
fig = px.histogram(
    df,
    x=biomass_label,
    nbins=50,
    labels={"count": "frequency"},
    title="Histogram of pteropods biomass entries ",
)
fig.show()



In [None]:
# Zeros are masked (log10(0) is -inf), then every non-NaN entry goes into one flat vector
ds_pter_2deg_no_0 = ds_pter_2deg.where(ds_pter_2deg != 0, np.nan)
flat_data = ds_pter_2deg_no_0.values.flatten()
is_missing = np.isnan(flat_data)
valid_data = flat_data[np.logical_not(is_missing)]

log_biomass_label = "pteropod biomass (log10 g/m2)"
df = pd.DataFrame({log_biomass_label: valid_data})

# Same histogram as for the raw values, drawn on the log10-transformed entries
log10_df = np.log10(df)
fig = px.histogram(
    log10_df,
    x=log_biomass_label,
    nbins=50,
    labels={"count": "frequency"},
    title="Histogram of pteropods biomass entries ",
)
fig.show()



## Monthly Climatology

In [37]:
# Monthly climatology: every entry of a calendar month (all years, day/night,
# positions and depths) is pooled into one set of statistics.

# Month number of each time step, stored on the array itself
ds_pter_2deg['month'] = ds_pter_2deg['time.month']

pooled_dims = ['time', 'is_day', 'longitude', 'latitude', 'depth']
by_month = ds_pter_2deg.groupby('month')

monthly_mean = by_month.mean(dim=pooled_dims)      # mean
monthly_std = by_month.std(dim=pooled_dims)        # standard deviation
monthly_count = by_month.count(dim=pooled_dims)    # number of non-NaN entries
monthly_median = by_month.median(dim=pooled_dims)  # median

# One DataFrame per statistic (plotly works from DataFrames)
df_mean = monthly_mean.to_dataframe().reset_index()
df_std = monthly_std.to_dataframe().reset_index()
df_count = monthly_count.to_dataframe(name='non_nan_count').reset_index()
df_median = monthly_median.to_dataframe(name='pteropod_biomass_median').reset_index()

# Single table keyed by month; mean and std share a column name, hence the suffixes
df_clim = (
    df_mean
    .merge(df_std, on='month', suffixes=('_mean', '_std'))
    .merge(df_median, on='month')
    .merge(df_count, on='month')
)

print(df_clim)


   month  pteropod_biomass_gm2_mean  pteropod_biomass_gm2_std  \
0      1                   0.038890                  0.019303   
1      2                   0.025768                  0.033271   
2      3                   0.014616                  0.020981   
3      4                   0.121461                  0.098869   
4      5                   0.280818                  0.396817   
5      6                   0.350057                  0.714995   
6      7                   0.146798                  0.260031   
7      8                   0.109048                  0.187616   
8      9                   0.067290                  0.233194   
9     10                   0.031324                  0.029795   

   pteropod_biomass_median  non_nan_count  
0                 0.037669              5  
1                 0.014692             41  
2                 0.006597              6  
3                 0.057290              5  
4                 0.131815             66  
5                 0.

In [32]:
# Write the monthly climatology table to CSV, without the index column
monthly_clim_csv = "/data/rd_exchange/sroyer/SEAPOPYM/monthly_clim_pteropod_1998_2020_V2.csv"
df_clim.to_csv(monthly_clim_csv, index=False)

In [48]:
# Figure: monthly climatology (mean with std error bars, median) on a log axis,
# with the number of valid entries as bars on a secondary axis.
months = df_clim['month']

# Mean curve; error bars are +/- one standard deviation
mean_trace = go.Scatter(
    x=months,
    y=df_clim['pteropod_biomass_gm2_mean'],
    mode='lines+markers',
    name='Mean biomass',
    error_y={
        'type': 'data',
        'array': df_clim['pteropod_biomass_gm2_std'],
        'visible': True,
    },
    line={'color': 'royalblue'},
    marker={'size': 6},
)

# Median as separate dots
median_trace = go.Scatter(
    x=months,
    y=df_clim['pteropod_biomass_median'],
    mode='markers',
    name='Median biomass',
    marker={'color': 'orange', 'symbol': 'circle', 'size': 8},
)

# Non-NaN count, attached to the right-hand axis (y2)
count_trace = go.Bar(
    x=months,
    y=df_clim['non_nan_count'],
    name='Valid data count',
    yaxis='y2',
    opacity=0.4,
    marker_color='lightgrey',
)

fig = go.Figure()
for trace in (mean_trace, median_trace, count_trace):
    fig.add_trace(trace)

# Presentation
fig.update_layout(
    title='Monthly Climatology of Pteropod Biomass ',
    xaxis={'title': 'Month'},
    yaxis={
        'type': 'log',
        'title': 'Mean Biomass [g m-2]',
    },
    yaxis2={
        'title': 'Non-NaN count',
        'overlaying': 'y',
        'side': 'right',
        'showgrid': False,
    },
    legend={'x': 0.01, 'y': 0.99},
    template='plotly_white',
)

fig.show()


## Seasonal Climatology Compared with Literature

In [41]:
# Reference values from Mackas & Galbraith (2012): one [season, biomass, species code]
# record per row, where species code 1 stands for Clione and 0 for Limacina.
data = [
    ["Feb-Mar", 0.01147, 1],
    ["May-Jun", 0.089615, 1],
    ["Aug-Sep", 0.032502, 1],
    ["Feb-Mar", 0.78137, 0],
    ["May-Jun", 4.90316, 0],
    ["Aug-Sep", 0.427516, 0],
]
species_names = {1: "Clione", 0: "Limacina"}

# Table of the paper values, with a readable species name next to the numeric code
df_mackasgalbraith = pd.DataFrame(
    data, columns=["season", "biomass", "species"]
).assign(species_name=lambda frame: frame["species"].map(species_names))


In [44]:
# Season definition used for the comparison with the paper
def get_group_month(month):
    """Return the two-month season label that contains ``month``.

    Months 2-3 map to "Feb-Mar", 5-6 to "May-Jun" and 8-9 to "Aug-Sep";
    any other value (months not taken into account in the paper) maps
    to "Others".
    """
    paper_seasons = (
        ("Feb-Mar", (2, 3)),   # February, March
        ("May-Jun", (5, 6)),   # May, June
        ("Aug-Sep", (8, 9)),   # August, September
    )
    for label, months in paper_seasons:
        if month in months:
            return label
    return "Others"
# Label every time step with its paper season. vectorize=True already applies the
# scalar function element-wise, so no extra np.vectorize wrapper is needed.
paperseason_labels = xr.apply_ufunc(
    get_group_month,
    ds_pter_2deg['month'],
    vectorize=True
)

# Attach the season labels to the dataset, along the time dimension
ds_pter_2deg = ds_pter_2deg.assign_coords(season=("time", paperseason_labels.data))

# Seasonal climatology: all entries of a season are pooled into one set of statistics
season_dims = ['time', 'longitude', 'latitude', 'depth', 'is_day']
paperseasonal_mean = ds_pter_2deg.groupby('season').mean(dim=season_dims, skipna=True)
paperseasonal_std = ds_pter_2deg.groupby('season').std(dim=season_dims, skipna=True)
paperseasonal_median = ds_pter_2deg.groupby('season').median(dim=season_dims, skipna=True)
paperseasonal_count = ds_pter_2deg.groupby('season').count(dim=season_dims)

# One DataFrame per statistic (for plotly)
df_seasonal_mean = paperseasonal_mean.to_dataframe().reset_index()
df_seasonal_std = paperseasonal_std.to_dataframe().reset_index()
df_seasonal_median = paperseasonal_median.to_dataframe(name="pteropod_biomass_median").reset_index()
df_seasonal_count = paperseasonal_count.to_dataframe(name="non_nan_count").reset_index()

# Single table keyed by season; mean and std share a column name, hence the suffixes
df_paperseasonal = df_seasonal_mean.merge(df_seasonal_std, on="season", suffixes=("_mean", "_std"))
df_paperseasonal = df_paperseasonal.merge(df_seasonal_median, on="season")
df_paperseasonal = df_paperseasonal.merge(df_seasonal_count, on="season")

# Put the seasons in calendar order instead of alphabetical order
from pandas.api.types import CategoricalDtype

paperseason_order = CategoricalDtype(
    categories=["Feb-Mar", "May-Jun", "Aug-Sep", "Others"],
    ordered=True
)
df_paperseasonal["season"] = df_paperseasonal["season"].astype(paperseason_order)
df_paperseasonal = df_paperseasonal.sort_values("season").reset_index(drop=True)

# Drop the months that the paper does not cover. Filtering on the label (rather than
# keeping the first three rows) stays correct even when one of the paper seasons has
# no data, in which case "Others" would otherwise slip into the first three rows.
df_paperseasonal = df_paperseasonal[df_paperseasonal["season"] != "Others"].reset_index(drop=True)
df_paperseasonal

Unnamed: 0,season,pteropod_biomass_gm2_mean,pteropod_biomass_gm2_std,pteropod_biomass_median,non_nan_count
0,Feb-Mar,0.024345,0.032182,0.011822,47
1,May-Jun,0.31133,0.560836,0.118413,118
2,Aug-Sep,0.086038,0.214941,0.020765,98


In [46]:
# Figure: seasonal medians from this notebook against the values taken from the paper
fig = go.Figure()

# Our median (biomass integrated with the daily epipelagic layer depth)
own_median_trace = go.Scatter(
    x=df_paperseasonal['season'],
    y=df_paperseasonal['pteropod_biomass_median'],
    mode='markers',
    name='Median biomass x daily pld',
    marker={'color': 'green', 'symbol': 'circle', 'size': 8},
)
fig.add_trace(own_median_trace)

# Paper values, one trace per species: Clione is a red upward triangle,
# any other species a pink downward triangle
for species in df_mackasgalbraith["species_name"].unique():
    sub_df_mackasgalbraith = df_mackasgalbraith[df_mackasgalbraith["species_name"] == species]
    if species == "Clione":
        marker_color, marker_symbol = 'red', "triangle-up"
    else:
        marker_color, marker_symbol = "pink", "triangle-down"
    fig.add_trace(go.Scatter(
        x=sub_df_mackasgalbraith["season"],
        y=sub_df_mackasgalbraith["biomass"],
        mode="markers",
        name=f'{species} (from paper)',
        marker={'size': 10, 'color': marker_color, 'symbol': marker_symbol},
    ))

# Presentation
fig.update_layout(
    title='Average Seasonal Cycles of Pteropod Biomass - comparison with litterature',
    xaxis={'title': 'Season'},
    yaxis={
        'title': 'Biomass [g m-2]',
        'type': 'log',
    },
    yaxis2={
        'title': 'Non-NaN count',
        'overlaying': 'y',
        'side': 'right',
        'showgrid': False,
    },
    legend={'x': 0.01, 'y': 0.99},
    template='plotly_white',
)

fig.show()


Remark: improvement in Feb-Mar (before: a factor-of-2 difference) and in May-Jun (same difference); in Aug-Sep, before = over-estimation, now = under-estimation (of the same magnitude).

## Log10 Climatology

<div style="padding: 10px; border-radius: 5px; background-color: #cce5ff; color: #004085; border-left: 5px solid #007bff;">
    <strong>Note :</strong> To compute this log10 climatology, 20 zero values have been removed (log10(0) = -inf).
</div>

In [108]:
# Monthly climatology computed in log10 space.
# Starts from ds_pter_2deg_no_0 (zeros already masked), which must exist beforehand.

# log10 transform -----------------------------
ds_pter_log10 = np.log10(ds_pter_2deg_no_0)
# Month number of each time step
ds_pter_log10['month'] = ds_pter_log10['time.month']

pooled_dims = ['time', 'is_day', 'longitude', 'latitude', 'depth']
log_by_month = ds_pter_log10.groupby('month')

# Mean and median are converted back from log10 with 10**x.
# The standard deviation is NOT converted back: it stays in log10 units.
monthly_mean = 10 ** log_by_month.mean(dim=pooled_dims)
monthly_std = log_by_month.std(dim=pooled_dims)
monthly_count = log_by_month.count(dim=pooled_dims)  # number of non-NaN entries
monthly_median = 10 ** log_by_month.median(dim=pooled_dims)

# One DataFrame per statistic (plotly works from DataFrames)
df_mean = monthly_mean.to_dataframe().reset_index()
df_std = monthly_std.to_dataframe().reset_index()
df_count = monthly_count.to_dataframe(name='non_nan_count').reset_index()
df_median = monthly_median.to_dataframe(name='pteropod_biomass_median').reset_index()

# Single table keyed by month; mean and std share a column name, hence the suffixes
df_clim_v2 = (
    df_mean
    .merge(df_std, on='month', suffixes=('_mean', '_std'))
    .merge(df_median, on='month')
    .merge(df_count, on='month')
)

print(df_clim_v2)


   month  pteropod_biomass_gm2_mean  pteropod_biomass_gm2_std  \
0      1                   0.033287                  0.260625   
1      2                   0.017922                  0.410406   
2      3                   0.009675                  0.433482   
3      4                   0.087757                  0.344764   
4      5                   0.143989                  0.715110   
5      6                   0.138077                  0.736781   
6      7                   0.036432                  0.808265   
7      8                   0.033574                  0.848848   
8      9                   0.028515                  0.627508   
9     10                   0.019716                  0.484149   

   pteropod_biomass_median  non_nan_count  
0                 0.037669              5  
1                 0.015123             37  
2                 0.009227              5  
3                 0.057290              5  
4                 0.145701             68  
5                 0.

In [None]:
#df_clim_v2.to_csv("/data/rd_exchange/sroyer/SEAPOPYM/monthly_clim_pteropod_1998_2020_v3.csv", index=False)

<div style="padding: 10px; border-radius: 5px; background-color: #d4edda; color: #155724; border-left: 5px solid #28a745;">
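    <strong>Question:</strong> for the standard deviation, do I use 10**pter_std? <br>
    <strong>Note:</strong> 10**std is a multiplicative factor (geometric standard deviation), so the one-sigma interval is [mean / 10**std, mean * 10**std], not mean ± 10**std.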
</div>

In [106]:
# Figure: monthly climatology from the log10 statistics, compared with the plain median.
#
# In df_clim_v2 the "_mean" column has been converted back from log10 (it is a geometric
# mean in g m-2) but the "_std" column is still a standard deviation of log10 values.
# Adding that number directly to the mean mixes units, so it is first turned into a
# multiplicative factor (10**std) and drawn as an asymmetric interval
# [mean / factor, mean * factor], which is +/- one log10 standard deviation around the mean.
log_mean = df_clim_v2['pteropod_biomass_gm2_mean']
std_factor = 10 ** df_clim_v2['pteropod_biomass_gm2_std']
err_plus = log_mean * (std_factor - 1)        # distance from mean up to mean * factor
err_minus = log_mean * (1 - 1 / std_factor)   # distance from mean down to mean / factor

fig = go.Figure()

# Mean with its one-standard-deviation interval (curve with error bars)
fig.add_trace(go.Scatter(
    x=df_clim_v2['month'],
    y=log_mean,
    mode='lines+markers',
    name='Mean biomass using log10 method',
    error_y=dict(
        type='data',
        symmetric=False,
        array=err_plus,
        arrayminus=err_minus,
        visible=True
    ),
    line=dict(color='royalblue'),
    marker=dict(size=6)
))
# Median of the previous (non-log) climatology, for comparison
fig.add_trace(go.Scatter(
    x=df_clim['month'],
    y=df_clim['pteropod_biomass_median'],
    mode='markers',
    name='Median biomass',
    marker=dict(color='red', symbol='circle', size=8)
))
# Median from the log10 climatology (dots)
fig.add_trace(go.Scatter(
    x=df_clim_v2['month'],
    y=df_clim_v2['pteropod_biomass_median'],
    mode='markers',
    name='Median biomass using log10 method',
    marker=dict(color='orange', symbol='circle', size=8)
))

# Non-NaN count (bars on the secondary axis)
fig.add_trace(go.Bar(
    x=df_clim_v2['month'],
    y=df_clim_v2['non_nan_count'],
    name='Valid data count',
    yaxis='y2',
    opacity=0.4,
    marker_color='lightgrey'
))

# Presentation
fig.update_layout(
    title='Monthly Climatology of Pteropod Biomass ',
    xaxis=dict(title='Month'),
    yaxis=dict(
        type='log',
        title='Mean Biomass [g m-2]',
    ),
    yaxis2=dict(
        title='Non-NaN count',
        overlaying='y',
        side='right',
        showgrid=False
    ),
    legend=dict(x=0.01, y=0.99),
    template='plotly_white'
)

fig.show()


### Comparison with Literature

In [109]:
# Season definition used for the comparison with the paper
def get_group_month(month):
    """Return the two-month season label that contains ``month``.

    Months 2-3 map to "Feb-Mar", 5-6 to "May-Jun" and 8-9 to "Aug-Sep";
    any other value (months not taken into account in the paper) maps
    to "Others".
    """
    paper_seasons = (
        ("Feb-Mar", (2, 3)),   # February, March
        ("May-Jun", (5, 6)),   # May, June
        ("Aug-Sep", (8, 9)),   # August, September
    )
    for label, months in paper_seasons:
        if month in months:
            return label
    return "Others"
# Label every time step with its paper season. vectorize=True already applies the
# scalar function element-wise, so no extra np.vectorize wrapper is needed.
paperseason_labels = xr.apply_ufunc(
    get_group_month,
    ds_pter_log10['month'],
    vectorize=True
)

# Attach the season labels to the log10 dataset, along the time dimension
ds_pter_log10 = ds_pter_log10.assign_coords(season=("time", paperseason_labels.data))

# Seasonal climatology in log10 space: all entries of a season are pooled
season_dims = ['time', 'longitude', 'latitude', 'depth', 'is_day']
paperseasonal_mean = ds_pter_log10.groupby('season').mean(dim=season_dims, skipna=True)
paperseasonal_std = ds_pter_log10.groupby('season').std(dim=season_dims, skipna=True)
paperseasonal_median = ds_pter_log10.groupby('season').median(dim=season_dims, skipna=True)
paperseasonal_count = ds_pter_log10.groupby('season').count(dim=season_dims)

# Mean and median are converted back from log10 (10**x);
# the standard deviation is left in log10 units
paperseasonal_mean = 10**paperseasonal_mean
paperseasonal_median = 10**paperseasonal_median

# One DataFrame per statistic (for plotly)
df_seasonal_mean = paperseasonal_mean.to_dataframe().reset_index()
df_seasonal_std = paperseasonal_std.to_dataframe().reset_index()
df_seasonal_median = paperseasonal_median.to_dataframe(name="pteropod_biomass_median").reset_index()
df_seasonal_count = paperseasonal_count.to_dataframe(name="non_nan_count").reset_index()

# Single table keyed by season; mean and std share a column name, hence the suffixes
df_paperseasonal = df_seasonal_mean.merge(df_seasonal_std, on="season", suffixes=("_mean", "_std"))
df_paperseasonal = df_paperseasonal.merge(df_seasonal_median, on="season")
df_paperseasonal = df_paperseasonal.merge(df_seasonal_count, on="season")

# Put the seasons in calendar order instead of alphabetical order
from pandas.api.types import CategoricalDtype

paperseason_order = CategoricalDtype(
    categories=["Feb-Mar", "May-Jun", "Aug-Sep", "Others"],
    ordered=True
)
df_paperseasonal["season"] = df_paperseasonal["season"].astype(paperseason_order)
df_paperseasonal = df_paperseasonal.sort_values("season").reset_index(drop=True)

# Drop the months that the paper does not cover. Filtering on the label (rather than
# keeping the first three rows) stays correct even when one of the paper seasons has
# no data, in which case "Others" would otherwise slip into the first three rows.
df_paperseasonal = df_paperseasonal[df_paperseasonal["season"] != "Others"].reset_index(drop=True)
df_paperseasonal

Unnamed: 0,season,pteropod_biomass_gm2_mean,pteropod_biomass_gm2_std,pteropod_biomass_median,non_nan_count
0,Feb-Mar,0.016654,0.422218,0.014732,42
1,May-Jun,0.141574,0.723988,0.138185,114
2,Aug-Sep,0.0308,0.7411,0.023482,89


In [104]:
# Figure: seasonal medians from the log10 statistics against the values taken from the paper
fig = go.Figure()

# Our median (biomass integrated with the daily epipelagic layer depth)
own_median_trace = go.Scatter(
    x=df_paperseasonal['season'],
    y=df_paperseasonal['pteropod_biomass_median'],
    mode='markers',
    name='Median biomass x daily pld',
    marker={'color': 'green', 'symbol': 'circle', 'size': 8},
)
fig.add_trace(own_median_trace)

# Paper values, one trace per species: Clione is a red upward triangle,
# any other species a pink downward triangle
for species in df_mackasgalbraith["species_name"].unique():
    sub_df_mackasgalbraith = df_mackasgalbraith[df_mackasgalbraith["species_name"] == species]
    if species == "Clione":
        marker_color, marker_symbol = 'red', "triangle-up"
    else:
        marker_color, marker_symbol = "pink", "triangle-down"
    fig.add_trace(go.Scatter(
        x=sub_df_mackasgalbraith["season"],
        y=sub_df_mackasgalbraith["biomass"],
        mode="markers",
        name=f'{species} (from paper)',
        marker={'size': 10, 'color': marker_color, 'symbol': marker_symbol},
    ))

# Presentation (main title plus a grey subtitle)
fig.update_layout(
    title={
        'text': "Average Seasonal Cycles of Pteropod Biomass using log10 stats",
        'subtitle': {
            'text': "comparison with litterature",
            'font': {'color': "gray", 'size': 13},
        },
    },
    xaxis={'title': 'Season'},
    yaxis={
        'title': 'Biomass [g m-2]',
        'type': 'log',
    },
    yaxis2={
        'title': 'Non-NaN count',
        'overlaying': 'y',
        'side': 'right',
        'showgrid': False,
    },
    legend={'x': 0.01, 'y': 0.99},
    template='plotly_white',
)

fig.show()


Better fit in Aug-Sep, but a poorer fit for the other months.