In [14]:
import math
import os

import geojson
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import seaborn as sns
from IPython.display import display
from ipywidgets import GridspecLayout
from ipywidgets import interact, interactive, fixed, interact_manual

def display_all(df):
    """Render ``df`` with no row truncation.

    The ``display.max_rows`` limit is lifted only for the duration of the
    ``display`` call. ``pd.option_context`` puts back whatever value was
    active before the call (not the pandas default), and does so even when
    rendering raises.

    Parameters
    ----------
    df : object
        Anything ``IPython.display.display`` can render; typically a
        DataFrame or Series.
    """
    with pd.option_context('display.max_rows', None):
        display(df)

# Notebook-wide setting: never elide columns when a frame is rendered.
pd.options.display.max_columns = None

In [15]:
# Read the 'PHYTO_SURF' sheet of the PHYTO workbook into a DataFrame.
biotic_df = pd.read_excel(
    io='../../data/PHYTO.xlsx',
    sheet_name='PHYTO_SURF',
)
# Bare trailing expression so the notebook renders the first five rows.
biotic_df.head(5)

Unnamed: 0,LOC_CODE,LOC,WATERBODY,TYPE,DATE_SMP,YEAR,MONTH,DAY,Q_clndr,Q_eco,PROD_CODE,SPECIES,SPEC,GROUP,AMT_MEAS,cL,LcL
0,GROOTGND,EDG,EEMSDOLLARD,ESTUARINE,2005-01-21,,,,,1,OW,Actinocyclus normanii,Acn,DIAT,1.0,5000.0,
1,GROOTGND,EDG,EEMSDOLLARD,ESTUARINE,1994-01-25,,,,,1,WATSGL,Actinocyclus normanii,Acn,DIAT,,2000.0,
2,GROOTGND,EDG,EEMSDOLLARD,ESTUARINE,1994-02-24,,,,,1,WATSGL,Actinocyclus normanii,Acn,DIAT,,485.0,
3,GROOTGND,EDG,EEMSDOLLARD,ESTUARINE,2001-03-22,,,,,2,OW,Actinocyclus normanii,Acn,DIAT,1.0,833.333333,
4,GROOTGND,EDG,EEMSDOLLARD,ESTUARINE,2002-03-13,,,,,2,OW,Actinocyclus normanii,Acn,DIAT,2.0,4000.0,


In [16]:
# Row count of biotic_df per (LOC, LOC_CODE) pair, largest count first,
# re-indexed 0..n-1 after sorting.
grouped_locations = (
    biotic_df
    .groupby(by=["LOC", "LOC_CODE"])
    .size()
    .to_frame('count')
    .reset_index()
    .sort_values('count', ascending=False, ignore_index=True)
)

display(grouped_locations.head(5))

Unnamed: 0,LOC,LOC_CODE,count
0,NW10,NOORDWK10,8065
1,EDH,HUIBGOT,6954
2,WZM,MARSDND,6927
3,WZD,DANTZGT,6460
4,WSV,VLISSGBISSVH,5926


In [17]:
# Station table: 27 rows from the 'Tables ABIO' sheet, starting after the
# first two spreadsheet rows and limited to columns A, B, D-H and J.
# The coordinate headers are shortened to 'Lat' / 'Lon' in the same chain.
df = pd.read_excel(
    '../../miscellaneous/2024_NIOZ_UvA_Plankton_ABIO_coupling.xlsx',
    sheet_name='Tables ABIO',
    skiprows=2,
    nrows=27,
    usecols="A,B, D:H, J",
).rename(
    columns={
        'LATITUDE ( WGS84 )': 'Lat',
        'LONGITUDE ( WGS84 )': 'Lon',
    }
)

display(df.head())

Unnamed: 0,DONAR CODE,DONAR-DESCRIPTION,LOC,Lat,Lon,WATERBODY,TYPE,h (m)
0,GROOTGND,Groote Gat noord,EDG,NB 53 18 15,OL 07 09 24,EEMSDOLLARD,ESTUARINE,5.9
1,HUIBGOT,Huibertgat oost,EDH,NB 53 33 33,OL 06 39 40,EEMSDOLLARD,ESTUARINE,27.9
2,DREISR,Dreischor,GMD,NB 51 42 53,OL 03 59 58,GREVELINGENMEER,LAKE,19.3
3,GOERE6,Goeree 6 km uit de kust,GOE6,NB 51 52 08,OL 03 52 20,NOORDZEE,COAST,8.8
4,NOORDWK10,Noordwijk 10 km uit de kust,NW10,NB 52 18 05,OL 04 18 04,NOORDZEE,COAST,18.2


In [18]:
def dms_to_dd(dms):
    """Convert a hemisphere-prefixed DMS string to signed decimal degrees.

    The expected layout is four whitespace-separated fields:
    ``"<hemisphere> <degrees> <minutes> <seconds>"``, e.g. ``"NB 53 18 15"``
    or ``"OL 07 09 24"``.

    Parameters
    ----------
    dms : str or number
        The coordinate to convert. Non-string input is assumed to be a value
        that is already in decimal degrees (or NaN) and is returned as a
        ``float`` unchanged, so applying the conversion to an
        already-converted column is harmless.

    Returns
    -------
    float
        Decimal degrees. Negative when the hemisphere is south or west
        (Dutch ``ZB`` / ``WL`` or English ``S`` / ``W``); any other prefix,
        including ``NB`` / ``OL``, yields a positive value.

    Raises
    ------
    ValueError
        If a string does not split into exactly four fields or a numeric
        field cannot be parsed.
    """
    if not isinstance(dms, str):
        # Already numeric: nothing to parse.
        return float(dms)

    # ZB = zuiderbreedte (south), WL = westerlengte (west).
    negative_hemispheres = {"ZB", "WL", "S", "W"}

    hemisphere, degrees, minutes, seconds = dms.split()

    # float() rather than int() so fractional seconds are accepted too.
    magnitude = float(degrees) + float(minutes) / 60 + float(seconds) / 3600
    sign = -1.0 if hemisphere.upper() in negative_hemispheres else 1.0

    return sign * magnitude

# Overwrite both coordinate columns of the station table with the result of
# dms_to_dd applied to each value.
for coord_col in ("Lat", "Lon"):
    df[coord_col] = df[coord_col].map(dms_to_dd)

df

Unnamed: 0,DONAR CODE,DONAR-DESCRIPTION,LOC,Lat,Lon,WATERBODY,TYPE,h (m)
0,GROOTGND,Groote Gat noord,EDG,53.304167,7.156667,EEMSDOLLARD,ESTUARINE,5.9
1,HUIBGOT,Huibertgat oost,EDH,53.559167,6.661111,EEMSDOLLARD,ESTUARINE,27.9
2,DREISR,Dreischor,GMD,51.714722,3.999444,GREVELINGENMEER,LAKE,19.3
3,GOERE6,Goeree 6 km uit de kust,GOE6,51.868889,3.872222,NOORDZEE,COAST,8.8
4,NOORDWK10,Noordwijk 10 km uit de kust,NW10,52.301389,4.301111,NOORDZEE,COAST,18.2
5,NOORDWK2,Noordwijk 2 km uit de kust,NW2,52.260556,4.404722,NOORDZEE,COAST,13.0
6,NOORDWK20,Noordwijk 20 km uit de kust,NW20,52.340833,4.173611,NOORDZEE,COAST,20.0
7,NOORDWK70,Noordwijk 70 km uit de kust,NW70,52.585278,3.53,NOORDZEE,OPENSEA,28.0
8,LODSGT,Lodijkse Gat,OSL,51.515278,4.126944,OOSTERSCHELDE,ESTUARINE,4.8
9,ROTTMPT3,Rottumerplaat 3 km uit de kust,RP3,53.565278,6.562778,NOORDZEE,COAST,9.0


In [19]:
# Attach the per-location row count to the station table. A right join keeps
# one output row per row of grouped_locations, in that table's order, whether
# or not the station table has a matching LOC.
df_merged = pd.merge(
    df,
    grouped_locations.loc[:, ["LOC", "count"]],
    how='right',
    on="LOC",
)

display(df_merged)

Unnamed: 0,DONAR CODE,DONAR-DESCRIPTION,LOC,Lat,Lon,WATERBODY,TYPE,h (m),count
0,NOORDWK10,Noordwijk 10 km uit de kust,NW10,52.301389,4.301111,NOORDZEE,COAST,18.2,8065
1,HUIBGOT,Huibertgat oost,EDH,53.559167,6.661111,EEMSDOLLARD,ESTUARINE,27.9,6954
2,MARSDND,Marsdiep noord,WZM,52.9825,4.75,WADDENZEE,ESTUARINE,15.9,6927
3,DANTZGT,Dantziggat,WZD,53.401111,5.726944,WADDENZEE,ESTUARINE,9.5,6460
4,VLISSGBISSVH,Vlissingen boei SSVH,WSV,51.411944,3.565556,WESTERSCHELDE,ESTUARINE,18.2,5926
5,NOORDWK2,Noordwijk 2 km uit de kust,NW2,52.260556,4.404722,NOORDZEE,COAST,13.0,5347
6,NOORDWK20,Noordwijk 20 km uit de kust,NW20,52.340833,4.173611,NOORDZEE,COAST,20.0,5345
7,HANSWGL,Hansweert geul,WSH,51.436111,4.014167,WESTERSCHELDE,ESTUARINE,17.3,4765
8,TERSLG10,Terschelling 10 km uit de kust,TS10,53.460278,5.099444,NOORDZEE,COAST,24.3,4682
9,NOORDWK70,Noordwijk 70 km uit de kust,NW70,52.585278,3.53,NOORDZEE,OPENSEA,28.0,4091


In [20]:
# The Mapbox access token is read from the environment rather than stored in
# the notebook. Export MAPBOX_ACCESS_TOKEN before starting the kernel; a
# missing variable raises KeyError here instead of producing an empty map.
# NOTE(review): the token that was previously inlined in this cell is part of
# the notebook's history and should be rotated.
px.set_mapbox_access_token(os.environ["MAPBOX_ACCESS_TOKEN"])

# Marker colour per value of the TYPE column.
color_dict = {"COAST": "#357ded", "ESTUARINE": "#32e875", "OPENSEA": "#1F2BB3", "LAKE": "#56eef4"}

# When True, markers are scaled by the 'count' column and the caption that
# explains the scaling is added. Both are driven by this one flag so the
# figure never describes an encoding it does not use.
size_by_count = False

fig = px.scatter_mapbox(df_merged, lat='Lat', lon='Lon', color='TYPE',
                    hover_name='DONAR-DESCRIPTION',
                    color_discrete_map=color_dict,
                    mapbox_style='dark',
                    text="DONAR CODE",
                    size="count" if size_by_count else None,
                    title='Test sites by type of water',
                    opacity=.9,
                    zoom=6)

annotations = []
if size_by_count:
    annotations.append(
        dict(
            text='Blob size indicates number of measurements',
            align='left',
            showarrow=False,
            # Paper coordinates: horizontally centred, just below the map.
            xref='paper',
            yref='paper',
            x=.5,
            y=-.07,
            font=dict(size=17),
        )
    )

fig.update_layout(
    autosize=False,
    width=1000,
    height=800,
    title={
        "font_size": 20,
    },
    annotations=annotations,
)

# White labels so the DONAR codes stay readable on the dark basemap.
fig.update_traces(textfont=dict(size=16, color="white"))

fig.show()

In [21]:
# Show the distinct station codes of both tables one after the other so they
# can be compared by eye.
display(
    pd.unique(biotic_df["LOC_CODE"]),
    pd.unique(df["DONAR CODE"]),
)

array(['GROOTGND', 'HUIBGOT', 'DREISR', 'NOORDWK10', 'TERSLG4',
       'NOORDWK20', 'TERSLG10', 'NOORDWK2', 'NOORDWK70', 'WALCRN20',
       'WALCRN2', 'GOERE6', 'ROTTMPT3', 'WALCRN70', 'TERSLG100',
       'TERSLG135', 'TERSLG175', 'TERSLG235', 'ROTTMPT70', 'ROTTMPT50',
       'LODSGT', 'SOELKKPDOT', 'MARSDND', 'DANTZGT', 'SCHAARVODDL',
       'VLISSGBISSVH', 'HANSWGL'], dtype=object)

array(['GROOTGND', 'HUIBGOT', 'DREISR', 'GOERE6', 'NOORDWK10', 'NOORDWK2',
       'NOORDWK20', 'NOORDWK70', 'LODSGT', 'ROTTMPT3', 'ROTTMPT50',
       'ROTTMPT70', 'TERSLG10', 'TERSLG100', 'TERSLG135', 'TERSLG175',
       'TERSLG235', 'TERSLG4', 'SOELKKPDOT', 'WALCRN2', 'WALCRN20',
       'WALCRN70', 'HANSWGL', 'SCHAARVODDL', 'VLISSGBISSVH', 'DANTZGT',
       'MARSDND'], dtype=object)

In [24]:
# Station clusters, given as lists of DONAR codes. The position of a list in
# `clusters` is the label written to the 'cluster' column (as a string).
clusters = [
    ['NOORDWK70', 'ROTTMPT50', 'ROTTMPT70', 'TERSLG100', 'TERSLG135',
     'TERSLG175', 'TERSLG235'],
    ['NOORDWK10', 'NOORDWK2', 'NOORDWK20', 'TERSLG10', 'TERSLG4',
     'WALCRN20', 'WALCRN70'],
    ['DANTZGT', 'GOERE6', 'HUIBGOT', 'LODSGT', 'MARSDND', 'ROTTMPT3',
     'VLISSGBISSVH', 'WALCRN2'],
    ['HANSWGL'],
    ['GROOTGND', 'SCHAARVODDL'],
    ['DREISR', 'SOELKKPDOT'],
]

# Invert the nested list into a DONAR code -> label lookup. If a code were
# listed in more than one cluster, the later cluster wins.
cluster_by_code = {
    code: f"{label}"
    for label, members in enumerate(clusters)
    for code in members
}

# Work on a copy: assigning the new column must not alter df_merged, which
# other cells read.
df_clustered = df_merged.copy()

# NOTE(review): rows whose DONAR code is missing or not listed in any cluster
# fall back to "0", which is also the label of the first cluster. Kept for
# compatibility with the colour map used downstream; consider a dedicated
# "unassigned" label.
df_clustered["cluster"] = df_clustered["DONAR CODE"].map(
    lambda code: cluster_by_code.get(code, "0")
)

display(df_clustered)

Unnamed: 0,DONAR CODE,DONAR-DESCRIPTION,LOC,Lat,Lon,WATERBODY,TYPE,h (m),count,cluster
0,NOORDWK10,Noordwijk 10 km uit de kust,NW10,52.301389,4.301111,NOORDZEE,COAST,18.2,8065,1
1,HUIBGOT,Huibertgat oost,EDH,53.559167,6.661111,EEMSDOLLARD,ESTUARINE,27.9,6954,2
2,MARSDND,Marsdiep noord,WZM,52.9825,4.75,WADDENZEE,ESTUARINE,15.9,6927,2
3,DANTZGT,Dantziggat,WZD,53.401111,5.726944,WADDENZEE,ESTUARINE,9.5,6460,2
4,VLISSGBISSVH,Vlissingen boei SSVH,WSV,51.411944,3.565556,WESTERSCHELDE,ESTUARINE,18.2,5926,2
5,NOORDWK2,Noordwijk 2 km uit de kust,NW2,52.260556,4.404722,NOORDZEE,COAST,13.0,5347,1
6,NOORDWK20,Noordwijk 20 km uit de kust,NW20,52.340833,4.173611,NOORDZEE,COAST,20.0,5345,1
7,HANSWGL,Hansweert geul,WSH,51.436111,4.014167,WESTERSCHELDE,ESTUARINE,17.3,4765,3
8,TERSLG10,Terschelling 10 km uit de kust,TS10,53.460278,5.099444,NOORDZEE,COAST,24.3,4682,1
9,NOORDWK70,Noordwijk 70 km uit de kust,NW70,52.585278,3.53,NOORDZEE,OPENSEA,28.0,4091,0


In [25]:
# The Mapbox access token is read from the environment rather than stored in
# the notebook. Export MAPBOX_ACCESS_TOKEN before starting the kernel; a
# missing variable raises KeyError here instead of producing an empty map.
# NOTE(review): the token that was previously inlined in this cell is part of
# the notebook's history and should be rotated.
px.set_mapbox_access_token(os.environ["MAPBOX_ACCESS_TOKEN"])

# Marker colour per cluster label.
# NOTE(review): labels "2" and "3" share the same colour and there is no
# entry for "5", so that cluster gets a colour from plotly's default
# sequence. Confirm whether both are intended.
color_dict = {"2": "#32e875", "3": "#32e875", "4": "#357ded", "1": "#0F1BB3", "0": "#56ffff"}

# One marker per row of df_clustered, coloured by cluster label and scaled by
# the 'count' column.
fig = px.scatter_mapbox(df_clustered, lat='Lat', lon='Lon', color='cluster',
                    hover_name='DONAR-DESCRIPTION',
                    color_discrete_map=color_dict,
                    mapbox_style='light',
                    size="count",
                    title='Clustered Sample Locations',
                    opacity=.9,
                    zoom=6)

# Fixed canvas size so the figure renders identically regardless of the
# notebook's width.
fig.update_layout(
    autosize=False,
    width=1000,
    height=800,
    title={
        "font_size": 20,
    },
)


fig.show()