### **Required installation for coordinate translation**

In [1]:
# One-time setup: the `rijksdriehoek` package is imported below for the
# coordinate translation. If it is not installed yet, uncomment the next line
# and run this cell once (`%pip` installs into the running kernel's environment).
# %pip install rijksdriehoek

### **Creating DataFrame, translating coordinates, dropping (some) irrelevant columns**

In [2]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from rijksdriehoek import rijksdriehoek
import numpy as np

def convert_cor(x, y):
    """Translate one Rijksdriehoek (RD) coordinate pair to WGS coordinates.

    Parameters
    ----------
    x, y
        Values assigned to the converter's ``rd_x`` and ``rd_y`` attributes.

    Returns
    -------
    tuple
        The two values produced by ``Rijksdriehoek.to_wgs()``, in the order
        the library returns them (callers treat them as latitude, longitude).
    """
    converter = rijksdriehoek.Rijksdriehoek()
    converter.rd_x, converter.rd_y = x, y

    # Unpack explicitly so the result is always a plain 2-tuple.
    latitude, longitude = converter.to_wgs()
    return latitude, longitude

# Load the Waternet water-quality measurements for Amsterdam (2007-2023).
# read_csv already returns a fresh frame, so no defensive copy is needed.
df = pd.read_csv('waternet_waterquality_2007-2023/Amsterdam2007-2023_utf-8.csv', low_memory = False)

# Translate every (x, y) coordinate pair with convert_cor. Iterating over the
# two coordinate columns directly avoids building a Series for every row,
# which is what iterrows() does and what makes it slow on large frames.
lattitude = []
longitude = []

for rd_x, rd_y in zip(df['locatie x'], df['locatie y']):
    lat_new, lon_new = convert_cor(rd_x, rd_y)
    lattitude.append(lat_new)
    longitude.append(lon_new)

# NOTE: the column name 'lattitude' (sic) is kept as-is, since other cells may
# refer to the column by this exact name.
df['lattitude'] = lattitude
df['longitude'] = longitude

# Keep only the measurements that have no remark attached.
df = df[pd.isna(df['opmerkingmeting'])]

# The original coordinates have been translated and the remark column is now
# empty; drop them together with the other columns not used further on.
df = df.drop(columns = ['locatie x', 'locatie y', 'locatie z', 'compartiment', 'fewsparametereenheidreferentie', 'fewsparameterparameterfractie', 'opmerkingmeting', 'waardebewerkingsmethode'])

display(df)

Unnamed: 0,locatiecode,locatie omschrijving,datum,fewsparameter,fewsparametercode,fewsparameternaam,fewsparametergrootheid,fewsparametereenheidequivalent,limietsymbool,meetwaarde,eenheid,afronding,lattitude,longitude
0,BGP008,Papelaan,3-1-2007 08:15,O2_mg/l,O2,Zuurstof (mg/l),CONCTTE,,,6.4,mg/l,Ja,52.321946,5.030963
1,BGP008,Papelaan,3-1-2007 08:15,O2_%,O2,Zuurstofverzadigingspercentage,VERZDGGD,,,49.0,%,Ja,52.321946,5.030963
2,BGP008,Papelaan,3-1-2007 08:15,T_oC,T,Temperatuur (oC),T,,,4.5,oC,Ja,52.321946,5.030963
3,BGP008,Papelaan,3-1-2007 08:15,ZICHT_m,ZICHT,Doorzicht (m),ZICHT,,>,0.2,m,Ja,52.321946,5.030963
4,BGP010,In bak achter benzinestation aflaat stedelijk ...,3-1-2007 08:30,O2_mg/l,O2,Zuurstof (mg/l),CONCTTE,,,3.1,mg/l,Ja,52.313926,5.041924
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91254,NIJ013,Als NIJ003 op 10 m,9-10-2023 11:02,O2_%,O2,Zuurstofverzadigingspercentage,VERZDGGD,,,97.0,%,Ja,52.424195,4.863088
91255,NIJ013,Als NIJ003 op 10 m,9-10-2023 11:02,O2_mg/l,O2,Zuurstof (mg/l),CONCTTE,,,9.4,mg/l,Ja,52.424195,4.863088
91256,NIJ014,Als NIJ003 op 15 m,9-10-2023 11:05,T_oC,T,Temperatuur (oC),T,,,16.2,oC,Ja,52.424195,4.863088
91257,NIJ014,Als NIJ003 op 15 m,9-10-2023 11:05,O2_%,O2,Zuurstofverzadigingspercentage,VERZDGGD,,,89.0,%,Ja,52.424195,4.863088


### **Creating dataframes for the measurements of interest**

In [3]:
# Split the measurements by parameter code; each subset keeps all columns of df.
parameter_code = df['fewsparametercode']

# E. coli measurements.
df_ecoli = df.loc[parameter_code.eq('E_COLI')]

# Code 'INTTNLETRCCN' - presumably intestinal enterococci; TODO confirm against
# the parameter list that comes with the dataset.
df_anthcx = df.loc[parameter_code.eq('INTTNLETRCCN')]

# Code 'CHLFa' - presumably chlorophyll-a, used here for blue-green algae;
# TODO confirm.
df_bluegreen = df.loc[parameter_code.eq('CHLFa')]

### **A way of creating a new df with only a certain location name, the date, and the measured value**

Note that this approach is very limited: the search text must match (part of) the location description exactly as it appears in the data. Also, some location names appear to be encoded, so those locations can only be identified by their coordinates.

In [4]:
# Text that must occur in 'locatie omschrijving' (case-sensitive substring match).
location_name = 'Gaasperplas,speelvijver Noord'

# Vectorized literal substring test. regex=False keeps the search text from
# being read as a regular expression, and na=False treats a missing
# description as "no match" instead of raising, as the row-wise `in` check did.
is_location = df_ecoli['locatie omschrijving'].str.contains(location_name, regex=False, na=False)
selected = df_ecoli.loc[is_location, ['locatie omschrijving', 'datum', 'meetwaarde']]

# Plain lists are kept under their original names for any later cell that uses them.
name = selected['locatie omschrijving'].tolist()
date = selected['datum'].tolist()
val = selected['meetwaarde'].tolist()

# Building from a dict of columns lets each column keep its own dtype (numeric
# 'Value') and yields the three named columns even when nothing matched.
new_df = pd.DataFrame({'Name': name, 'Date': date, 'Value': val})
new_df

Unnamed: 0,Name,Date,Value
0,"Gaasperplas,speelvijver Noord",16-4-2007 08:40,130.0
1,"Gaasperplas,speelvijver Noord",1-5-2007 08:30,110.0
2,"Gaasperplas,speelvijver Noord",14-5-2007 11:30,90.0
3,"Gaasperplas,speelvijver Noord",4-6-2007 12:00,350.0
4,"Gaasperplas,speelvijver Noord",18-6-2007 08:55,1000.0
...,...,...,...
191,"Gaasperplas,speelvijver Noord",7-8-2023 12:50,350.0
192,"Gaasperplas,speelvijver Noord",22-8-2023 13:34,670.0
193,"Gaasperplas,speelvijver Noord",4-9-2023 11:05,350.0
194,"Gaasperplas,speelvijver Noord",18-9-2023 10:05,4800.0
