In [9]:
# Imports
import geopandas as gpd
import pandas as pd

### General Description of "Zaanstad boorpunten data":
- Name: Zaanstad boorpunten data
- Type: Categorical variable

### Categories:
- Below AW (Above Water)
- Above AW
- Above I (Indicator)

Description: This variable represents soil measurements categorized into three groups based on their relative position to water levels. <br>
Size: The dataset contains a certain number of observations corresponding to different soil measurements. <br>

### Transformations:
Filtering by Measurement Year: Remove all rows that are measured before 2008. <br>
Spatial Filtering: Include only points that have 10 neighboring points within a radius of 25 meters. Points with more than 10 neighboring points should also be included.

In [2]:
# Load the specific layer from the geopackage
gdf = gpd.read_file("../Data/Bodemloket_20231009_Antea_Group.gpkg", layer="TBL_WFS_GEO_BORING_ONDERZOEK")

In [17]:
# Describtion of the raw data
print("General Description of Zaanstad boorpunten data:")
print("Number of observations:", len(gdf))
print("Categories and their counts:")
print(zaanstad_data_filtered['TOETS_WBB'].value_counts())

General Description of Zaanstad boorpunten data:
Number of observations: 50537
Categories and their counts:
>I      7155
>AW     6631
<=AW    2402
>T      2337
Name: TOETS_WBB, dtype: int64


In [5]:
# Transformation 1: Removing NULL values from DATUM_RAP and TOETS_WBB columns
gdf = gdf.dropna(subset=['DATUM_RAP'])
gdf = gdf.dropna(subset=['TOETS_WBB'])

In [12]:
# Transformation 2: Filtering by Measurement Year
# Convert DATUM_RAP to datetime format
gdf['DATUM_RAP'] = pd.to_datetime(gdf['DATUM_RAP'], format='%d-%m-%Y')

# Remove all measurements before 2008
gdf = gdf[gdf['DATUM_RAP'].dt.year >= 2008]

In [13]:
# Transformation 3: Spatial Filtering
# Calculate number of neighboring points within a radius of 25 meters
gdf['neighbors'] = gdf.geometry.apply(lambda x: len(gdf[gdf.geometry.within(x.buffer(25))]) - 1) # Subtract 1 to exclude the point itself

In [14]:
# Filter points with 10 or more neighbors
zaanstad_data_filtered = gdf[gdf['neighbors'] >= 10]

In [15]:
zaanstad_data_filtered

Unnamed: 0,ID,BOORPUNT_ID,NAAM_BOORPUNT,BOORPUNTTYPE,EINDDIEPTE,DIEPTEGW,TOETS_WBB,TOETS_GW,TOETS_BBK,OPMERKINGEN,ONDERZOEK_ID,ONDERZOEKCODE,TYPE_OND,NAAM_RAP,NUMMER_RAP,AUTEUR_RAP,DATUM_RAP,LOCATIECODES,geometry,neighbors
23,492172.0,492172.0,69,grondboring,50 cm,\n,<=AW,,Achtergrondwaarde,,520436.0,NZ047902424,Orienterend bodemonderzoek,UNDER CONSTRUCTION - Loodlocatie L233 - Oriënt...,R001-1282282TMA-V02-aao-NL,TAUW bv,2021-11-15,ZA047918906,POINT (111844.548 501301.354),12
24,492173.0,492173.0,70,grondboring,50 cm,\n,<=AW,,Achtergrondwaarde,,520436.0,NZ047902424,Orienterend bodemonderzoek,UNDER CONSTRUCTION - Loodlocatie L233 - Oriënt...,R001-1282282TMA-V02-aao-NL,TAUW bv,2021-11-15,ZA047918906,POINT (111833.966 501287.223),10
29,492178.0,492178.0,75,grondboring,50 cm,\n,<=AW,,Achtergrondwaarde,,520436.0,NZ047902424,Orienterend bodemonderzoek,UNDER CONSTRUCTION - Loodlocatie L233 - Oriënt...,R001-1282282TMA-V02-aao-NL,TAUW bv,2021-11-15,ZA047918906,POINT (111899.932 501268.941),11
31,492180.0,492180.0,77,grondboring,50 cm,\n,<=AW,,Achtergrondwaarde,,520436.0,NZ047902424,Orienterend bodemonderzoek,UNDER CONSTRUCTION - Loodlocatie L233 - Oriënt...,R001-1282282TMA-V02-aao-NL,TAUW bv,2021-11-15,ZA047918906,POINT (111828.672 501312.238),13
108,492181.0,492181.0,78,grondboring,50 cm,\n,<=AW,,Achtergrondwaarde,,520436.0,NZ047902424,Orienterend bodemonderzoek,UNDER CONSTRUCTION - Loodlocatie L233 - Oriënt...,R001-1282282TMA-V02-aao-NL,TAUW bv,2021-11-15,ZA047918906,POINT (111825.046 501315.857),11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119951,493044.0,493044.0,027,grondboring,60 cm,\n,>I,,Niet toepasbaar,,520878.0,NZ047902475,Nader en Asbestonderzoek,Aris van broekweg 1-3 te Zaandam,0458913.138,Antea Group,2020-09-21,NH047900215,POINT (116058.292 494235.399),47
119952,493048.0,493048.0,031,grondboring,50 cm,\n,>I,,Niet toepasbaar,,520878.0,NZ047902475,Nader en Asbestonderzoek,Aris van broekweg 1-3 te Zaandam,0458913.138,Antea Group,2020-09-21,NH047900215,POINT (116054.797 494255.188),32
119953,493064.0,493064.0,201,grondboring,150 cm,\n,>AW,,Wonen,,520878.0,NZ047902475,Nader en Asbestonderzoek,Aris van broekweg 1-3 te Zaandam,0458913.138,Antea Group,2020-09-21,NH047900215,POINT (116049.205 494248.979),41
119954,493068.0,493068.0,102,grondboring,150 cm,\n,>AW,,Wonen,,520878.0,NZ047902475,Nader en Asbestonderzoek,Aris van broekweg 1-3 te Zaandam,0458913.138,Antea Group,2020-09-21,NH047900215,POINT (116027.482 494239.359),27
