In [None]:
%load_ext autoreload
%autoreload 2

In [1]:
import glob
import pandas as pd
import geopandas as gpd
import rasterio
from nso_ds_classes.nso_ds_normalize_scaler import scaler_class_all
from annotations.data_preparation import extract_dataframe_pixels_values_from_tif_and_polygons
from annotations.utils import get_scaler_filepath
from annotations.data_loader import load_annotations_polygons, load_annotations_polygons_gpkg
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
import os
import settings_blob

In [2]:
# function to upload the annotations to the cloud.
def upload_to_blob(apath, directory_blob=""):

    container_name = "satellite-images-nso"
    blob_name = directory_blob+os.path.basename(apath) 


    blob_service_client = BlobServiceClient.from_connection_string(settings_blob.connection_string)
    container_client = blob_service_client.get_container_client(container_name)
    blob_client = container_client.get_blob_client(blob_name)

    with open(apath, "rb") as data:
            blob_client.upload_blob(data ,  overwrite=True)


## Transform Polygon Annotations to Pixel Annotation Parquet files

This script is intended to transform given polygon annotations in geojson (made in i.e. QGis) into pixel level annotations, with scaled band values. The pixel level annotations are written to parquet files.
Change the variables below to match the situation on your device.
Note that these transformations are quite quickly very memory intensive.

Date: 2024-01-11 \
Author: Pieter Kouyzer


In [3]:
# Set variables
location = "Voornes_Duin"

if location == "Voornes_Duin":
    satellite = "PNEO"
    images_folder = "E:/Data/remote_sensing/satellite-images/PNEO_30CM/Voornes Duincv /"
    regex = "*_height_asphalt_crop.tif"
    annotations_folder = "C:/Users/pzhadmin/Data/remote-sensing/annotations/"
    annotations_polygon_filename_regex = "Voornes Duin PNEO_30CM Annotations_2024-01-19.geojson"
    scaler_folder_path = "../../scalers/"
    col_names = ["r", "g", "b", "n", "e", "d", "ndvi", "re_ndvi", "height"]
    pixel_filepath = os.path.join(annotations_folder, f"{location}_{satellite}_pixel_annotations.parquet")
    pixel_scaled_filepath = os.path.join(annotations_folder, f"{location}_{satellite}_pixel_annotations_scaled.parquet")

elif location == "Coepelduynen":
   
    images_folder = "E:/data/"
    regex = f"{location}/2023*re*asphalt_crop.tif"
    annotations_path = "C:/repos/satellite-images-nso-datascience/data/annotations/Coepelduynen/Annotations_Coepelduynen_2023.gpkg"


### Prepare data

In [4]:
if location == "Voornes_Duin":
    annotations_polygons_gdf = load_annotations_polygons(annotations_folder, annotations_polygon_filename_regex, regex, images_folder)

elif location == "Coepelduynen":
    annotations_polygons_gdf = load_annotations_polygons_gpkg("C:/repos/satellite-images-nso-datascience/data/annotations/Coepelduynen/Annotations_Coepelduynen_2023.gpkg")
    annotations_polygons_gdf = annotations_polygons_gdf.reset_index(drop=True)
    # Custom actions to set data straight.
    annotations_polygons_gdf.loc[annotations_polygons_gdf["name"] != "Annotations_Coepelduynen_2023","label"] = annotations_polygons_gdf[annotations_polygons_gdf["name"] != "Annotations_Coepelduynen_2023"]["Label_name"]
    annotations_polygons_gdf = annotations_polygons_gdf.drop(["Label_name"], axis=1)
    annotations_polygons_gdf['label'] = annotations_polygons_gdf['label'].str.replace("\nAsphalt","Asphalt")
    annotations_polygons_gdf["Label"] = annotations_polygons_gdf["label"]
    annotations_polygons_gdf = annotations_polygons_gdf.drop(["label"], axis=1)

ValueError: No objects to concatenate

In [None]:
if location == "Voornes_Duin":
    if os.path.isfile(pixel_filepath):
        df = pd.read_parquet(pixel_filepath)
    else:
        dfs = []
        for tif_file in glob.glob(os.path.join(images_folder, regex)):
            tif_file = tif_file.replace("\\","/")
            print(tif_file)
            name_tif_file = tif_file.split("/")[-1].split(".")[0]
            with rasterio.open(tif_file) as dataset:
                dfs += [
                    extract_dataframe_pixels_values_from_tif_and_polygons(
                        tif_dataset=dataset, polygon_gdf=annotations_polygons_gdf,name_tif_file=name_tif_file
                    )
                ]
            
        df = pd.concat(dfs)
        df.to_parquet(pixel_filepath)

elif location == "Coepelduynen":
    #Custom aggregation for annnotations across all satellite images
    dfs = []
    for tif_file in glob.glob(os.path.join(images_folder, regex)):
            tif_file = tif_file.replace("\\","/")
            print(tif_file)
            name_tif_file = tif_file.split("/")[-1].split(".")[0]
            with rasterio.open(tif_file) as dataset:
                dfs += [
                extract_dataframe_pixels_values_from_tif_and_polygons(
                            tif_dataset=dataset,
                            polygon_gdf=annotations_polygons_gdf[
                                annotations_polygons_gdf["name"] == "Annotations_Coepelduynen_2023"
                            ],
                            name_tif_file=tif_file.split("/")[-1],
                            name_annotations="Annotations_Coepelduynen_2023",
                        )          
                ]
            
    df = pd.concat(dfs)

    # Annotations for specific satellite images
    dfs = []
    for tif_file in glob.glob(os.path.join(images_folder, regex)):
        tif_file = tif_file.replace("\\","/")
        print(tif_file)
        name_tif_file = tif_file.split("/")[-1].split(".")[0].split("_")[0]+"_annotations"
        print(name_tif_file)
        with rasterio.open(tif_file) as dataset:
                dfs += [
                extract_dataframe_pixels_values_from_tif_and_polygons(
                            tif_dataset=dataset,
                            polygon_gdf=annotations_polygons_gdf[
                                annotations_polygons_gdf["name"] == name_tif_file
                            ],
                            name_tif_file=tif_file.split("/")[-1],
                            name_annotations=name_tif_file,
                        )          
                ]
        
    
    dfs = pd.concat(dfs)
    df = pd.concat([dfs,df])
    
    # Extra check to see if all the correct bands are found
    #if "ndvi" not in df.columns:

    #    if "n" in df.columns:
    #        df['ndvi'] =df.apply(lambda x:(((x['n'] - x['r']) / (x['n'] + x['r']))*100) + 100, axis =1)
    #    if "i" in df.columns:
    #        df['ndvi'] =df.apply(lambda x:(((x['i'] - x['r']) / (x['i'] + x['r']))*100) + 100, axis =1)
    #    else:
    #        print("No infrared band found for ndvi")

    #if "ndwi" not in df.columns:
    #    if "n" in df.columns:
    #        df['ndwi'] = df.apply(lambda x: ((x['g']- x['n'])/(x['n']+x['g'])*100)+100, axis=1 )
    #    if "i" in df.columns:
    #        df['ndwi'] = df.apply(lambda x: ((x['g']- x['i'])/(x['i']+x['g'])*100)+100, axis=1 )
    #    else:
    #        print("No infrared band found for ndwi")

    #if "re_ndvi" not in df.columns:
    #    if "e" in df.columns:
    #        df['re_ndvi'] =df.apply(lambda x:(((x['e'] - x['r']) / (x['e'] + x['r']))*100) + 100, axis =1)
    #    else:
    #        print("")

    df.to_parquet("annotations_pixel_dataframes/annotaties_coepelduynen_to_pixel_2023.parquet")

# Nieuwkoopse Plassen annotations.

In [6]:
#annotation_folder_path = "C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/{to_replace}*.geojson"
annotation_folder_path = "C:/repos/satellite-images-nso-datascience/data/annotations/Nieuwkoopse_Plassen/{to_replace}*.geojson"
#tif_files_path = "E:/data/nieuwkoopse_plassen_schippersgat/"
tif_files_path = "E:/data/nieuwkoopse_plassen/"
#output_name_annotations = "./annotations_pixel_dataframes/PNEO_waterplanten_annotations_schippersgat.parquet"
#output_name_annotations = "./annotations_pixel_dataframes/PNEO_waterplanten_annotations_Nieuwkoopse_Plassen.parquet"

In [7]:
# PNEO annotations
dfs = []
for annotation_file in glob.glob(annotation_folder_path.replace("{to_replace}", "2023")):
    annotation_file = annotation_file.replace("\\","/")
    print(annotation_file)

    tif_file = glob.glob(tif_files_path+annotation_file.split("/")[-1].split("_")[0]+"*ndwi_re_ndvi*")[0].replace("\\","/")
    print(tif_file)

    name_tif_file = tif_file.split("/")[-1].split(".")[0].split("_")[0]+"_annotations"
    print(name_tif_file)

    waterplanten_annotations = gpd.read_file(annotation_file)
    waterplanten_annotations["name"] = name_tif_file

    if "label" in waterplanten_annotations.columns:
        waterplanten_annotations['Label'] = waterplanten_annotations['label']
    try: 
        waterplanten_annotations["Label"] = waterplanten_annotations["class"]
    except Exception as e:
        print(e)

    waterplanten_annotations  = waterplanten_annotations.to_crs("EPSG:28992")

    
    with rasterio.open(tif_file) as dataset:
         dfs += [extract_dataframe_pixels_values_from_tif_and_polygons(dataset, waterplanten_annotations, tif_file.split("/")[-1], name_tif_file)]

df = pd.concat(dfs)
# TODO: update the descriptions
#df['ndwi'] =df['height']
#df = df.drop(['height'], axis=1)
df.to_parquet(output_name_annotations)

C:/repos/satellite-images-nso-datascience/data/annotations/Nieuwkoopse_Plassen/20230603_waterplant_annotations.geojson
E:/data/nieuwkoopse_plassen/20230603_104624_PNEO-03_1_1_30cm_RD_12bit_RGBNED_Uithoorn_Nieuwkoopse_Plassen_De_Haeck_cropped_ndwi_re_ndvi.tif
20230603_annotations
'class'
Found some empty pixels!
C:/repos/satellite-images-nso-datascience/data/annotations/Nieuwkoopse_Plassen/20230905_waterplant_annotations_new_rd.geojson
E:/data/nieuwkoopse_plassen/20230905_105231_PNEO-03_1_1_30cm_RD_12bit_RGBNED_Uithoorn_Nieuwkoopse_Plassen_De_Haeck_cropped_ndwi_re_ndvi.tif
20230905_annotations
'class'
Found some empty pixels!


NameError: name 'output_name_annotations' is not defined

In [8]:
df

Unnamed: 0,r,g,b,n,e,d,ndwi,re_ndvi,rd_x,rd_y,label,image,date,season,annotation_no
0,231.0,395.0,416.0,1728.0,932.0,597.0,37.0,160.0,117759.45,463110.75,Ground,20230603_104624_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230603_104624,Summer,0_20230603_annotations
1,230.0,394.0,416.0,1732.0,930.0,596.0,37.0,160.0,117759.75,463110.75,Ground,20230603_104624_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230603_104624,Summer,0_20230603_annotations
2,233.0,399.0,420.0,1743.0,935.0,598.0,37.0,160.0,117760.05,463110.75,Ground,20230603_104624_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230603_104624,Summer,0_20230603_annotations
3,234.0,402.0,424.0,1747.0,936.0,599.0,37.0,160.0,117760.35,463110.75,Ground,20230603_104624_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230603_104624,Summer,0_20230603_annotations
4,223.0,394.0,416.0,1709.0,917.0,591.0,37.0,160.0,117760.65,463110.75,Ground,20230603_104624_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230603_104624,Summer,0_20230603_annotations
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7116360,186.0,306.0,292.0,1200.0,754.0,383.0,40.0,160.0,117276.15,464602.95,Waterplants,20230905_105231_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230905_105231,Fall,45_20230905_annotations
7116361,212.0,329.0,312.0,1259.0,789.0,403.0,41.0,157.0,117275.55,464602.65,Waterplants,20230905_105231_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230905_105231,Fall,45_20230905_annotations
7116362,206.0,325.0,308.0,1252.0,782.0,398.0,41.0,158.0,117275.85,464602.65,Waterplants,20230905_105231_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230905_105231,Fall,45_20230905_annotations
7116363,188.0,311.0,293.0,1214.0,760.0,386.0,40.0,160.0,117276.15,464602.65,Waterplants,20230905_105231_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230905_105231,Fall,45_20230905_annotations


In [9]:
# Upload annotations to the cloud
upload_to_blob("./annotations_pixel_dataframes/PNEO_waterplanten_annotations.parquet", "Nieuwkoopse_Plassen/")

In [9]:
# Superview annotations
annotation_folder_path = "C:/repos/satellite-images-nso-datascience/data/annotations/Nieuwkoopse_Plassen/{to_replace}*.geojson"
#tif_files_path = "E:/data/nieuwkoopse_plassen_schippersgat/"
tif_files_path = "E:/data/nieuwkoopse_plassen/"
dfs = []
for annotation_file in glob.glob(annotation_folder_path.replace("{to_replace}", "2022"))+  glob.glob(annotation_folder_path.replace("{to_replace}", "2021"))+  glob.glob(annotation_folder_path.replace("{to_replace}", "2019"))+  glob.glob(annotation_folder_path.replace("{to_replace}", "2020")): 
    annotation_file = annotation_file.replace("\\","/")
    annotation_file_name = annotation_file.split("/")[-1]
    print(annotation_file_name)

    
    if len(annotation_file_name.split("_")) == 3:
        print(tif_files_path+annotation_file_name.split("_")[0]+"*ndwi*")
        tif_file = glob.glob(tif_files_path+annotation_file_name.split("_")[0]+"*ndwi*")[0].replace("\\","/")
    
    if len(annotation_file_name.split("_")) > 3:
        print(tif_files_path+annotation_file_name.split("_")[0]+"_"+annotation_file_name.split("_")[1]+"*ndwi*")
        tif_file = glob.glob(tif_files_path+annotation_file_name.split("_")[0]+"_"+annotation_file_name.split("_")[1]+"*ndwi*")[0].replace("\\","/")
    print(tif_file)

    name_tif_file = tif_file.split("/")[-1].split(".")[0].split("_")[0]+"_annotations"
    print(name_tif_file)

    waterplanten_annotations = gpd.read_file(annotation_file)
    waterplanten_annotations["name"] = name_tif_file

    if "label" in waterplanten_annotations.columns:
        waterplanten_annotations["Label"] = waterplanten_annotations["label"]

    try: 
        waterplanten_annotations["Label"] = waterplanten_annotations["class"]
    except Exception as e:
        print(e)

    waterplanten_annotations  = waterplanten_annotations.to_crs("EPSG:28992")

    
    with rasterio.open(tif_file) as dataset:
         dfs += [extract_dataframe_pixels_values_from_tif_and_polygons(dataset, waterplanten_annotations, tif_file.split("/")[-1], name_tif_file )]

20220318_waterplant_annotations.geojson
E:/data/nieuwkoopse_plassen/20220318*ndwi*
E:/data/nieuwkoopse_plassen/20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_Bodegraven_Nieuwkoopse_Plassen_De_Haeck_cropped_ndwi_ndvi.tif
20220318_annotations
'class'
Found some empty pixels!
20220803_2waterplant_annotations.geojson
E:/data/nieuwkoopse_plassen/20220803*ndwi*
E:/data/nieuwkoopse_plassen/20220803_105908_SV2-01_SV_RD_11bit_RGBI_50cm_Boskoop_Nieuwkoopse_Plassen_De_Haeck_cropped_ndwi_ndvi.tif
20220803_annotations
'class'
Found some empty pixels!
20220803_waterplant_annotations.geojson
E:/data/nieuwkoopse_plassen/20220803*ndwi*
E:/data/nieuwkoopse_plassen/20220803_105908_SV2-01_SV_RD_11bit_RGBI_50cm_Boskoop_Nieuwkoopse_Plassen_De_Haeck_cropped_ndwi_ndvi.tif
20220803_annotations
'class'
Found some empty pixels!
20220811_waterplant_annotations.geojson
E:/data/nieuwkoopse_plassen/20220811*ndwi*
E:/data/nieuwkoopse_plassen/20220811_110734_SV1-04_SV_RD_11bit_RGBI_50cm_Mijdrecht_Nieuwkoopse_Plassen_De_

In [10]:
df = pd.concat(dfs)

In [11]:
df

Unnamed: 0,r,g,b,i,ndwi,ndvi,rd_x,rd_y,label,image,date,season,annotation_no,re_ndvi
0,266.0,215.0,125.0,126.0,126.0,64.0,115102.25,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
1,267.0,215.0,125.0,126.0,126.0,64.0,115102.75,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
2,268.0,216.0,125.0,127.0,125.0,64.0,115103.25,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
3,268.0,216.0,125.0,126.0,126.0,63.0,115103.75,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
4,267.0,215.0,124.0,124.0,126.0,63.0,115104.25,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
234941,445.0,460.0,311.0,934.0,65.0,,117286.25,462759.75,Ground,20200812_111845_SV1-02_SV_RD_11bit_RGBI_50cm_V...,20200812_111845,Summer,20_20200812_annotations,25.0
234942,446.0,461.0,312.0,943.0,65.0,,117286.75,462759.75,Ground,20200812_111845_SV1-02_SV_RD_11bit_RGBI_50cm_V...,20200812_111845,Summer,20_20200812_annotations,25.0
234943,448.0,463.0,315.0,956.0,65.0,,117287.25,462759.75,Ground,20200812_111845_SV1-02_SV_RD_11bit_RGBI_50cm_V...,20200812_111845,Summer,20_20200812_annotations,25.0
234944,450.0,466.0,318.0,976.0,64.0,,117287.75,462759.75,Ground,20200812_111845_SV1-02_SV_RD_11bit_RGBI_50cm_V...,20200812_111845,Summer,20_20200812_annotations,24.0


In [14]:
df['label'] = df['label'].replace("Waterplanten", "Waterplants")
df['label'] = df['label'].replace("WAter", "Water")
df['label'] = df['label'].replace("Waterolants", "Waterplants")
df['label'] = df['label'].replace("waterolants", "Waterplants")
df['label'] = df['label'].replace("waterplants", "Waterplants")
df['label'] = df['label'].replace("WAterplants", "Waterplants")

In [15]:
df.drop(["image","season","annotation_no", "date"], axis=1).groupby(["label"]).mean()

Unnamed: 0_level_0,r,g,b,i,ndwi,ndvi,rd_x,rd_y,re_ndvi
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Ground,520.36316,441.625371,336.073749,959.782375,63.393534,128.005755,115330.195674,461997.165513,25.336463
Water,548.070262,392.664041,284.726999,241.876233,120.801856,63.856536,114746.607631,461943.601829,50.775922
Waterplants,587.498523,560.408441,378.398958,1024.514813,71.049176,122.590928,115447.708252,462518.480091,24.717413


# Optional add Schippersgat

In [16]:
annotation_folder_path = "C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/{to_replace}*.geojson"
tif_files_path = "E:/data/nieuwkoopse_plassen_schippersgat/"
# Superview annotations
dfs_schippersgat = []
for annotation_file in glob.glob(annotation_folder_path.replace("{to_replace}", "2022"))+  glob.glob(annotation_folder_path.replace("{to_replace}", "2021"))+  glob.glob(annotation_folder_path.replace("{to_replace}", "2019"))+  glob.glob(annotation_folder_path.replace("{to_replace}", "2020")): 
    annotation_file = annotation_file.replace("\\","/")
    print(annotation_file)

  
    tif_file = glob.glob(tif_files_path+annotation_file.split("/")[-1].split("_")[0]+"*ndwi*")[0].replace("\\","/")
    print(tif_file)

    name_tif_file = tif_file.split("/")[-1].split(".")[0].split("_")[0]+"_annotations"
    print(name_tif_file)

    waterplanten_annotations = gpd.read_file(annotation_file)
    waterplanten_annotations["name"] = name_tif_file

    if "label" in waterplanten_annotations.columns:
        waterplanten_annotations["Label"] = waterplanten_annotations["label"]

    try: 
        waterplanten_annotations["Label"] = waterplanten_annotations["class"]
    except Exception as e:
        print(e)

    waterplanten_annotations  = waterplanten_annotations.to_crs("EPSG:28992")

    
    with rasterio.open(tif_file) as dataset:
         dfs_schippersgat += [extract_dataframe_pixels_values_from_tif_and_polygons(dataset, waterplanten_annotations, tif_file.split("/")[-1], name_tif_file )]

C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/20220705_waterplant_annotations.geojson
E:/data/nieuwkoopse_plassen_schippersgat/20220705_110002_SV2-01_SV_RD_11bit_RGBI_50cm_Woerden_Schippersgat_cropped_ndwi_re_ndvi.tif
20220705_annotations
'class'
Found some empty pixels!
C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/20210617_waterplant_annotations.geojson
E:/data/nieuwkoopse_plassen_schippersgat/20210617_111532_SV1-04_SV_RD_11bit_RGBI_50cm_Mijdrecht_Schippersgat_cropped_ndwi_re_ndvi.tif
20210617_annotations
'class'
Found some empty pixels!
C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/20190302_waterplants_annotations.geojson
E:/data/nieuwkoopse_plassen_schippersgat/20190302_113613_SV1-02_50cm_RD_11bit_RGBI_Mijdrecht_Schippersgat_cropped_ndwi_re_ndvi.tif
20190302_annotations
'class'
Found some empty pixels!
C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/20190409_waterplant_annotations.geoj

In [17]:
df_schippers = pd.concat(dfs_schippersgat)
df_schippers

Unnamed: 0,r,g,b,i,ndwi,re_ndvi,rd_x,rd_y,label,image,date,season,annotation_no
0,1171.0,753.0,630.0,433.0,126.0,19.0,117066.75,463853.75,Water,20220705_110002_SV2-01_SV_RD_11bit_RGBI_50cm_W...,20220705_110002,Summer,0_20220705_annotations
1,1107.0,693.0,562.0,386.0,128.0,20.0,117067.25,463853.75,Water,20220705_110002_SV2-01_SV_RD_11bit_RGBI_50cm_W...,20220705_110002,Summer,0_20220705_annotations
2,1101.0,687.0,558.0,383.0,128.0,20.0,117067.75,463853.75,Water,20220705_110002_SV2-01_SV_RD_11bit_RGBI_50cm_W...,20220705_110002,Summer,0_20220705_annotations
3,1180.0,762.0,640.0,440.0,126.0,19.0,117066.75,463853.25,Water,20220705_110002_SV2-01_SV_RD_11bit_RGBI_50cm_W...,20220705_110002,Summer,0_20220705_annotations
4,1174.0,757.0,636.0,437.0,126.0,19.0,117067.25,463853.25,Water,20220705_110002_SV2-01_SV_RD_11bit_RGBI_50cm_W...,20220705_110002,Summer,0_20220705_annotations
...,...,...,...,...,...,...,...,...,...,...,...,...,...
77882,448.0,533.0,280.0,1207.0,61.0,23.0,117204.25,463648.75,Waterplants,20190629_113712_SV1-02_50cm_RD_11bit_RGBI_Mijd...,20190629_113712,Summer,13_20190629_annotations
77883,437.0,519.0,267.0,1162.0,61.0,24.0,117204.75,463648.75,Waterplants,20190629_113712_SV1-02_50cm_RD_11bit_RGBI_Mijd...,20190629_113712,Summer,13_20190629_annotations
77884,434.0,516.0,264.0,1150.0,61.0,24.0,117205.25,463648.75,Waterplants,20190629_113712_SV1-02_50cm_RD_11bit_RGBI_Mijd...,20190629_113712,Summer,13_20190629_annotations
77885,445.0,529.0,277.0,1193.0,61.0,24.0,117204.25,463648.25,Waterplants,20190629_113712_SV1-02_50cm_RD_11bit_RGBI_Mijd...,20190629_113712,Summer,13_20190629_annotations


In [18]:
df_schippers['ndvi'] = df_schippers['re_ndvi']

In [19]:
df = pd.concat([df,df_schippers])

# Done with optional

In [20]:
df.sort_values("date")['date'].unique()

array(['20190302_113613', '20190409_111030', '20190416_113410',
       '20190629_113712', '20190826_111247', '20191003_113732',
       '20200713_111443', '20200713_111445', '20200812_111845',
       '20210423_111111', '20210614_111316', '20210614_111318',
       '20210617_111532', '20210617_111534', '20220318_114608',
       '20220705_110002', '20220803_105908', '20220811_110734',
       '20221012_104900'], dtype=object)

In [21]:
df

Unnamed: 0,r,g,b,i,ndwi,ndvi,rd_x,rd_y,label,image,date,season,annotation_no,re_ndvi
0,266.0,215.0,125.0,126.0,126.0,64.0,115102.25,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
1,267.0,215.0,125.0,126.0,126.0,64.0,115102.75,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
2,268.0,216.0,125.0,127.0,125.0,64.0,115103.25,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
3,268.0,216.0,125.0,126.0,126.0,63.0,115103.75,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
4,267.0,215.0,124.0,124.0,126.0,63.0,115104.25,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77882,448.0,533.0,280.0,1207.0,61.0,23.0,117204.25,463648.75,Waterplants,20190629_113712_SV1-02_50cm_RD_11bit_RGBI_Mijd...,20190629_113712,Summer,13_20190629_annotations,23.0
77883,437.0,519.0,267.0,1162.0,61.0,24.0,117204.75,463648.75,Waterplants,20190629_113712_SV1-02_50cm_RD_11bit_RGBI_Mijd...,20190629_113712,Summer,13_20190629_annotations,24.0
77884,434.0,516.0,264.0,1150.0,61.0,24.0,117205.25,463648.75,Waterplants,20190629_113712_SV1-02_50cm_RD_11bit_RGBI_Mijd...,20190629_113712,Summer,13_20190629_annotations,24.0
77885,445.0,529.0,277.0,1193.0,61.0,24.0,117204.25,463648.25,Waterplants,20190629_113712_SV1-02_50cm_RD_11bit_RGBI_Mijd...,20190629_113712,Summer,13_20190629_annotations,24.0


In [22]:
df['label'] = df['label'].replace("Waterplanten", "Waterplants")

In [23]:
df['ndvi'] = df['ndvi'].fillna(df['re_ndvi'])

In [24]:
df.drop(["image","season","annotation_no", "date"], axis=1).groupby(["label"]).mean()

Unnamed: 0_level_0,r,g,b,i,ndwi,ndvi,rd_x,rd_y,re_ndvi
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Ground,521.787231,443.421122,336.218234,967.055155,63.468138,105.457218,115391.506344,462059.323077,25.023556
Water,548.851811,395.148153,286.177694,248.183647,120.202363,59.266427,114974.033057,462122.329404,46.027365
Waterplants,661.289071,641.028224,431.797813,1182.226397,69.713331,71.393654,116019.243137,462923.233467,20.419705


In [25]:
df

Unnamed: 0,r,g,b,i,ndwi,ndvi,rd_x,rd_y,label,image,date,season,annotation_no,re_ndvi
0,266.0,215.0,125.0,126.0,126.0,64.0,115102.25,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
1,267.0,215.0,125.0,126.0,126.0,64.0,115102.75,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
2,268.0,216.0,125.0,127.0,125.0,64.0,115103.25,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
3,268.0,216.0,125.0,126.0,126.0,63.0,115103.75,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
4,267.0,215.0,124.0,124.0,126.0,63.0,115104.25,463270.25,Water,20220318_114608_SV1-02_SV_RD_11bit_RGBI_50cm_B...,20220318_114608,Spring,0_20220318_annotations,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77882,448.0,533.0,280.0,1207.0,61.0,23.0,117204.25,463648.75,Waterplants,20190629_113712_SV1-02_50cm_RD_11bit_RGBI_Mijd...,20190629_113712,Summer,13_20190629_annotations,23.0
77883,437.0,519.0,267.0,1162.0,61.0,24.0,117204.75,463648.75,Waterplants,20190629_113712_SV1-02_50cm_RD_11bit_RGBI_Mijd...,20190629_113712,Summer,13_20190629_annotations,24.0
77884,434.0,516.0,264.0,1150.0,61.0,24.0,117205.25,463648.75,Waterplants,20190629_113712_SV1-02_50cm_RD_11bit_RGBI_Mijd...,20190629_113712,Summer,13_20190629_annotations,24.0
77885,445.0,529.0,277.0,1193.0,61.0,24.0,117204.25,463648.25,Waterplants,20190629_113712_SV1-02_50cm_RD_11bit_RGBI_Mijd...,20190629_113712,Summer,13_20190629_annotations,24.0


In [26]:
df = df.drop(['re_ndvi'],axis=1)

In [27]:
df[(df["rd_y"]  == 463906.25) & (df["rd_x"] == 117219.25)]

Unnamed: 0,r,g,b,i,ndwi,ndvi,rd_x,rd_y,label,image,date,season,annotation_no
9982,646.0,458.0,358.0,273.0,125.0,59.0,117219.25,463906.25,Water,20221012_104900_SV2-01_SV_RD_11bit_RGBI_50cm_N...,20221012_104900,Fall,2_20221012_annotations


In [28]:
df['label'].value_counts()

label
Water          3814785
Ground         2542204
Waterplants     235613
Name: count, dtype: int64

In [47]:
df.to_parquet("./annotations_pixel_dataframes/Superview_waterplanten_annotations.parquet")

In [48]:
# Upload annotations to the cloud
upload_to_blob("./annotations_pixel_dataframes/Superview_waterplanten_annotations.parquet", "Nieuwkoopse_Plassen/")

# Create custom tif files for models tests.

In [49]:
annotation_folder_path

test_output_dir = "E:/output/test/Nieuwkoopse_plassen/"

In [50]:
from rasterio.mask import mask

def crop_raster_based_on_gdf(agdf,input_raster_path,output_raster_path ):

    area_to_crop = agdf["geometry"]

    with rasterio.open(input_raster_path) as src:
        

        out_image, out_transform = rasterio.mask.mask(
            src, area_to_crop, crop=True, filled=True
        )
        out_profile = src.profile

        out_profile.update(
            {
                "driver": "GTiff",
                "interleave": "band",
                "tiled": True,
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_transform,
                "dtype": out_image.dtype 
            }
        )

        with rasterio.open(output_raster_path, "w", **out_profile) as dest:
                dest.write(out_image) 
                dest.close()

In [51]:
for afile in glob.glob(annotation_folder_path.replace("{to_replace}","")):
    afile = afile.replace("\\","/")
    print(afile)

    waterplanten_annotations = gpd.read_file(afile)
    waterplanten_annotations =waterplanten_annotations.to_crs("EPSG:28992")
    tif_file = glob.glob(tif_files_path+afile.split("/")[-1].split("_")[0]+"*ndwi*")[0].replace("\\","/")

    if "label" in waterplanten_annotations.columns:
        waterplanten_annotations["Label"] = waterplanten_annotations["label"]


    crop_raster_based_on_gdf(waterplanten_annotations[waterplanten_annotations['Label'] == "Water"][0:1], tif_file, test_output_dir+tif_file.split("/")[-1].replace(".tif","_Water_test.tif"))
    if len(waterplanten_annotations[waterplanten_annotations['Label'] == "Waterplants"]) > 0 :
        crop_raster_based_on_gdf(waterplanten_annotations[waterplanten_annotations['Label'] == "Waterplants"][0:1], tif_file, test_output_dir+tif_file.split("/")[-1].replace(".tif","_Waterplants_test.tif"))
    crop_raster_based_on_gdf(waterplanten_annotations[waterplanten_annotations['Label'] == "Ground"][0:1], tif_file, test_output_dir+tif_file.split("/")[-1].replace(".tif","_Ground_test.tif"))

C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/20190302_waterplants_annotations.geojson
C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/20190409_waterplant_annotations.geojson
C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/20190416_waterplants_annotations.geojson
C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/20190629_waterplants_annotations.geojson
C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/20210617_waterplant_annotations.geojson
C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/20220705_waterplant_annotations.geojson
C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/20230603_waterplants_annotations.geojson
C:/repos/satellite-images-nso-datascience/data/annotations/Schippergat/20230905_waterplanten_annotations.geojson


KeyError: 'Label'

# 