In [None]:
%load_ext autoreload
%autoreload 2

In [1]:
import glob
import pandas as pd
import rasterio
from nso_ds_classes.nso_ds_normalize_scaler import scaler_class_all
from annotations.data_preparation import extract_dataframe_pixels_values_from_tif_and_polygons
from annotations.utils import get_scaler_filepath
from  annotations.data_loader import load_annotations_polygons_gpkg
import os

## Transform Polygon Annotations to Pixel Annotation Parquet files

This notebook transforms polygon annotations stored in a GeoPackage (`.gpkg`) file (made in e.g. QGIS) into pixel-level annotations with scaled band values. The pixel-level annotations are written to parquet files.
Change the variables below to match the situation on your device.
Note that these transformations can quickly become very memory intensive.

Date: 2024-01-11 \
Author: Pieter Kouyzer, Michael de Winter


In [2]:
# Set variables
# Everything a user has to adapt to their own machine lives in this cell.
location = "coepelduynen"
images_folder = "E:/data/"
# Glob pattern (relative to images_folder) selecting the satellite images to process.
regex = f"{location}/2023*re*asphalt_crop.tif"
annotations_path = "C:/repos/satellite-images-nso-datascience/data/annotations/Coepelduynen/Annotations_Coepelduynen_2023.gpkg"

scaler_folder_path = "../../scalers/"

# Output parquet file for the scaled pixel annotations (used by the final save cell).
# By default it is written next to the annotations file; adjust as needed.
pixel_scaled_filepath = os.path.splitext(annotations_path)[0] + "_pixel_scaled.parquet"

### Prepare data

In [3]:
# Load the polygon annotations from the path configured in the variables cell,
# so that changing `annotations_path` there is enough to switch input files.
annotations_polygons_gdf = load_annotations_polygons_gpkg(annotations_path)

In [5]:
# Replace the existing index with a fresh default one; the old index is discarded (drop=True).
annotations_polygons_gdf = annotations_polygons_gdf.reset_index(drop=True)

In [6]:
# Custom clean-up of the label columns.
# Rows that do not belong to the "Annotations_Coepelduynen_2023" layer take their
# label from the "Label_name" column, which is dropped afterwards.
not_shared_layer = annotations_polygons_gdf["name"] != "Annotations_Coepelduynen_2023"
annotations_polygons_gdf.loc[not_shared_layer, "label"] = annotations_polygons_gdf.loc[
    not_shared_layer, "Label_name"
]
annotations_polygons_gdf = annotations_polygons_gdf.drop(columns=["Label_name"])

# Remove the newline that precedes "Asphalt" in some labels.
annotations_polygons_gdf["label"] = annotations_polygons_gdf["label"].str.replace(
    "\nAsphalt", "Asphalt"
)

# Store the cleaned values under "Label" and remove the lowercase "label" column.
annotations_polygons_gdf["Label"] = annotations_polygons_gdf["label"]
annotations_polygons_gdf = annotations_polygons_gdf.drop(columns=["label"])

In [7]:
# Display the cleaned annotations for a visual check.
annotations_polygons_gdf

Unnamed: 0,geometry,name,Label
0,"POLYGON ((88197.082 470348.814, 88197.617 4703...",Annotations_Coepelduynen_2023,Sand
1,"POLYGON ((88393.545 470238.281, 88402.459 4702...",Annotations_Coepelduynen_2023,Vegetation
2,"POLYGON ((88300.127 470312.623, 88306.545 4703...",Annotations_Coepelduynen_2023,Sand
3,"POLYGON ((88338.457 470289.447, 88370.547 4702...",Annotations_Coepelduynen_2023,Vegetation
4,"POLYGON ((88320.607 470562.728, 88317.400 4705...",Annotations_Coepelduynen_2023,Vegetation
5,"POLYGON ((88193.748 470693.061, 88195.953 4706...",Annotations_Coepelduynen_2023,Asphalt
6,"POLYGON ((88197.355 470697.603, 88200.740 4707...",Annotations_Coepelduynen_2023,Asphalt
7,"POLYGON ((88189.205 470688.229, 88191.477 4706...",Annotations_Coepelduynen_2023,Asphalt
8,"POLYGON ((88642.619 471444.193, 88642.063 4714...",Annotations_Coepelduynen_2023,Asphalt
9,"POLYGON ((88697.153 471430.031, 88714.967 4714...",Annotations_Coepelduynen_2023,Sand


In [8]:
# Custom aggregation for annotations across all satellite images.
# Every image matched by `regex` is combined with the polygons of the shared
# "Annotations_Coepelduynen_2023" layer; the per-image results are stacked into `df`.
dfs = []  # start empty so the cell works on a fresh kernel and can safely be re-run
for tif_file in glob.glob(os.path.join(images_folder, regex)):
    # Normalise Windows path separators so the file name can be split off on "/".
    tif_file = tif_file.replace("\\", "/")
    print(tif_file)
    with rasterio.open(tif_file) as dataset:
        dfs.append(
            extract_dataframe_pixels_values_from_tif_and_polygons(
                tif_dataset=dataset,
                polygon_gdf=annotations_polygons_gdf[
                    annotations_polygons_gdf["name"] == "Annotations_Coepelduynen_2023"
                ],
                name_tif_file=tif_file.split("/")[-1],
                name_annotations="Annotations_Coepelduynen_2023",
            )
        )

# pd.concat raises an unhelpful ValueError on an empty list; fail with a clear message instead.
if not dfs:
    raise FileNotFoundError(
        f"No .tif files found for pattern {os.path.join(images_folder, regex)!r}"
    )
df = pd.concat(dfs)


E:/data/coepelduynen/20230402_105321_PNEO-03_1_49_30cm_RD_12bit_RGBNED_Zoeterwoude_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
E:/data/coepelduynen/20230513_104139_PNEO-03_1_1_30cm_RD_12bit_RGBNED_NoordwijkAanZee_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
E:/data/coepelduynen/20230601_105710_PNEO-04_1_1_30cm_RD_12bit_RGBNED_Rijnsburg_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
E:/data/coepelduynen/20230908_110020_PNEO-04_1_1_30cm_RD_12bit_RGBNED_Rijnsburg_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
E:/data/coepelduynen/20230910_105008_PNEO-03_1_1_30cm_RD_12bit_RGBNED_NoordwijkAanZee_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif


In [17]:
# Annotations for specific satellite images.
# Each image is matched with the annotation layer named "<first part of the file name>_annotations",
# where the first part is everything before the first "_" (e.g. "20230402").
per_image_frames = []
for tif_file in glob.glob(os.path.join(images_folder, regex)):
    tif_file = tif_file.replace("\\", "/")
    print(tif_file)

    tif_basename = tif_file.split("/")[-1]
    name_tif_file = tif_basename.split(".")[0].split("_")[0] + "_annotations"
    print(name_tif_file)

    matching_polygons = annotations_polygons_gdf[
        annotations_polygons_gdf["name"] == name_tif_file
    ]
    with rasterio.open(tif_file) as dataset:
        per_image_frames.append(
            extract_dataframe_pixels_values_from_tif_and_polygons(
                tif_dataset=dataset,
                polygon_gdf=matching_polygons,
                name_tif_file=tif_basename,
                name_annotations=name_tif_file,
            )
        )

dfs = pd.concat(per_image_frames)
# Put the image-specific rows in front of the rows already present in `df`.
# Note: `df` is extended in place of itself, so re-running this cell adds the same rows again.
df = pd.concat([dfs, df])

E:/data/coepelduynen/20230402_105321_PNEO-03_1_49_30cm_RD_12bit_RGBNED_Zoeterwoude_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
20230402_annotations
E:/data/coepelduynen/20230513_104139_PNEO-03_1_1_30cm_RD_12bit_RGBNED_NoordwijkAanZee_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
20230513_annotations
E:/data/coepelduynen/20230601_105710_PNEO-04_1_1_30cm_RD_12bit_RGBNED_Rijnsburg_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
20230601_annotations
E:/data/coepelduynen/20230908_110020_PNEO-04_1_1_30cm_RD_12bit_RGBNED_Rijnsburg_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
20230908_annotations
E:/data/coepelduynen/20230910_105008_PNEO-03_1_1_30cm_RD_12bit_RGBNED_NoordwijkAanZee_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
20230910_annotations


In [30]:
# Display the combined pixel-level dataframe for a visual check.
df

Unnamed: 0,r,g,b,n,e,d,ndvi,re_ndvi,label,image,date,season
0,807.0,710.0,621.0,1013.0,965.0,675.0,111.0,108.0,Sand,20230402_105321_PNEO-03_1_49_30cm_RD_12bit_RGB...,20230402_105321,Spring
1,807.0,713.0,625.0,1023.0,967.0,678.0,111.0,109.0,Sand,20230402_105321_PNEO-03_1_49_30cm_RD_12bit_RGB...,20230402_105321,Spring
2,792.0,703.0,620.0,1003.0,950.0,673.0,111.0,109.0,Sand,20230402_105321_PNEO-03_1_49_30cm_RD_12bit_RGB...,20230402_105321,Spring
3,784.0,697.0,618.0,992.0,941.0,671.0,111.0,109.0,Sand,20230402_105321_PNEO-03_1_49_30cm_RD_12bit_RGB...,20230402_105321,Spring
4,861.0,752.0,647.0,1071.0,1036.0,685.0,110.0,109.0,Sand,20230402_105321_PNEO-03_1_49_30cm_RD_12bit_RGB...,20230402_105321,Spring
...,...,...,...,...,...,...,...,...,...,...,...,...
156367,471.0,517.0,558.0,862.0,658.0,691.0,129.0,116.0,Vegetation,20230910_105008_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230910_105008,Fall
156368,467.0,514.0,557.0,844.0,648.0,691.0,128.0,116.0,Vegetation,20230910_105008_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230910_105008,Fall
156369,420.0,477.0,532.0,700.0,562.0,673.0,125.0,114.0,Vegetation,20230910_105008_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230910_105008,Fall
156370,430.0,485.0,537.0,736.0,582.0,677.0,126.0,115.0,Vegetation,20230910_105008_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230910_105008,Fall


### Normalise DataFrame through Scalers

In [None]:
# Work on a copy so the unscaled `df` remains available.
df_scaled = df.copy()

In [None]:
# Scale the rows one image date at a time, using the scaler files that
# get_scaler_filepath resolves for that date, the configured location and each band.
for image_date in df_scaled['date'].unique():
    print(image_date)

    # One scaler file keyword argument per band, bands numbered 1 through 6.
    scaler_files = {
        f"scaler_file_band{band}": get_scaler_filepath(scaler_folder_path, image_date, location, band)
        for band in range(1, 7)
    }
    a_normalize_scaler_class_all = scaler_class_all(**scaler_files)

    # NOTE(review): col_names lists "i" and "height", while the df preview earlier in this
    # notebook shows columns n, e, d, ndvi and re_ndvi — confirm these names match what
    # transform expects.
    rows_for_date = df_scaled['date'] == image_date
    df_scaled[rows_for_date] = a_normalize_scaler_class_all.transform(
        df_scaled[rows_for_date], col_names=["r","g","b","i",'ndvi','height']
    )

In [None]:
# Save the scaled dataframe as a parquet file.
# `pixel_scaled_filepath` must be defined before this cell is run.
df_scaled.to_parquet(pixel_scaled_filepath)