In [None]:
%load_ext autoreload
%autoreload 2

In [20]:
import glob
import pandas as pd
import rasterio
from nso_ds_classes.nso_ds_normalize_scaler import scaler_class_all
from annotations.data_preparation import extract_dataframe_pixels_values_from_tif_and_polygons
from annotations.utils import get_scaler_filepath
from annotations.data_loader import load_annotations_polygons_gpkg
import os
import pickle

## Transform Polygon Annotations to Pixel Annotation Parquet files

This script is intended to transform given polygon annotations in geojson (made in i.e. QGis) into pixel level annotations, with scaled band values. The pixel level annotations are written to parquet files.
Change the variables below to match the situation on your device.
Note that these transformations are quite quickly very memory intensive.

Date: 2024-01-11 \
Author: Pieter Kouyzer, Michael de Winter


In [3]:
# Set variables
location = "coepelduynen"
images_folder = "E:/data/"
regex = f"{location}/2023*re*asphalt_crop.tif"
annotations_path = "C:/repos/satellite-images-nso-datascience/data/annotations/Coepelduynen/Annotations_Coepelduynen_2023.gpkg"

scaler_folder_path = "../../scalers/"

### Prepare data

In [3]:
annotations_polygons_gdf = load_annotations_polygons_gpkg("C:/repos/satellite-images-nso-datascience/data/annotations/Coepelduynen/Annotations_Coepelduynen_2023.gpkg")

In [4]:
annotations_polygons_gdf = annotations_polygons_gdf.reset_index(drop=True)

In [5]:
# Custom actions to set data straight.
annotations_polygons_gdf.loc[annotations_polygons_gdf["name"] != "Annotations_Coepelduynen_2023","label"] = annotations_polygons_gdf[annotations_polygons_gdf["name"] != "Annotations_Coepelduynen_2023"]["Label_name"]
annotations_polygons_gdf = annotations_polygons_gdf.drop(["Label_name"], axis=1)
annotations_polygons_gdf['label'] = annotations_polygons_gdf['label'].str.replace("\nAsphalt","Asphalt")
annotations_polygons_gdf["Label"] = annotations_polygons_gdf["label"]
annotations_polygons_gdf = annotations_polygons_gdf.drop(["label"], axis=1)

In [6]:
annotations_polygons_gdf

Unnamed: 0,geometry,name,Label
0,"POLYGON ((88197.082 470348.814, 88197.617 4703...",Annotations_Coepelduynen_2023,Sand
1,"POLYGON ((88393.545 470238.281, 88402.459 4702...",Annotations_Coepelduynen_2023,Vegetation
2,"POLYGON ((88300.127 470312.623, 88306.545 4703...",Annotations_Coepelduynen_2023,Sand
3,"POLYGON ((88338.457 470289.447, 88370.547 4702...",Annotations_Coepelduynen_2023,Vegetation
4,"POLYGON ((88320.607 470562.728, 88317.400 4705...",Annotations_Coepelduynen_2023,Vegetation
5,"POLYGON ((88193.748 470693.061, 88195.953 4706...",Annotations_Coepelduynen_2023,Asphalt
6,"POLYGON ((88197.355 470697.603, 88200.740 4707...",Annotations_Coepelduynen_2023,Asphalt
7,"POLYGON ((88189.205 470688.229, 88191.477 4706...",Annotations_Coepelduynen_2023,Asphalt
8,"POLYGON ((88642.619 471444.193, 88642.063 4714...",Annotations_Coepelduynen_2023,Asphalt
9,"POLYGON ((88697.153 471430.031, 88714.967 4714...",Annotations_Coepelduynen_2023,Sand


In [7]:
#Custom aggregation for annnotations across all satellite images
dfs = []
for tif_file in glob.glob(os.path.join(images_folder, regex)):
        tif_file = tif_file.replace("\\","/")
        print(tif_file)
        name_tif_file = tif_file.split("/")[-1].split(".")[0]
        with rasterio.open(tif_file) as dataset:
            dfs += [
               extract_dataframe_pixels_values_from_tif_and_polygons(
                        tif_dataset=dataset,
                        polygon_gdf=annotations_polygons_gdf[
                            annotations_polygons_gdf["name"] == "Annotations_Coepelduynen_2023"
                        ],
                        name_tif_file=tif_file.split("/")[-1],
                        name_annotations="Annotations_Coepelduynen_2023",
                    )          
            ]
        
df = pd.concat(dfs)

E:/data/coepelduynen/20230402_105321_PNEO-03_1_49_30cm_RD_12bit_RGBNED_Zoeterwoude_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
E:/data/coepelduynen/20230513_104139_PNEO-03_1_1_30cm_RD_12bit_RGBNED_NoordwijkAanZee_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
E:/data/coepelduynen/20230601_105710_PNEO-04_1_1_30cm_RD_12bit_RGBNED_Rijnsburg_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
E:/data/coepelduynen/20230908_110020_PNEO-04_1_1_30cm_RD_12bit_RGBNED_Rijnsburg_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
E:/data/coepelduynen/20230910_105008_PNEO-03_1_1_30cm_RD_12bit_RGBNED_NoordwijkAanZee_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif


In [12]:
df.columns

Index(['r', 'g', 'b', 'n', 'e', 'd', 'ndvi', 're_ndvi', 'label', 'image',
       'date', 'season', 'annotation_no'],
      dtype='object')

In [9]:
# Annotations for specific satellite images
dfs = []
for tif_file in glob.glob(os.path.join(images_folder, regex)):
    tif_file = tif_file.replace("\\","/")
    print(tif_file)
    name_tif_file = tif_file.split("/")[-1].split(".")[0].split("_")[0]+"_annotations"
    print(name_tif_file)
    with rasterio.open(tif_file) as dataset:
            dfs += [
               extract_dataframe_pixels_values_from_tif_and_polygons(
                        tif_dataset=dataset,
                        polygon_gdf=annotations_polygons_gdf[
                            annotations_polygons_gdf["name"] == name_tif_file
                        ],
                        name_tif_file=tif_file.split("/")[-1],
                        name_annotations=name_tif_file,
                    )          
            ]
    
 
dfs = pd.concat(dfs)
df = pd.concat([dfs,df])

E:/data/coepelduynen/20230402_105321_PNEO-03_1_49_30cm_RD_12bit_RGBNED_Zoeterwoude_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
20230402_annotations
E:/data/coepelduynen/20230513_104139_PNEO-03_1_1_30cm_RD_12bit_RGBNED_NoordwijkAanZee_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
20230513_annotations
E:/data/coepelduynen/20230601_105710_PNEO-04_1_1_30cm_RD_12bit_RGBNED_Rijnsburg_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
20230601_annotations
E:/data/coepelduynen/20230908_110020_PNEO-04_1_1_30cm_RD_12bit_RGBNED_Rijnsburg_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
20230908_annotations
E:/data/coepelduynen/20230910_105008_PNEO-03_1_1_30cm_RD_12bit_RGBNED_NoordwijkAanZee_natura2000_coepelduynen_cropped_ndvi_re_ndvi_asphalt_crop.tif
20230910_annotations


In [11]:
df.to_parquet("annotations_pixel_dataframes/annotaties_coepelduynen_to_pixel_2023.parquet")

### Normalise DataFrame through Scalers

In [5]:
df = pd.read_parquet("annotations_pixel_dataframes/annotaties_coepelduynen_to_pixel_2023.parquet")

In [6]:
df_scaled = df.copy()

In [25]:
for image_date in df_scaled['date'].unique():
   
    
    print(image_date)
    loaded_data = pickle.load(open([file for file in glob.glob("../../scalers/"+str(image_date.split("_")[0])+"*StandardScaler.pkl")][0], 'rb'))


    df_scaled[df_scaled['date'] == image_date] = loaded_data.transform(
        df_scaled[df_scaled['date'] == image_date]
    )

20230513_104139


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a c

20230402_105321


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a c

20230601_105710


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a c

20230908_110020


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a c

20230910_105008


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pixel_df[column_name] = self.scaler_bands[xi].transform(
A value is trying to be set on a c

In [26]:
df_scaled

Unnamed: 0,r,g,b,n,e,d,ndvi,re_ndvi,label,image,date,season,annotation_no
0,4.395348,3.620568,3.052421,2.838405,3.229714,2.501019,1.413646,1.397541,Sand,20230513_104139_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230513_104139,Spring,31_20230513_annotations
1,4.646989,3.761395,3.146375,2.937408,3.372237,2.533248,1.398225,1.380507,Sand,20230513_104139_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230513_104139,Spring,31_20230513_annotations
2,4.512391,3.685565,3.094178,2.730402,3.206513,2.505048,1.382805,1.380507,Sand,20230513_104139_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230513_104139,Spring,31_20230513_annotations
3,4.184672,3.468909,2.937589,2.350143,2.881694,2.412390,1.336543,1.346439,Sand,20230513_104139_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230513_104139,Spring,31_20230513_annotations
4,4.091038,3.425577,2.906271,2.293891,2.818719,2.400304,1.336543,1.363473,Sand,20230513_104139_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230513_104139,Spring,31_20230513_annotations
...,...,...,...,...,...,...,...,...,...,...,...,...,...
156367,2.077479,1.989473,1.895106,1.785769,1.947980,1.801029,1.639892,1.665139,Vegetation,20230910_105008_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230910_105008,Fall,30_Annotations_Coepelduynen_2023
156368,2.055305,1.974723,1.890705,1.736869,1.909929,1.801029,1.622816,1.665139,Vegetation,20230910_105008_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230910_105008,Fall,30_Annotations_Coepelduynen_2023
156369,1.794754,1.792813,1.780675,1.345673,1.582694,1.739418,1.571588,1.626706,Vegetation,20230910_105008_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230910_105008,Fall,30_Annotations_Coepelduynen_2023
156370,1.850190,1.832145,1.802681,1.443472,1.658796,1.753110,1.588664,1.645923,Vegetation,20230910_105008_PNEO-03_1_1_30cm_RD_12bit_RGBN...,20230910_105008,Fall,30_Annotations_Coepelduynen_2023


In [27]:
#save scaled dataframe
df_scaled.to_parquet("annotations_pixel_dataframes/annotaties_coepelduynen_to_pixel_2023_scaled.parquet")