In [None]:
# Reload imported modules automatically before each cell is executed, so that
# edits to the project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import glob
import pandas as pd
import rasterio
from nso_ds_classes.nso_ds_normalize_scaler import scaler_class_all
from annotations.data_preparation import extract_dataframe_pixels_values_from_tif_and_polygons
from annotations.utils import get_scaler_filepath
from annotations.data_loader import load_annotations_polygons
import os

## Transform Polygon Annotations to Pixel Annotation Parquet files

This notebook transforms polygon annotations stored as GeoJSON (made in e.g. QGIS) into pixel-level annotations with scaled band values. The pixel-level annotations are written to parquet files.
Change the variables below to match the situation on your device.
Note that these transformations can quickly become very memory intensive.

Date: 2024-01-11 \
Author: Pieter Kouyzer


In [None]:
# Set variables
# Name of the area; used in the image glob pattern, the output file names and the scaler lookup.
location = "Voornes Duin"
# Folder that is searched for the satellite .tif images.
images_folder = "E:/Data/remote_sensing/satellite-images/"
# NOTE: despite its name this is a glob pattern (it is passed to glob.glob), not a regular expression.
regex = f"*{location}*ndvi_height_asphalt_crop.tif"
# Folder containing the polygon annotations; the pixel-level parquet files are written here as well.
annotations_folder = "C:/Users/pzhadmin/Data/remote-sensing/annotations/"
# File name of the polygon annotations, passed to load_annotations_polygons.
# NOTE(review): the variable name suggests a pattern is accepted too - confirm in annotations.data_loader.
annotations_polygon_filename_regex = "annotaties_VoornesDuin_gecorrigeerd_100124_3_labels.geojson"
# Folder passed to get_scaler_filepath to locate the per-band scaler files.
scaler_folder_path = "../../scalers/"

In [None]:
# Output locations inside the annotations folder: the raw pixel-level
# annotations (also used as a cache) and their scaled counterpart.
pixel_filepath = os.path.join(
    annotations_folder,
    f"{location}_pixel_annotations.parquet",
)
pixel_scaled_filepath = os.path.join(
    annotations_folder,
    f"{location}_pixel_annotations_scaled.parquet",
)

### Prepare data

In [None]:
# Load the polygon annotations; the result is passed as `polygon_gdf` to the pixel extraction step.
# NOTE(review): presumably returns a GeoDataFrame and uses `regex`/`images_folder` to relate
# polygons to the matching images - confirm in annotations.data_loader.
annotations_polygons_gdf = load_annotations_polygons(annotations_folder, annotations_polygon_filename_regex, regex, images_folder)

In [None]:
# Build the pixel-level annotation table, or re-use the cached parquet file
# from a previous run (the extraction is slow and memory intensive).
if os.path.isfile(pixel_filepath):
    df = pd.read_parquet(pixel_filepath)
else:
    tif_files = glob.glob(os.path.join(images_folder, regex))
    if not tif_files:
        # Without this guard pd.concat([]) fails further down with the opaque
        # "No objects to concatenate"; report the actual cause instead.
        raise FileNotFoundError(
            f"No .tif files matching pattern '{regex}' found in '{images_folder}'"
        )

    dfs = []
    for tif_file in tif_files:
        # Normalise Windows path separators so the file name can be split off on "/".
        tif_file = tif_file.replace("\\", "/")
        print(tif_file)
        # File name up to its first dot; handed to the extraction as `name_tif_file`.
        name_tif_file = tif_file.split("/")[-1].split(".")[0]
        with rasterio.open(tif_file) as dataset:
            dfs.append(
                extract_dataframe_pixels_values_from_tif_and_polygons(
                    tif_dataset=dataset,
                    polygon_gdf=annotations_polygons_gdf,
                    name_tif_file=name_tif_file,
                )
            )

    # One frame per image, stacked; cached so the next run can skip the extraction.
    df = pd.concat(dfs)
    df.to_parquet(pixel_filepath)

### Normalise DataFrame through Scalers

In [None]:
# Work on an independent (deep) copy so the unscaled `df` stays untouched.
df_scaled = df.copy(deep=True)

In [None]:
# Columns handed to the scaler, in the order expected by the six band scalers.
band_columns = ["r", "g", "b", "i", "ndvi", "height"]

# Scale the rows of every image date with the scalers belonging to that date.
for image_date in df_scaled["date"].unique():
    print(image_date)

    # One scaler file per band (1..6) for this date and location.
    scaler_files = {
        f"scaler_file_band{band}": get_scaler_filepath(
            scaler_folder_path, image_date, location, band
        )
        for band in range(1, 7)
    }
    a_normalize_scaler_class_all = scaler_class_all(**scaler_files)

    # Boolean mask selecting the rows of the current date; used to read and to write back.
    rows_of_date = df_scaled["date"] == image_date
    df_scaled[rows_of_date] = a_normalize_scaler_class_all.transform(
        df_scaled[rows_of_date],
        col_names=list(band_columns),  # fresh list per call, as in a literal
    )

In [None]:
# Write the scaled pixel annotations to their parquet file.
df_scaled.to_parquet(pixel_scaled_filepath)