# The TIF model iterator using pyspark

This notebook iterates over raster .tif files and uses a scikit-learn model to make a prediction for every pixel.
The prediction is distributed with PySpark, because the large number of pixels would make it take too long otherwise.

It then writes a segmentation shapefile, which can be opened in a GIS application to inspect the results.

Note: MinMaxScalers have to be created before predicting, because of the differences between images.

Author: Michael de Winter

In [0]:
import rasterio
import pandas as pd
import itertools
import numpy as np
import glob
import joblib
from pyspark.sql.functions import udf
import pickle
from shapely.geometry import Polygon, Point
from timeit import default_timer as timer
import geopandas as gpd
import os
from pyspark.sql.types import DoubleType, StringType, ArrayType
import glob
import shutil

###### Run the command below to import all the required functions

In [0]:
%run ./tif_model_iterator_functions

### Run settings

In [0]:
# Run settings for the Coepelduynen area.
# All four values are handed to run_tif_model_implementer in the cells below.

# Model file applied to the images.
model_path = "/dbfs/mnt/satellite-images-nso/models/randomforest_classifier_coepelduynen_contrast_annotations_grid_search_all_data_2019_2022_small_balanced_v1.3.sav"

# NOTE(review): presumably the number of parts an image is split into - confirm in the helper notebook.
parts = 2

# Folder holding the scalers.
path_to_scalers = "/dbfs/mnt/satellite-images-nso/SV_50cm/coepelduynen/scalers/"

# Folder for the model output.
path_to_output = "/dbfs/mnt/satellite-images-nso/model_out_coepelduynen/"

In [0]:
# Show the result of check_done_files() (provided by the %run'd helper notebook).
# The prediction loops below compare the leading "_"-separated token of each .tif
# file name against this result to decide whether an image still has to be processed.
check_done_files()

### Coepelduynen

In [0]:
# Preview the leading "_"-separated token of every .tif file name; the prediction
# loops below compare this token against check_done_files().
tif_paths = glob.glob("/dbfs/mnt/satellite-images-nso/SV_50cm/coepelduynen/*.tif")
for tif_path in tif_paths:
  tif_name = tif_path.rsplit("/", 1)[-1]
  print(tif_name.partition("_")[0])

In [0]:
# Predict all
# Every .tif image whose file-name prefix is not reported by check_done_files() is
# processed. A failure on one image is reported and remembered, but does not stop
# the batch; the failed files are listed once the loop has finished.
failed_files = []
# glob returns paths in arbitrary order; sort for a reproducible processing order.
for file in sorted(glob.glob("/dbfs/mnt/satellite-images-nso/SV_50cm/coepelduynen/*.tif")):
  # Leading "_"-separated token of the file name.
  file_prefix = file.split("/")[-1].split("_")[0]
  # check_done_files() is evaluated again for every image, as in the original loop,
  # so results written earlier in this run are taken into account.
  if file_prefix in set(check_done_files()):
    print(file_prefix+" already done")
    continue
  print(file)
  try:
    run_tif_model_implementer(file,path_to_output,path_to_scalers,parts,model_path, aggregate_to_2m = False)
  except Exception as e:
    # Best effort: continue with the remaining images, but keep the exception type
    # (repr) and the file name so the failure can be traced afterwards.
    print("FAILED "+file+": "+repr(e))
    failed_files.append(file)

if failed_files:
  print(str(len(failed_files))+" file(s) failed:")
  for failed_file in failed_files:
    print("  "+failed_file)

In [0]:
# Image dates (integers, written as YYYYMMDD) used as file-name prefixes by the
# per-date prediction loop below.
date_annotations = [
  20190601,
  20200625,
  20200731,
  20200915,
  20210709,
  20210815,
  20210907,
  20220515,
  20220922,
]

In [0]:
# Show the current result of check_done_files() before running the per-date loop below,
# which uses it to skip images that are already done.
check_done_files()

In [0]:
# Predict the image of every date in date_annotations that is not reported as done
# by check_done_files(). Dates without a matching .tif file are reported instead of
# aborting the loop with an IndexError.
missing_dates = []
for date in date_annotations:
  print(date)

  # Sorted so that the file picked for a date does not depend on glob's arbitrary ordering.
  matches = sorted(glob.glob("/dbfs/mnt/satellite-images-nso/SV_50cm/coepelduynen/"+str(date)+"*.tif"))
  if not matches:
    print("No .tif file found")
    missing_dates.append(date)
    continue

  file = matches[0]
  if file.split("/")[-1].split("_")[0] not in set(check_done_files()):
    print("File not done")
    run_tif_model_implementer(file,path_to_output,path_to_scalers,parts,model_path, aggregate_to_2m = False)
  else:
    print("File done")

if missing_dates:
  print("Dates without a .tif file: "+str(missing_dates))
    

In [0]:
# List every entry in the Coepelduynen image folder (the pattern is not limited to .tif files).
folder_entries = glob.glob("/dbfs/mnt/satellite-images-nso/SV_50cm/coepelduynen/*")
for folder_entry in folder_entries:
  print(folder_entry)

In [0]:
%sh

# List the contents of the Coepelduynen annotations folder.
ls /dbfs/mnt/annotations/coepelduynen

In [0]:
# Derive one string per annotation .gpkg file: the text after the last space in the
# path, cut off at its first ".".
# NOTE(review): presumably this is the date at the end of the file name - confirm against the folder listing.
annotation_paths = glob.glob("/dbfs/mnt/annotations/coepelduynen/*.gpkg")
dates_annotations = [
  annotation_path.rsplit(" ", 1)[-1].partition(".")[0]
  for annotation_path in annotation_paths
]

In [0]:
# Display the strings extracted from the annotation file names.
dates_annotations

In [0]:
# Add 20200508 to the dates that are processed by the loops below.
# Guarded so that re-running this cell does not add the date a second time, which
# would make the loops below process the same image twice.
extra_date = "20200508"
if extra_date not in dates_annotations:
  dates_annotations.append(extra_date)

In [0]:
# Display the list again, now including the manually added date.
dates_annotations

In [0]:
# For the annotations.
# Run the model on the image of every date in dates_annotations. Dates without a
# matching image are reported instead of aborting the loop with an IndexError.
# NOTE(review): aggregate_to_2m is not passed here (other cells pass
# aggregate_to_2m = False), so the helper's default applies - confirm this is intended.
dates_without_image = []
for date in dates_annotations:
  # Restricted to .tif files (like the per-date loop above) and sorted, so the file
  # picked for a date is always a .tif and does not depend on glob's arbitrary ordering.
  matches = sorted(glob.glob('/dbfs/mnt/satellite-images-nso/SV_50cm/coepelduynen/'+date+'*.tif'))
  if not matches:
    print("No .tif file found for "+date)
    dates_without_image.append(date)
    continue
  path_to_tif_file = matches[0]
  print(path_to_tif_file)
  run_tif_model_implementer(path_to_tif_file,path_to_output,path_to_scalers,parts,model_path)

if dates_without_image:
  print("Dates without a .tif file: "+str(dates_without_image))

In [0]:
# Dry run: print the image file that belongs to every date in dates_annotations
# without running the model. Dates without a matching .tif file are reported
# instead of aborting the loop with an IndexError.
for date in dates_annotations:
  # Sorted so the file shown for a date does not depend on glob's arbitrary ordering.
  matches = sorted(glob.glob('/dbfs/mnt/satellite-images-nso/SV_50cm/coepelduynen/'+date+'*.tif'))
  if not matches:
    print("No .tif file found for "+date)
    continue
  path_to_tif_file = matches[0]
  print(path_to_tif_file)

In [0]:
# Run the model on one specific image (file-name prefix 20210907), using the run
# settings defined above and with aggregate_to_2m switched off.
run_tif_model_implementer(
  "/dbfs/mnt/satellite-images-nso/SV_50cm/coepelduynen/20210907_112017_SV1-04_SV_RD_11bit_RGBI_50cm_KatwijkAanZee_natura2000_coepelduynen_cropped_ndvi_height.tif",
  path_to_output,
  path_to_scalers,
  parts,
  model_path,
  aggregate_to_2m=False,
)

In [0]:
# Shorten the names of the v1.2 output files by cutting the model name out of them.
MODEL_NAME_MARKER = "_coepelduynen_contrast_annotations_"


def shortened_output_name(path):
  """Return `path` without its model-name part, or None when there is nothing to cut.

  The result is the text before the first "_coepelduynen_contrast_annotations_"
  followed by the text after the last ".sav" in the remainder of the path (the
  whole remainder when it contains no ".sav").

  Returns None when the marker does not occur in `path`; without this check the
  target name would be the path concatenated with itself.
  """
  if MODEL_NAME_MARKER not in path:
    return None
  pieces = path.split(MODEL_NAME_MARKER)
  return pieces[0] + pieces[-1].split(".sav")[-1]


for file in glob.glob("/dbfs/mnt/satellite-images-nso/model_out_coepelduynen/*v1.2*"):
  new_name = shortened_output_name(file)
  if new_name is None:
    print("Skipped (no model name in path): "+file)
  elif os.path.exists(new_name):
    # os.rename would silently replace an existing file on POSIX systems.
    print("Skipped (target already exists): "+file+" -> "+new_name)
  else:
    print(file+" -> "+new_name)
    os.rename(file, new_name)

In [0]:
# List the output files that still have "v1.2" in their name.
remaining_v1_2_outputs = glob.glob("/dbfs/mnt/satellite-images-nso/model_out_coepelduynen/*v1.2*")
for output_path in remaining_v1_2_outputs:
  print(output_path)

In [0]:
# All files.
# Run the model on every .tif image in the Coepelduynen folder, whether or not it
# has been processed before.
# The pattern is limited to *.tif because the folder also contains the "scalers"
# subfolder (see path_to_scalers), which a bare "*" would pass to the model runner
# as if it were an image. Sorted because glob returns paths in arbitrary order.
for path_to_tif_file in sorted(glob.glob("/dbfs/mnt/satellite-images-nso/SV_50cm/coepelduynen/*.tif")):
  print(path_to_tif_file)
  run_tif_model_implementer(path_to_tif_file,path_to_output,path_to_scalers,parts,model_path, aggregate_to_2m= False)

In [0]:
%sh

# List the contents of the model output folder.
ls /dbfs/mnt/satellite-images-nso/model_out_coepelduynen