#Import libraries

In [0]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString, MultiPolygon
import os
import glob
from shapely import geometry
import numpy as np
import time
import shutil

#Import cloud files

In [0]:
# Folder holding the classification polygons that still have to be processed.
inPath = "/dbfs/mnt/strukturparametre/forestblocks_planet/unprocessed/"
# NOTE: despite the variable name, this pattern matches shapefiles (*.shp).
gpkg_pattern = os.path.join(inPath, '*.shp')
blockpaths = glob.glob(gpkg_pattern)

# Non-forest layer, loaded once at module level and read by
# delineation_process; wrapped so that it carries EPSG:25832.
gdf_nofor = gpd.GeoDataFrame(
    gpd.read_file("/dbfs/mnt/strukturparametre/Non_forest_eraser_repaired.shp"),
    crs="EPSG:25832",
)


In [0]:
# Show the matched input paths as the notebook cell output.
blockpaths

#Define buffer in and out corrections

In [0]:
def buffer_in_correction(bfr_in_input):
    """Split the result of an inward buffer into single-part rows.

    Multi-part geometries in ``bfr_in_input`` are exploded into one row per
    part (``index_parts=True`` keeps the part number as an extra index
    level). The parts are wrapped in a GeoDataFrame whose active geometry
    column is ``geometry`` and whose CRS is set to EPSG:25832.

    Parameters
    ----------
    bfr_in_input
        Geometry container exposing ``explode``; the callers in this file
        pass the result of ``.buffer(...)``.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per single-part geometry, CRS EPSG:25832.
    """
    single_parts = gpd.GeoSeries(bfr_in_input.explode(index_parts=True))
    frame = gpd.GeoDataFrame(single_parts)
    # The rename targets column label 0, which is where the unnamed series
    # is expected to land when wrapped in a frame.
    frame = frame.rename(columns={0: 'geometry'})
    return frame.set_geometry('geometry').set_crs(25832)
def buffer_out_correction(bfr_out_output):
    """Dissolve overlapping geometries after an outward buffer.

    All geometries in ``bfr_out_output`` are merged with ``unary_union`` and
    the merged result is split back into its individual parts, one per row.

    Parameters
    ----------
    bfr_out_output
        Geometry container exposing ``unary_union``; the callers in this
        file pass the result of ``.buffer(...)``.

    Returns
    -------
    geopandas.GeoSeries
        The dissolved parts with CRS EPSG:25832. Contains a single element
        when everything merged into one geometry.
    """
    merged = bfr_out_output.unary_union
    if merged is None:
        parts = []
    else:
        # Only multi-part geometries expose ``.geoms``. When the union
        # collapses to one Polygon, treat it as a one-element collection
        # instead of failing with AttributeError.
        parts = list(getattr(merged, "geoms", [merged]))
    bfr_gdf = gpd.geoseries.GeoSeries(parts)
    bfr_gdf = bfr_gdf.set_crs(25832)
    return bfr_gdf



# Define delineation

In [0]:
def delineation_process(filename):
    """Delineate forest blocks from one classification-polygon file.

    The polygons read from ``filename`` are smoothed by a fixed sequence of
    outward and inward buffers, the module-level non-forest layer
    ``gdf_nofor`` (clipped to this file's bounding box) is subtracted, and
    the remaining parts are filtered on ``area_ha >= 0.5``, where
    ``area_ha`` is the geometry area divided by 10000. The filtered parts
    are written to a GeoPackage in
    ``/dbfs/mnt/strukturparametre/forestblocks_planet_delineated/``.

    Parameters
    ----------
    filename : str
        Path of a vector file readable by ``geopandas.read_file``.

    Returns
    -------
    geopandas.GeoDataFrame
        The rows that passed the area filter, including ``area_ha``.
    """
    # Base name without directory or extension; used for logging and for
    # naming the output file and layer.
    filen = os.path.splitext(os.path.basename(filename))[0]
    print('reading', filen)
    gdf_forest = gpd.GeoDataFrame(gpd.read_file(filename), crs="EPSG:25832")

    st = time.time()

    # Clip the non-forest layer to the bounding box of this file so the
    # later difference only has to consider nearby geometries.
    bbox = geometry.box(*gdf_forest.total_bounds)
    mask_gpd = gpd.GeoSeries([bbox], crs="EPSG:25832")
    nonfor_clip = gdf_nofor.clip(mask_gpd, keep_geom_type=True)

    # Alternating outward/inward buffers. Both correction helpers return
    # data already tagged EPSG:25832, so only the first step reprojects.
    gdf_forest = buffer_out_correction(gdf_forest.to_crs(25832).buffer(10))
    gdf_forest = buffer_in_correction(gdf_forest.buffer(-20))
    print('1st buffer 20m in for', filen)

    gdf_forest = buffer_out_correction(gdf_forest.buffer(20))
    gdf_forest = buffer_in_correction(gdf_forest.buffer(-10))
    gdf_forest = buffer_out_correction(gdf_forest.buffer(10))
    gdf_forest = buffer_in_correction(gdf_forest.buffer(-10))

    # Remove everything covered by the non-forest layer.
    gdf_forest = gdf_forest.overlay(nonfor_clip, how='difference', make_valid=True)
    print('difference_final')

    gdf_forest = buffer_in_correction(gdf_forest.buffer(-10))

    # Final outward buffer followed by a dissolve. Only multi-part unions
    # expose ``.geoms``; when a single Polygon is left, wrap it in a list
    # instead of failing with AttributeError.
    merged = gdf_forest.buffer(10).unary_union
    parts = [] if merged is None else list(getattr(merged, "geoms", [merged]))
    gdf_forest = gpd.geoseries.GeoSeries(parts)
    print('fix union')
    gdf_forest = gdf_forest.set_crs('epsg:25832')

    # Kept from the original flow: the explode produces the two-level index
    # that the resulting frame (and its written output) is built on.
    gdf_forest = gdf_forest.explode(index_parts=True)
    print('explode')

    gdf_forest = gpd.GeoDataFrame(gpd.GeoSeries(gdf_forest))
    gdf_forest = gdf_forest.rename(columns={0: 'geometry'}).set_geometry('geometry')

    # Area in CRS units divided by 10000 (hectares for a metre-based CRS).
    gdf_forest['area_ha'] = gdf_forest.area / 10000
    forest = gdf_forest[gdf_forest['area_ha'] >= 0.5]

    # Write locally first, then move to the mounted storage.
    # NOTE(review): presumably the mount does not support writing a
    # GeoPackage in place - confirm before simplifying.
    tmp_path = f"/tmp/forest_{filen}.gpkg"
    forest.to_file(tmp_path, layer=f'forest_{filen}', driver="GPKG")
    shutil.move(tmp_path, f"/dbfs/mnt/strukturparametre/forestblocks_planet_delineated/forest_{filen}.gpkg")

    et = time.time()
    total_time = et - st
    print(total_time, 's , elapsed_time')
    return forest

## Parallelize delineation per cloud file

In [0]:
# Distribute the input files over Spark, one partition per file, so that
# each file is delineated as its own task. Uses ``blockpaths`` (the paths
# globbed from ``inPath``); ``max(..., 1)`` avoids requesting zero
# partitions when no input file was found.
file_rdd = sc.parallelize(blockpaths, max(len(blockpaths), 1))
write_files_rdd = file_rdd.map(delineation_process)
# collect() triggers the computation and returns every resulting frame to
# the driver.
write_files_rdd.collect()