## Code to sample the dataset

This code was used to create square polygons to sample the dataset in two ways:

- Grid;

- Random.

The code was adapted from the keras-spatial library (https://pypi.org/project/keras-spatial/).

Then, QGIS (select-by-location) was used to select the polygons that intersect the landslides.

In [None]:
# Import libraries
import geopandas as gpd
import numpy as np
from shapely.geometry import box
import os
import rasterio as rio

In [None]:
# Define the class SampleShapefile that will generate the random and regular samples.
class SampleShapefile():
    """
    Class SampleShapefile:

    Builds rectangular sampling polygons over the extent of a raster image,
    either at random positions or on a regular (optionally overlapping) grid.
    Code adapted from the keras-spatial library
    (https://pypi.org/project/keras-spatial/).

    Attributes:
    imagepath (string) = path to the .tif image;
    image = rasterio dataset used to read the metadata (closed after __init__);
    meta (dict) = raster metadata, including the affine "transform";
    crs = coordinate reference system read from the raster;
    xmin, ymin, xmax, ymax (float) = raster bounds.

    Methods:
    generateShapefileRandomGrid => (geopandas.GeoDataFrame)
    generateShapefileRegularGrid => (geopandas.GeoDataFrame)

    """
    def __init__(self,pathToImage):
        """
        Read the georeferencing information of the raster.
        Args:
          pathToImage (string): path to the .tif image
        """
        # Define the imagepath variable with the path to the .tif image
        self.imagepath = pathToImage
        # The dataset is only needed to read metadata, so it is opened in a
        # context manager and closed as soon as the values are copied.
        with rio.open(self.imagepath) as TifImage:
            # Kept as an attribute for backward compatibility; the handle is
            # closed once this block exits.
            self.image = TifImage
            self.meta = TifImage.meta
            self.crs = TifImage.crs
            # bounds is ordered (left, bottom, right, top)
            self.xmin, self.ymin, self.xmax, self.ymax = TifImage.bounds

    def _patchSizeInMapUnits(self, xsize, ysize):
        """Convert a patch size in pixels to CRS units using the pixel size."""
        transform = self.meta["transform"]
        # transform[0] is the pixel width and transform[4] the pixel height;
        # abs() is used because the height term is negative for north-up rasters.
        return abs(transform[0]) * xsize, abs(transform[4]) * ysize

    @staticmethod
    def _regularOrigins(lower, upper, size, overlap):
        """Return evenly spaced patch origins along one axis of the extent."""
        step = size - size * overlap
        if step <= 0:
            raise ValueError(
                "patch size must be positive and overlap must be lower than 1"
            )
        # np.linspace requires an integer number of samples; the floor
        # division of two floats yields a float, so convert explicitly.
        count = max(int((upper - lower) // step), 0)
        return np.linspace(lower, upper - size, num=count)

    def _toGeoDataFrame(self, x, y, xsize, ysize):
        """Build one box per (x, y) lower-left corner and attach the raster CRS."""
        polys = [
            box(left, bottom, left + xsize, bottom + ysize)
            for left, bottom in zip(np.ravel(x).tolist(), np.ravel(y).tolist())
        ]
        return gpd.GeoDataFrame(geometry=polys, crs=self.crs)

    @staticmethod
    def _saveOrReturn(gdf, save, savePath, fileName):
        """Return gdf, or write it as GeoJSON inside savePath when save is set."""
        if not save:
            return gdf
        if savePath is None:
            raise ValueError("savePath must be provided when save is True")
        # Write with an explicit path instead of changing the process-wide
        # working directory.
        gdf.to_file(os.path.join(savePath, fileName), driver="GeoJSON")
        return None

    def generateShapefileRandomGrid(self,xsize, ysize, count, save = False, savePath = None):
        """
        # Code adapted from Keras-spatial library - https://pypi.org/project/keras-spatial/
        Generate randomly placed patches over the raster extent.
        Args:
          xsize (float): patch width, in pixels
          ysize (float): patch height, in pixels
          count (int): number of patches
          save (bool): write the result to disk instead of returning it
          savePath (string): directory that receives "random_grid.geojson"
        Returns:
          :obj:`geopandas.GeoDataFrame` if save is false, otherwise None
        Raises:
          ValueError: if save is true and savePath is None
        """
        # Convert the patch size from pixels to CRS units
        xsize, ysize = self._patchSizeInMapUnits(xsize, ysize)

        # Draw lower-left corners so that every patch stays inside the extent
        x = np.random.rand(count) * (self.xmax-self.xmin-xsize) + self.xmin
        y = np.random.rand(count) * (self.ymax-self.ymin-ysize) + self.ymin

        gdf = self._toGeoDataFrame(x, y, xsize, ysize)
        return self._saveOrReturn(gdf, save, savePath, "random_grid.geojson")

    def generateShapefileRegularGrid(self,xsize, ysize, overlap=0,save = False, savePath = None):
        """
        # Code adapted from Keras-spatial library - https://pypi.org/project/keras-spatial/
        Generate a regular grid of patches over the raster extent.
        Args:
          xsize (float): patch width, in pixels
          ysize (float): patch height, in pixels
          overlap (float): fraction of patch overlap, lower than 1 (optional)
          save (bool): write the result to disk instead of returning it
          savePath (string): directory that receives "regular_grid.geojson"
        Returns:
          :obj:`geopandas.GeoDataFrame` if save is false, otherwise None
        Raises:
          ValueError: if the effective step is not positive, or if save is
            true and savePath is None
        """
        # Convert the patch size from pixels to CRS units
        xsize, ysize = self._patchSizeInMapUnits(xsize, ysize)

        x = self._regularOrigins(self.xmin, self.xmax, xsize, overlap)
        y = self._regularOrigins(self.ymin, self.ymax, ysize, overlap)
        # Combine the per-axis origins into every (x, y) grid position
        X,Y = np.meshgrid(x, y)

        gdf = self._toGeoDataFrame(X, Y, xsize, ysize)
        return self._saveOrReturn(gdf, save, savePath, "regular_grid.geojson")

In [None]:
# Path to the image that will be sampled (relative to the current directory)
pathToImage = "data/images/RapidEye/2328825_2011-08-13_RE1_3A_Analytic.tif"

# Remember the starting directory so later cells can return to it
owd = os.getcwd()

# Sampler bound to the image above
trainSampleShapefile = SampleShapefile(pathToImage=pathToImage)

In [None]:
# Return to the root directory so the relative savePath resolves from it
os.chdir(owd)
# Regular-grid shapefile for 32x32 tiles
savePath = "data/train/shapefiles/32_32/regular"
# Regular grid with 0.2 overlap.
# 31 is passed instead of 32: per the original author's note, the decimal
# places of the pixel size increase the tile size by 1.
trainSampleShapefile.generateShapefileRegularGrid(
    xsize=31, ysize=31, overlap=0.2, save=True, savePath=savePath
)

In [None]:
# Return to the root directory so the relative savePath resolves from it
os.chdir(owd)
# Random-sampling shapefile for 32x32 tiles
savePath = "data/train/shapefiles/32_32/random"
# 5000 randomly placed samples (31 is passed as the size instead of 32)
trainSampleShapefile.generateShapefileRandomGrid(
    xsize=31, ysize=31, count=5000, save=True, savePath=savePath
)

In [None]:
# Return to the root directory so the relative savePath resolves from it
os.chdir(owd)
# Regular-grid shapefile for 64x64 tiles

# Directory that receives the geojson
savePath = "data/train/shapefiles/64_64/regular"
# Regular grid with 0.2 overlap.
# 63 is passed instead of 64: per the original author's note, the decimal
# places of the pixel size increase the tile size by 1.
trainSampleShapefile.generateShapefileRegularGrid(
    xsize=63, ysize=63, overlap=0.2, save=True, savePath=savePath
)

In [None]:
# Return to the root directory so the relative savePath resolves from it
os.chdir(owd)
# Random-sampling shapefile for 64x64 tiles

# Directory that receives the geojson
savePath = "data/train/shapefiles/64_64/random"
# 5000 randomly placed samples (63 is passed as the size instead of 64)
trainSampleShapefile.generateShapefileRandomGrid(
    xsize=63, ysize=63, count=5000, save=True, savePath=savePath
)

In [None]:
# Return to the root directory so the relative savePath resolves from it
os.chdir(owd)
# Regular-grid shapefile for 128x128 tiles

# Define the path to save the geojson
savePath = "data/train/shapefiles/128_128/regular"
# Generate the regular grid shapefile (128x128) with 0.2 overlap.
# 127 is passed instead of 128: per the original author's note, the decimal
# places of the pixel size increase the tile size by 1. This matches the
# 31/63 convention of the other regular grids and the 127 used for the
# 128x128 random sampling.
trainSampleShapefile.generateShapefileRegularGrid(127,127,0.2,True,savePath)

In [None]:
# Return to the root directory so the relative savePath resolves from it
os.chdir(owd)
# Random-sampling shapefile for 128x128 tiles

# Directory that receives the geojson
savePath = "data/train/shapefiles/128_128/random"
# 5000 randomly placed samples (127 is passed as the size instead of 128)
trainSampleShapefile.generateShapefileRandomGrid(
    xsize=127, ysize=127, count=5000, save=True, savePath=savePath
)