In [1]:
import geopandas as gpd

# Read the landslide inventory polygons from the shapefile into a GeoDataFrame.
inventory_path = "tehri_landslide/data/GeospatialMapsPackage/Landslide Polygon.shp"
landslides = gpd.read_file(inventory_path)

# Show the coordinate reference system and the (rows, columns) of the table,
# one value per line.
print(landslides.crs, landslides.shape, sep="\n")


EPSG:4326
(7256, 12)


In [3]:
# Reproject the inventory to EPSG:32644 and confirm the CRS and the row/column
# count afterwards.
# NOTE(review): presumably this is the CRS of the feature stack that the
# polygons are overlaid on below - confirm against the raster.
landslides = landslides.to_crs("EPSG:32644")
print(landslides.crs, landslides.shape, sep="\n")

EPSG:32644
(7256, 12)


In [4]:
import rasterio
from rasterio.mask import mask
import numpy as np
import pandas as pd

In [5]:
# Path of the multi-band feature raster that the following cells open with
# rasterio (once clipped to the landslide polygons, once read in full).
stack_path = "tehri_landslide/data/feature_stack.tif"

In [6]:
# Clip the feature stack to the landslide polygons. Pixels that fall outside
# every polygon are overwritten with the mask's nodata value. Pass -9999
# explicitly so that fill always matches the sentinel that is stripped from the
# sample table afterwards: without it rasterio falls back to the raster's own
# nodata, or to 0 when none is set, and 0-filled pixels would survive the
# nodata removal and be counted as landslide samples.
with rasterio.open(stack_path) as src:
    out, _ = mask(src, landslides.geometry, crop=True, nodata=-9999)

In [7]:
# `out` is (bands, height, width): collapse the two spatial axes so that every
# row of the table is one pixel and every column is one band.
n_bands = out.shape[0]
arr = out.reshape(n_bands, -1).T
# NOTE(review): the head() output of the combined table further down shows
# identical values in the "slope" and "aspect" columns. Either two bands of the
# stack hold the same data or these hard-coded names do not match the band
# order - verify against how feature_stack.tif was built.
band_names = ["elev", "slope", "aspect", "ndvi", "ndwi"]
df_pos = pd.DataFrame(arr, columns=band_names)

In [8]:
# Remove nodata: turn the -9999 sentinel into NaN, then drop every pixel row
# that has a NaN in any band.
# Written without inplace=True so a new frame is produced instead of mutating
# the existing one (an in-place replace can write through to the NumPy array
# the frame was constructed from), and without the redundant NaN -> NaN
# replacement.
df_pos = df_pos.replace(-9999, np.nan).dropna()


In [9]:
# Mark every remaining pixel row as a positive (landslide) sample and report
# the size of the table.
df_pos = df_pos.assign(label=1)
print("Positive samples:", df_pos.shape)

Positive samples: (33919, 6)


In [10]:
import rasterio.features
import shapely.geometry as geom

In [11]:
# Open the feature stack once more, this time keeping the raster profile
# (transform, width, height, ...) and reading every band in full.
with rasterio.open(stack_path) as src:
    profile = src.profile
    full = src.read()

In [12]:
# Rasterise the landslide polygons onto the grid of the feature stack.
# With invert=True the resulting boolean array is True for pixels inside a
# polygon and False everywhere else.
raster_shape = (profile["height"], profile["width"])
mask_raster = rasterio.features.geometry_mask(
    landslides.geometry,
    out_shape=raster_shape,
    transform=profile["transform"],
    invert=True,
)

In [13]:
# Get non-landslide pixels.
# mask_raster is True *inside* the landslide polygons (geometry_mask was called
# with invert=True), so it must be negated here. Indexing with the mask as-is
# returns the landslide pixels again, which would make the "negative" class a
# copy of the positives.
non_landslide_pixels = full[:, ~mask_raster]  # bands x pixels
non_landslide_pixels = non_landslide_pixels.T  # pixels x bands

In [14]:
# Tabulate the pixels selected into non_landslide_pixels: one row per pixel,
# one column per band, using the same band names as the positive table.
neg_columns = ["elev", "slope", "aspect", "ndvi", "ndwi"]
df_neg = pd.DataFrame(data=non_landslide_pixels, columns=neg_columns)

In [15]:
# Remove nodata from the negative table: turn the -9999 sentinel into NaN, then
# drop every pixel row that has a NaN in any band.
# Written without inplace=True (a new frame is produced rather than mutating the
# existing one) and without the redundant NaN -> NaN replacement.
df_neg = df_neg.replace(-9999, np.nan).dropna()

In [16]:
# Balance the classes: draw as many rows from the negative table as there are
# positives (fixed seed so the draw is repeatable) and label them 0.
# sample() raises if the negative table has fewer rows than requested.
n_positive = len(df_pos)
df_neg = df_neg.sample(n=n_positive, random_state=42).assign(label=0)

In [17]:
# Report the (rows, columns) of the sampled negative table.
print("Negative samples:", df_neg.shape)

Negative samples: (33919, 6)


In [18]:
# Stack the positive table on top of the negative one with a fresh 0..n-1
# index, then show the first rows and the overall (rows, columns).
frames = (df_pos, df_neg)
df = pd.concat(frames, ignore_index=True)
print(df.head(), df.shape, sep="\n")

          elev       slope      aspect      ndvi      ndwi  label
0  3137.446777  303.976318  303.976318 -0.000740  0.009053      1
1  3134.586426  311.634216  311.634216  0.007442  0.021680      1
2  3127.721924  307.402252  307.402252  0.008362  0.016787      1
3  3120.857178  303.170258  303.170258  0.024818  0.021375      1
4  3115.355957  298.771790  298.771790  0.027911  0.001403      1
(67838, 6)


In [19]:
# Write the combined table to CSV, leaving out the index column.
output_csv = "tehri_landslide/data/training_dataset.csv"
df.to_csv(output_csv, index=False)
print("Saved training dataset")

Saved training dataset
