## Notebook to develop a decision tree for identifying snow in PlanetScope 4-band imagery
Rainey Aberle

### Import packages

In [None]:
import glob
import os

import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio as rio
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text, plot_tree

### Define paths to directories

In [None]:
# Root folder of the study site; everything else is resolved relative to it.
# The trailing separator is kept because other cells append to it with `+`.
base_path = '/Users/raineyaberle/Research/PhD/study-sites/Wolverine/'

# Folder holding the radiometrically adjusted PlanetScope scenes
im_path = os.path.join(
    base_path,
    'imagery/Planet/2021-04-20_2021-08-25/adjusted-radiometry/',
)

# Folder where figures are written
out_path = os.path.join(base_path, '../../planet-snow/figures/')

### Load image and snow/non-snow classified points

In [None]:
# Define EPSG code used to reproject the classified points
epsg = 32606

# Load image.
# The dataset is intentionally left open: later cells call `im.sample(...)`.
im_fn = im_path+'20210815_202055_60_2459_3B_AnalyticMS_SR_clip_PSB.SD_adj.tif'
im = rio.open(im_fn)

# Read bands as float arrays.
# Band order is blue, green, red, NIR (bands 1-4). This must match the order
# used when sampling the training points (index 0=blue, 1=green, 2=red, 3=NIR);
# otherwise the classifier is trained and applied on differently-ordered bands.
b = im.read(1).astype(float)
g = im.read(2).astype(float)
r = im.read(3).astype(float)
nir = im.read(4).astype(float)

# Define coordinates grid spanning the image bounds (one value per column/row)
im_x = np.linspace(im.bounds.left, im.bounds.right, num=np.shape(b)[1])
im_y = np.linspace(im.bounds.top, im.bounds.bottom, num=np.shape(b)[0])
print('Image CRS:',im.crs)

# Load snow training points
train_snow_pts_fn = base_path+'classified-points/snow_points.shp'
train_snow_pts = gpd.read_file(train_snow_pts_fn)
# Reproject to the EPSG code defined above (compare with the printed image CRS)
train_snow_pts = train_snow_pts.to_crs(epsg)
print('Snow points CRS:', train_snow_pts.crs)

# Load non-snow points
train_non_snow_pts_fn = base_path+'classified-points/non_snow_points.shp'
train_non_snow_pts = gpd.read_file(train_non_snow_pts_fn)
# Reproject to the EPSG code defined above (compare with the printed image CRS)
train_non_snow_pts = train_non_snow_pts.to_crs(epsg)
print('Non-snow points CRS:', train_non_snow_pts.crs)

# Plot RGB image with the classified points on top.
# Font settings are applied before the figure is created so they affect all text.
plt.rcParams.update({'font.size': 14, 'font.sans-serif': 'Arial'})
fig, ax1 = plt.subplots(1, 1, figsize=(12,12))
ax1.imshow(np.dstack([r, g, b]),
           extent=(np.min(im_x), np.max(im_x), np.min(im_y), np.max(im_y)))
train_snow_pts.plot(ax=ax1, markersize=15, color='cyan', label='snow')
train_non_snow_pts.plot(ax=ax1, markersize=15, color='red', label='non-snow')
ax1.legend(loc='lower right')
ax1.set_xlabel('Easting [m]')
ax1.set_ylabel('Northing [m]')
plt.show()

### Add 'snow' classification column, merge snow and non-snow points, and sample band values at points

In [None]:
# -----Add snow classification column (1 = snow, 0 = non-snow)
train_snow_pts['snow'] = 1
train_non_snow_pts['snow'] = 0

# -----Merge snow and non-snow points
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
train_pts = pd.concat([train_snow_pts, train_non_snow_pts], ignore_index=True)
# Add coords column: (x, y) tuple for each point geometry
train_pts['coords'] = [(pt.bounds[0], pt.bounds[1]) for pt in train_pts['geometry']]
# remove "id" and "geometry" columns
train_pts = train_pts.drop(columns=['id', 'geometry'])

# -----Sample band values at points
# Sample the raster once (one row per point, one column per band) instead of
# re-reading it separately for every band.
band_names = ['blue', 'green', 'red', 'NIR']
band_samples = np.array(list(im.sample(train_pts['coords']))).reshape(-1, im.count)
for band_idx, band_name in enumerate(band_names):
    train_pts[band_name] = band_samples[:, band_idx]

print(train_pts)

### Set up decision tree classifier

Adapted from the [scikit-learn decision tree documentation](https://scikit-learn.org/stable/modules/tree.html).

In [None]:
# -----Split dataset into features (band values) and target variable (snow)
feature_cols = ['blue', 'green', 'red', 'NIR']
X = train_pts[feature_cols] # features
y = train_pts['snow'] # target variable

# -----Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1) # 80% training and 20% test

# -----Create Decision Tree classifier object
# random_state is fixed so that the fitted tree is reproducible between runs.
dtree = DecisionTreeClassifier(random_state=1)

# -----Train Decision Tree classifier
dtree = dtree.fit(X_train, y_train)

# -----Predict the response for test dataset
y_pred = dtree.predict(X_test)

# -----Show Decision Tree classifier
tree_text = export_text(dtree, feature_names=feature_cols)
print(tree_text)
plt.figure(figsize=(10,10))
# Label nodes with band names and class names (classes sorted ascending: 0, 1)
plot_tree(dtree, feature_names=feature_cols, class_names=['non-snow', 'snow'])
plt.show()

# -----Model accuracy: fraction of test points classified correctly
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

### Apply to full image

In [None]:
# -----Save band values in GeoDataFrame
im_x_mesh, im_y_mesh = np.meshgrid(im_x, im_y)
# Use ONE validity mask for every column: a pixel is kept only when all four
# bands are non-NaN. Masking each band with its own NaN mask could produce
# columns of different lengths or rows that no longer line up with x/y.
valid = ~(np.isnan(b) | np.isnan(g) | np.isnan(r) | np.isnan(nir))
pts = gpd.GeoDataFrame()
pts['x'] = im_x_mesh[valid].flatten()
pts['y'] = im_y_mesh[valid].flatten()
pts['blue'] = b[valid].flatten()
pts['green'] = g[valid].flatten()
pts['red'] = r[valid].flatten()
pts['NIR'] = nir[valid].flatten()

# -----Predict snow classification using band values
pts['snow_pred'] = dtree.predict(pts[feature_cols])
print(pts)

# -----Plot results
# extract snow prediction and coordinates
# (named x_pts / y_pts so the training target `y` is not overwritten)
snow_pred = np.array(pts['snow_pred'])
x_pts, y_pts = np.array(pts['x']), np.array(pts['y'])
plt.rcParams.update({'font.size': 14, 'font.sans-serif': 'Arial'})
fig, ax1 = plt.subplots(1, 1, figsize=(10,10))
ax1.imshow(np.dstack([r, g, b]),
           extent=(np.min(im_x), np.max(im_x), np.min(im_y), np.max(im_y)))
ax1.set_xlabel('Easting [m]')
ax1.set_ylabel('Northing [m]')
ax1.scatter(x_pts[snow_pred==1], y_pts[snow_pred==1], s=0.1, color='cyan', label='snow')
# ax1.scatter(x_pts[snow_pred==0], y_pts[snow_pred==0], s=0.5, color='brown', label='non-snow')
ax1.legend(loc='lower right')
plt.show()

### Apply to other images

In [None]:
# TODO: this cell is an unfinished, fully commented-out stub (the loop has no body).
# NOTE(review) before enabling it:
#   - os.chdir() returns None, so `ims` would not hold anything useful.
#   - the pattern '*SR_clip.tif' does not match the '..._SR_clip_PSB.SD_adj.tif'
#     file name loaded earlier in this notebook; confirm the intended pattern.
# -----Load Planet image file names from directory
# ims = os.chdir(im_path) # change directory
# im_names = glob.glob('*SR_clip.tif') # load all .tif file names
# im_names.sort() # sort file names by date

# # -----Loop through images
# for im_name in im_names:
    