In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

import rasterio
import geopandas as gpd

from shapely.geometry import Polygon

import iceplant_detection_functions as ipf

In [None]:
from imblearn.over_sampling import SMOTE

In [None]:
#pip install -U imbalanced-learn

# AREAS OF INTEREST

In [None]:
# Read the areas-of-interest shapefile located under ./areas_of_interest,
# resolved relative to the current working directory.
aois = gpd.read_file(os.path.join(os.getcwd(),'areas_of_interest','areas_of_interest.shp'))
# First row of the shapefile: the Campus Lagoon AOI. Its `itemid` and `geometry`
# fields are used by the prediction cells further down.
cp = aois.iloc[0]  # Campus Lagoon aoi

# ASSEMBLE TRAINING SET

All points in the training set were collected from a single NAIP image (item id = `cp.itemid`).

In [None]:
# Load the "trees" training points from
# ./training_set/aux_data/trainset_trees/trainset_trees.csv (relative to the cwd).
# NOTE: `fp` is a scratch variable that the following load cells overwrite.
fp = os.path.join(os.getcwd(),'training_set','aux_data','trainset_trees','trainset_trees.csv')
trees_df = pd.read_csv(fp)
#trees_df.head(1)

In [None]:
# Load the non-iceplant vegetation points from
# ./training_set/aux_data/trainset_non_iceplant_vegetation.csv (relative to the cwd).
# Only a 1% sample of this table is later appended to the training set.
fp = os.path.join(os.getcwd(),'training_set','aux_data','trainset_non_iceplant_vegetation.csv')
noniceplant_features_df = pd.read_csv(fp)
#noniceplant_features_df.head(1)

In [None]:
# Load the main ("BIG") training table from
# ./training_set/aux_data/trainset_BIG.csv (relative to the cwd).
# The next cell extends this frame with the auxiliary point sets.
fp = os.path.join(os.getcwd(),'training_set','aux_data','trainset_BIG.csv')
features_df = pd.read_csv(fp)
#features_df.head(1)

In [None]:
# Add a sample of the non-iceplant vegetation points, plus all tree points,
# to the BIG training set.
#
# The 1% sample is seeded so that the assembled training set (and therefore
# every model and metric below) is identical on each Restart & Run All.
# NOTE: this cell rebinds `features_df`; re-run the cell that loads
# trainset_BIG.csv before re-running this one, otherwise the auxiliary points
# are appended a second time.
features_df = pd.concat([features_df,
                         noniceplant_features_df.sample(frac=0.01, random_state=42),
                         trees_df])
# Keep only the model columns: drop the geometry/coordinate columns and the
# leftover CSV index column.
features_df = features_df.drop(['geometry','Unnamed: 0','x','y'], axis=1)
# Report the iceplant / non-iceplant counts of the assembled set (project helper).
ipf.iceplant_counts(features_df)

# SPLIT INTO TRAIN/TEST

In [20]:
# Split the assembled table into train/test features and labels using the
# project helper (split parameters live in iceplant_detection_functions).
train_features, test_features, train_labels, test_labels = ipf.test_train_from_df(features_df)

# Print the shapes of the four arrays and the per-class counts of each split,
# to check the size of the split and the class imbalance.
ipf.train_test_shapes(train_features, train_labels, test_features, test_labels)
ipf.test_train_proportions(train_labels, test_labels)

Training Features Shape: (4981, 4)
Training Labels Shape: (4981,)
Testing Features Shape: (2136, 4)
Testing Labels Shape: (2136,)

TRAIN SET: Iceplant / no iceplant counts
[[   0 3870]
 [   1 1111]] 

TEST SET: Iceplant / no iceplant counts
[[   0 1638]
 [   1  498]]



# MODEL 1: standard rfc 
- covariates: r, g, b, nir
- trained on previous set (bigger set than original TRIALS_random forest)

- predictions are only made over NDVI>0.2

In [None]:
# Train random forest classifier
# 150 trees with a fixed seed so the fitted forest is reproducible for a given
# training split. No class weighting or resampling is applied in this model.
rfc = RandomForestClassifier(n_estimators = 150, random_state = 42)
rfc.fit(train_features,train_labels)

In [None]:
# Predict labels for the held-out test features with the standard model.
predictions = rfc.predict(test_features)

# Print the evaluation report for the standard model (project helper).
ipf.print_rfc_evaluation(rfc, test_features, test_labels, predictions)

In [None]:
# Run the standard model over the Campus Lagoon AOI of the NAIP scene
# `cp.itemid`. Per the notes above, the helper only predicts where NDVI > 0.2.
reconstructed = ipf.mask_ndvi_and_predict(cp.itemid, cp.geometry,rfc)

# Show the resulting prediction raster.
fig, ax = plt.subplots(figsize=(15, 15))
plt.title("PREDICTIONS : standard rfc model")
ax.imshow(reconstructed)
plt.show()

# MODEL 2: standard rfc + SMOTE
- covariates: r, g, b, nir
- trained on previous set (bigger set than original TRIALS_random forest)
- SMOTE oversampling used to deal with class imbalance

https://imbalanced-learn.org/stable/references/generated/imblearn.over_sampling.SMOTE.html?highlight=smote#imblearn.over_sampling.SMOTE

- predictions are only made over NDVI>0.2

In [None]:
# create test and train sets with SMOTE
#
# The data is split BEFORE oversampling and SMOTE is fitted on the training
# portion only. Oversampling the full data set first would (a) put synthetic
# points interpolated from test-set neighbours into the training set and
# (b) evaluate the model on synthetic samples, both of which inflate the
# reported scores.

labels = np.array(features_df['iceplant'])
features = np.array(features_df.drop('iceplant', axis = 1))

# Stratified 90/10 hold-out of the real (non-synthetic) observations.
# `over_X_test` / `over_y_test` keep their names so the evaluation cell that
# follows works unchanged; they now contain original samples only.
X_train_raw, over_X_test, y_train_raw, over_y_test = train_test_split(
    features, labels, test_size=0.1, stratify=labels, random_state=42)

# Balance the classes of the training portion only; seeded for reproducibility.
oversample = SMOTE(random_state=42)
over_X_train, over_y_train = oversample.fit_resample(X_train_raw, y_train_raw)

# ------------------------------------------------------------------------------------------

#Build SMOTE rfc model
SMOTE_SRF = RandomForestClassifier(n_estimators=150, 
                                   random_state=42)
#Train SMOTE rfc
SMOTE_SRF.fit(over_X_train, over_y_train)

In [None]:
# Predict labels for the held-out split created in the SMOTE cell above.
SMOTE_predictions = SMOTE_SRF.predict(over_X_test)

# Print the evaluation report for the SMOTE-trained model (project helper).
ipf.print_rfc_evaluation(SMOTE_SRF, over_X_test, over_y_test, SMOTE_predictions)

In [None]:
# Run the SMOTE-trained model over the Campus Lagoon AOI of scene `cp.itemid`.
# The raster gets its own name instead of reusing `predictions`, which holds
# the test-set label vector of the standard model; rebinding it to an image
# made the meaning of `predictions` depend on which cell ran last.
reconstructed_smote = ipf.mask_ndvi_and_predict(cp.itemid, cp.geometry,SMOTE_SRF)

# Show the resulting prediction raster.
fig, ax = plt.subplots(figsize=(15, 15))
plt.title("PREDICTIONS : standard rfc model with SMOTE sampling")
ax.imshow(reconstructed_smote)
plt.show()

# MODEL 3: rfc with class weighting
- `class_weight='balanced'` is set in the rfc model, so classes are weighted inversely to their frequency in the training labels

https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html

In [None]:
# Same forest as the standard model (150 trees, seed 42), but with
# class_weight='balanced' to counter the iceplant / non-iceplant imbalance.
# `fit` returns the fitted estimator, so construction and training are chained.
rfc_balanced = RandomForestClassifier(
    class_weight='balanced',
    n_estimators=150,
    random_state=42,
).fit(train_features, train_labels)

# Score the class-weighted model on the same held-out test split.
predictions_balanced = rfc_balanced.predict(test_features)
ipf.print_rfc_evaluation(
    rfc_balanced,
    test_features,
    test_labels,
    predictions_balanced,
)

In [None]:
# Run the class-weighted model over the Campus Lagoon AOI of scene `cp.itemid`.
reconstructed = ipf.mask_ndvi_and_predict(cp.itemid, cp.geometry, rfc_balanced)

# Show the resulting prediction raster. The title matches the other two model
# figures so the three plots can be told apart when skimming the notebook.
fig, ax = plt.subplots(figsize=(20, 20))
plt.title("PREDICTIONS : rfc model with balanced class weights")
ax.imshow(reconstructed)
plt.show()

# **** MOVE THESE TO ANOTHER NOTEBOOK ****

In [None]:
# Predict with the standard model over a zoomed-in window of the lagoon.
# NOTE(review): `itemid` and `lagoon_zoom_box` are not assigned in any cell
# visible in this notebook, so this cell fails on a fresh kernel; confirm where
# they are meant to be defined (`itemid` is presumably `cp.itemid`).
lagoon_zoom = ipf.mask_ndvi_and_predict(itemid, lagoon_zoom_box,rfc)
fig, ax = plt.subplots(figsize=(20, 20))
ax.imshow(lagoon_zoom)
plt.show()

In [None]:
# Plot the lagoon zoom window within its scene (project helper).
# NOTE(review): `itemid` and `lagoon_zoom_box` are not defined in any visible cell.
ipf.plot_window_in_scene(itemid, lagoon_zoom_box)

In [None]:
# Predict with the standard model over the window `tree_box`.
# NOTE(review): `itemid` and `tree_box` are not assigned in any cell visible in
# this notebook; confirm where they are meant to be defined.
tree_predict = ipf.mask_ndvi_and_predict(itemid, tree_box, rfc)
fig, ax = plt.subplots(figsize=(8,8))
ax.imshow(tree_predict)
plt.show()

In [None]:
# NDVI-based image of the same `tree_box` window (project helper); it is
# combined with `tree_predict` in the next cell.
# NOTE(review): `itemid` and `tree_box` are not defined in any visible cell.
tree_ndvi = ipf.select_ndvi_image(itemid,tree_box)
# Optional visual check of the NDVI image (disabled):
# fig, ax = plt.subplots(figsize=(8, 8))
# ax.imshow(tree_ndvi)
# plt.show()

In [None]:
# Difference between the NDVI image and the model prediction for the window.
# NOTE(review): presumably both arrays are 0/1 masks of the same shape, so the
# result marks pixels selected by NDVI but not predicted as iceplant; confirm
# against ipf.select_ndvi_image / ipf.mask_ndvi_and_predict.
true_tree = tree_ndvi - tree_predict
fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(true_tree)
plt.show()

In [None]:
# Read the pixels of the `tree_box` window from the scene (project helper).
# The cells below reshape the result to 4 rows, named r, g, b, nir.
# NOTE(review): `itemid` and `tree_box` are not defined in any visible cell.
tree_naip = ipf.open_window_in_scene(itemid, tree_box)

In [None]:
# Mask the window's bands with the prediction, then flatten to one row per
# pixel with columns r, g, b, nir.
p = (tree_naip*tree_predict).reshape([4,-1]).T
p = pd.DataFrame(p, columns =['r','g','b','nir'])
# Keep pixels whose four bands are all non-zero. The explicit .copy() makes
# `df` an independent frame, so adding a column below does not raise
# SettingWithCopyWarning (assignment on a slice of `p`).
df = p[(p.r!=0) & (p.g!=0) & (p.b!=0) & (p.nir!=0)].copy()
# Label these pixels as classified by the model.
df['classified']=1
df

In [None]:
# Mask the window's bands with `true_tree` (NDVI image minus prediction), then
# flatten to one row per pixel with columns r, g, b, nir.
icep_predict = tree_naip*true_tree
p = icep_predict.reshape([4,-1]).T
p = pd.DataFrame(p, columns =['r','g','b','nir'])
# Keep pixels whose four bands are all non-zero. The explicit .copy() makes
# `df2` an independent frame, so adding a column below does not raise
# SettingWithCopyWarning (assignment on a slice of `p`).
df2 = p[(p.r!=0) & (p.g!=0) & (p.b!=0) & (p.nir!=0)].copy()
# Label these pixels as not classified by the model.
df2['classified']=0
df2

In [None]:
# Stack the classified (1) and non-classified (0) pixel tables into one frame.
# NOTE(review): this rebinds `df` to a frame built from `df` itself, so running
# the cell twice appends `df2` twice; re-run the two cells above first. The
# original row indices of both inputs are kept and may repeat.
df = pd.concat([df,df2])
df

In [None]:
# Predict with the class-weighted model over the window `dangermond_box` of
# the scene `dangermond_itemid`.
# NOTE(review): `dangermond_itemid` and `dangermond_box` are not assigned in any
# cell visible in this notebook, so this cell fails on a fresh kernel.
dangermond = ipf.mask_ndvi_and_predict(dangermond_itemid, dangermond_box,rfc_balanced)
fig, ax = plt.subplots(figsize=(20, 20))
ax.imshow(dangermond)
plt.show()

In [None]:
# Plot the Dangermond window within its scene (project helper).
# NOTE(review): `dangermond_itemid` and `dangermond_box` are not defined in any visible cell.
ipf.plot_window_in_scene(dangermond_itemid, dangermond_box)