In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

import rasterio
import geopandas as gpd

from shapely.geometry import Polygon

import iceplant_detection_functions as ipf
import model_prep_and_evals as mpe

import warnings

In [2]:
# Load the labelled samples and discard the leftover index column that was
# written into the CSV, without mutating the frame in place.
df = (
    pd.read_csv(os.path.join(os.getcwd(), 'feature_selection', 'samples_for_model.csv'))
    .drop(columns=['Unnamed: 0'])
)
# Quick look at the first few rows.
df.head(3)

Unnamed: 0,r,g,b,nir,ndvi,year,month,day_in_year,iceplant
0,134,125,103,170,0.118421,2012,5,126,1
1,130,114,101,164,0.115646,2012,5,126,1
2,132,110,98,160,0.09589,2012,5,126,1


In [3]:
# Split the samples into train/test features and labels with the project helper,
# then print the class balance of each split (the recorded output shows roughly
# a 70/30 train/test split with about 2:1 no-iceplant:iceplant in both).
# NOTE(review): the split fraction, stratification and random seed are decided
# inside mpe.test_train_from_df and are not visible here — confirm the split is
# seeded so the metrics further down are reproducible.
X_train, X_test, y_train, y_test = mpe.test_train_from_df(df)
mpe.test_train_proportions(y_train, y_test)

TRAIN SET COUNTS:
no-iceplant:iceplant ratio     1.9 :1
          counts  percentage
iceplant                    
0         159093       65.99
1          82006       34.01

TEST SET COUNTS:
no-iceplant:iceplant ratio     2.0 :1
          counts  percentage
iceplant                    
0          68381       66.18
1          34948       33.82



In [8]:
# Gradient-boosted classifier for iceplant vs. no-iceplant.
# learning_rate and max_depth are spelled out even though the fitted
# estimator's repr omits them; random_state pins the model's own randomness.
# Early stopping (validation_fraction / n_iter_no_change) is deliberately not
# passed, so all 200 boosting stages are fit on the full training set.
gbc = GradientBoostingClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=3,
    random_state=0,
    verbose=1,  # prints the per-iteration training loss table
)
# Kept as the cell's last expression so the fitted estimator is displayed.
gbc.fit(X_train, y_train)

      Iter       Train Loss   Remaining Time 
         1           1.2059            1.13m
         2           1.1429            1.12m
         3           1.0832            1.10m
         4           1.0321            1.10m
         5           0.9931            1.10m
         6           0.9549            1.11m
         7           0.9203            1.11m
         8           0.8917            1.12m
         9           0.8651            1.10m
        10           0.8407            1.09m
        20           0.6649            1.01m
        30           0.5730           57.46s
        40           0.5133           54.39s
        50           0.4712           50.97s
        60           0.4385           47.67s
        70           0.4129           44.27s
        80           0.3929           40.93s
        90           0.3765           37.60s
       100           0.3619           34.20s
       200           0.2913            0.00s


GradientBoostingClassifier(n_estimators=200, random_state=0, verbose=1)

In [9]:
# Predict class labels for the held-out test features with the fitted model,
# then print the project's threshold-based metrics for those predictions (the
# recorded output lists confusion-matrix counts, sensitivity, specificity,
# G-mean, precision, MCC, F-measures and accuracy).
predictions = gbc.predict(X_test)
mpe.print_threshold_metrics(y_test, predictions)


true negatives: 65956     false positives: 2425
false negatives: 3085     true positives: 31863

sensitivity (TP/P): 91.17 %
specificity (TN/N): 96.45 %
G-mean:  0.94

precision (TP/(TP+FP)): 92.93 %

MCC:  0.8804175907331317

F1-measure:  0.92042
F0.5-measure (min false positives):  0.92571
F2-measure (min false negatives)  :  0.91518

accuracy: 94.67 %

