In [None]:
from scipy.stats import expon
from sklearn import svm
from sklearn.model_selection import RandomizedSearchCV
import pickle
import numpy as np
import geopandas as gpd
# from geopandas import GeoDataFrame
import matplotlib
from matplotlib import pyplot

In [None]:
def train(X, Y, n_iter=20, random_state=None):
    """
    Train classification algorithm.

    Tune and fit a Support Vector Machine classifier by running a randomized
    hyper-parameter search (RandomizedSearchCV) over the supplied data.

    Parameters
    ----------
    X: numpy 2D array
        A 2D numpy array where there is one row for each segment and each
        column represents an attribute of the segments.

    Y: numpy 1D array
        A 1D numpy array equal in length to the number of rows in X.
        It contains the actual class value for each of the segments.

    n_iter: int, optional
        Number of parameter settings sampled by the randomized search.
        Defaults to 20, the value that was previously hard-coded.

    random_state: int, RandomState instance or None, optional
        Seed forwarded to RandomizedSearchCV so that the sampled parameter
        settings can be reproduced. The default (None) keeps the previous,
        unseeded behaviour.

    Returns
    -------
    model: RandomizedSearchCV
        The fitted search object (not a bare svm.SVC). With scikit-learn's
        default ``refit=True`` it exposes ``predict``, backed by the best
        estimator found, so it can be used to classify other data.

    """
    # Distributions / candidate lists the search samples from. C and gamma
    # are drawn from exponential distributions; the kernel is fixed to RBF.
    param_dist = {'C': expon(scale=100),
                  'gamma': expon(scale=.1),
                  'kernel': ['rbf'],
                  'class_weight': ['balanced', None]}

    random_search = RandomizedSearchCV(svm.SVC(),
                                       param_distributions=param_dist,
                                       n_iter=n_iter,
                                       random_state=random_state)

    random_search.fit(X, Y)  # this may take time...

    return random_search

In [None]:
def predict(model, X):
    """
    Classify segments with an already-trained model.

    Thin wrapper that forwards the attribute table to the model's own
    ``predict`` method and hands back whatever it produces.

    Parameters
    ----------
    model: object with a ``predict`` method
        A trained model, e.g. the object returned by ``train``.

    X: numpy 2D array
        A 2D numpy array with one row per segment and one column per
        segment attribute. Presumably the columns must match the ones
        the model was trained on -- verify at the call site.

    Returns
    -------
    predictions:
        The result of ``model.predict(X)``.
    """
    return model.predict(X)

In [None]:
for_training = gpd.read_file("rag_and_zonal_statistics/wetlands_update2.gpkg", layer="ready2classify")

In [None]:
for_training.head

In [None]:
big_train = for_training[~for_training["class"].isnull()]

In [None]:
big_train.head

In [None]:
big_train.columns.values

In [None]:
labels = big_train['class']

In [None]:
labels

In [None]:
classes = big_train[['red_mean', 'green_mean', "blue_mean","nir_mean", "eccentricity", "orientation", "sobel_max"]]

In [None]:
classes

In [None]:
type(classes)

In [None]:
type(labels)

In [None]:
model = train(classes, labels)

In [None]:
to_predict = for_training[['red_mean', 'green_mean', "blue_mean","nir_mean", "eccentricity", "orientation", "sobel_max"]]

In [None]:
to_predict

In [None]:
output = predict(model, to_predict.values)

In [None]:
output

In [None]:
for_training['classified'] = output

In [None]:
for_training

In [None]:
for_training.to_file("rag_and_zonal_statistics/wetland_update_nir.gpkg", layer="predictions", driver="GPKG")