In [7]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

from sklearn.preprocessing import PolynomialFeatures, normalize
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score

from helpers.feature_extractors import extract_features, extract_features_2d, extract_features_edge, extract_features_cogrey
from helpers.metric_helpers import compute_true_positive_rate
from helpers.visualization_helpers import pretty_confusion, label_to_img
from helpers.dataset_preprocessing import build_model_data, load_image, create_dataset, extract_patches, compute_input_features, compute_output_features
from helpers.temp_helpers import *
import matplotlib.pyplot as plt

from PIL import Image


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# K Nearest Neighbors Classifier
This notebook shows a comparison of different parameters and features used to train a K Nearest Neighbor Classifier.

In [8]:
ROOT_DIR = "training/"
TRAIN_FRACTION = 0.8
FOREGROUND_THRESHOLD = 0.25
patch_size = 16
width = 400
height = 400
n_img = 10
root_dir = "training/"

# Extract 2d features

In [21]:
X1, Y1 = build_model_data(root_dir, extract_features_2d, patch_size=patch_size, n_img=n_img)
X1 = normalize(X1)
neigh1 = KNeighborsClassifier(n_neighbors=5)
neigh1.fit(X1, Y1)

Original loaded dataset size: 10
X [(6250, 2)] and Y [(6250,)]


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

# Extract 6d features

In [22]:
X2, Y2 = build_model_data(root_dir, extract_features, patch_size=patch_size, n_img=n_img)
X2 = normalize(X2)
neigh2 = KNeighborsClassifier(n_neighbors=5)
neigh2.fit(X2, Y2)

Original loaded dataset size: 10
X [(6250, 6)] and Y [(6250,)]


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

# Add canny edge detector

In [23]:
X3, Y3 = build_model_data(root_dir, extract_features_edge, patch_size=patch_size, n_img=n_img)
X3 = normalize(X3)
neigh3 = KNeighborsClassifier(n_neighbors=5)
neigh3.fit(X3, Y3)

Original loaded dataset size: 10
X [(6250, 7)] and Y [(6250,)]


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

# Add polynomial features

In [24]:
X4, Y4 = build_model_data(root_dir, extract_features_edge, patch_size=patch_size, n_img=n_img)
poly = PolynomialFeatures(2)
X4 = poly.fit_transform(X4)
X4 = normalize(X4)
neigh4 = KNeighborsClassifier(n_neighbors=5)
neigh4.fit(X4, Y4)

Original loaded dataset size: 10
X [(6250, 7)] and Y [(6250,)]


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

# Comparison

In [25]:
models = [(neigh1, X1, Y1), (neigh2, X2, Y2), (neigh3, X3, Y3), (neigh4, X4, Y4)]

for m in models:
    Z = m[0].predict(m[1])

    TPR = compute_true_positive_rate(m[2],Z)
    print('True positive rate = ' + str(TPR))
    print('F1-score:', f1_score(m[2],Z))
    pretty_confusion(["road", "bg"], m[2], Z)
    print('\n')

True positive rate = 0.13344
F1-score: 0.544564152791
            t/p           road             bg
           road           4021            401
             bg            994            834


True positive rate = 0.20944
F1-score: 0.746719908728
            t/p           road             bg
           road           4053            369
             bg            519           1309


True positive rate = 0.208
F1-score: 0.745199197478
            t/p           road             bg
           road           4061            361
             bg            528           1300


True positive rate = 0.20448
F1-score: 0.735539568345
            t/p           road             bg
           road           4053            369
             bg            550           1278




# Cross-validation without post-processing

In [26]:
for m in models:
    cr_val = cross_val_score(m[0], m[1], m[2], cv=10, scoring='accuracy')
    print('ACCURACY')
    print(cr_val)
    print('mean accuracy:', cr_val.mean())
    cr_val = cross_val_score(m[0], m[1], m[2], cv=10, scoring='f1')
    print('F1-SCORE')
    print(cr_val)
    print('mean f1:', cr_val.mean())
    print('\n')

ACCURACY
[ 0.7172524   0.70607029  0.704       0.6976      0.6608      0.6848
  0.6992      0.6864      0.66346154  0.67948718]
mean accuracy: 0.689907140165
F1-SCORE
[ 0.37894737  0.43902439  0.36860068  0.38834951  0.34969325  0.35409836
  0.35616438  0.35947712  0.33121019  0.33333333]
mean f1: 0.365889860017


ACCURACY
[ 0.80191693  0.78434505  0.8336      0.7328      0.7456      0.7472
  0.7952      0.7808      0.77083333  0.80128205]
mean accuracy: 0.779357736545
F1-SCORE
[ 0.64571429  0.65116279  0.70454545  0.52691218  0.56198347  0.53529412
  0.62573099  0.60058309  0.58064516  0.65745856]
mean f1: 0.609003011034


ACCURACY
[ 0.79392971  0.7715655   0.8288      0.7344      0.7424      0.7456
  0.7936      0.7808      0.76762821  0.79967949]
mean accuracy: 0.775840289998
F1-SCORE
[ 0.63037249  0.63239075  0.69688385  0.52840909  0.55890411  0.53097345
  0.62170088  0.60518732  0.57971014  0.65181058]
mean f1: 0.603634267239


ACCURACY
[ 0.78913738  0.78115016  0.832       0.752

# Post processing

In [27]:
from skimage.morphology import *
def postprocessing(Z, w, h, patch_size):
    n_patch = np.int(w/patch_size)
    Z = Z.reshape(-1,n_patch*n_patch)
    
    Z = np.apply_along_axis(post_image, arr=Z, axis=1)
    Z = Z.reshape(-1,1)
    return Z

def post_image(Z):
    binary_closing(Z, out=Z)
    return Z

# Cross validation with post-processing

In [28]:
from sklearn.model_selection import KFold
def cross_validation(X, Y, neigh, cv=10, post=False, verbose=False):
    tot_f1 = 0
    tot_acc = 0
    kf = KFold(n_splits=cv)

    for k, (train, test) in enumerate(kf.split(X)):
        x_train = X[train]
        x_test = X[test]
        y_train = Y[train]
        y_test = Y[test]

        neigh.fit(x_train,y_train)

        z = neigh.predict(x_test)

        if post:
            z = postprocessing(z, width, height, patch_size)

        Zn = np.nonzero(z)[0]
        Yn = np.nonzero(y_test)[0]

        TPR = len(list(set(Yn) & set(Zn))) / float(len(z))
        f1 = f1_score(y_test,z)
        acc = accuracy_score(y_test,z)

        tot_f1 = tot_f1 + f1
        tot_acc = tot_acc + acc
        
        if verbose:
            print('K: ', k)
            print('True positive rate = ' + str(TPR))
            print('F1-score:', f1)
            print('accuracy score:', acc)
            print('\n')

    avg_f1 = tot_f1/cv
    avg_acc = tot_acc/cv
    
    print('Average Accuracy score:', avg_acc)
    print('Average F1-score:', avg_f1)

In [29]:
for m in models:
    print()
    cross_validation(m[1],m[2],m[0],post=False)
    cross_validation(m[1],m[2],m[0],post=True)


Average Accuracy score: 0.69056
Average F1-score: 0.368168735297
Average Accuracy score: 0.68576
Average F1-score: 0.449350944601

Average Accuracy score: 0.77952
Average F1-score: 0.607947219531
Average Accuracy score: 0.7704
Average F1-score: 0.633157527579

Average Accuracy score: 0.7768
Average F1-score: 0.60366873956
Average Accuracy score: 0.76848
Average F1-score: 0.63180251541

Average Accuracy score: 0.78048
Average F1-score: 0.609667054322
Average Accuracy score: 0.76768
Average F1-score: 0.63680227299


# Cross-validation for n_neighbors and degree

In [None]:
X0, Y0 = build_model_data(root_dir, extract_features_edge, patch_size=patch_size, n_img=n_img)
degrees = [1, 2, 3, 5, 7]
n_neighbors = [3, 5, 7, 10]
for d in degrees:
    poly = PolynomialFeatures(d)
    X = poly.fit_transform(X0)
    X = normalize(X)
    for n in n_neighbors:
        neigh = KNeighborsClassifier(n_neighbors=n)
        
        cr_val = cross_val_score(neigh, X, Y0, cv=10, scoring='f1')
        print('F1-SCORE')
        print(cr_val)
        print('mean f1:', cr_val.mean())
        print('\n')