In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

from sklearn.preprocessing import PolynomialFeatures, normalize
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score

from helpers.dataset_preprocessing import build_model_data
from helpers.feature_extractors import extract_features, extract_features_2d, extract_features_edge
from helpers.metric_helpers import compute_true_positive_rate
from helpers.visualization_helpers import pretty_confusion

from helpers.temp_helpers import *

In [2]:
# Patch size forwarded to build_model_data and to postprocessing.
patch_size = 16
# Image dimensions handed to postprocessing (as w and h) by cross_validation.
width, height = 400, 400
# Forwarded to build_model_data as its n_img argument.
n_img = 10
# First positional argument of build_model_data.
root_dir = "training/"

# Extract 2d features

In [3]:
# Build the dataset with the 2D feature extractor, then rescale every sample
# with sklearn's normalize before fitting a 5-nearest-neighbours classifier.
X1, Y1 = build_model_data(
    root_dir,
    extract_features_2d,
    patch_size=patch_size,
    n_img=n_img,
)
X1 = normalize(X1)
# fit() returns the estimator itself, so chaining keeps neigh1 bound to the
# fitted classifier.
neigh1 = KNeighborsClassifier(n_neighbors=5).fit(X1, Y1)
# Echo the fitted estimator as the cell output.
neigh1

Original loaded dataset size: 10
X [(62500, 2)] and Y [(62500,)]


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

# Extract 6d features

In [4]:
# Same pipeline as the 2D model, but using the extract_features extractor.
X2, Y2 = build_model_data(
    root_dir,
    extract_features,
    patch_size=patch_size,
    n_img=n_img,
)
X2 = normalize(X2)
# fit() returns the estimator itself, so chaining keeps neigh2 bound to the
# fitted classifier.
neigh2 = KNeighborsClassifier(n_neighbors=5).fit(X2, Y2)
# Echo the fitted estimator as the cell output.
neigh2

Original loaded dataset size: 10
X [(62500, 6)] and Y [(62500,)]


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

# Add Canny edge detector

In [5]:
# Same pipeline again, this time with the extract_features_edge extractor.
X3, Y3 = build_model_data(
    root_dir,
    extract_features_edge,
    patch_size=patch_size,
    n_img=n_img,
)
X3 = normalize(X3)
# fit() returns the estimator itself, so chaining keeps neigh3 bound to the
# fitted classifier.
neigh3 = KNeighborsClassifier(n_neighbors=5).fit(X3, Y3)
# Echo the fitted estimator as the cell output.
neigh3

Original loaded dataset size: 10
X [(62500, 7)] and Y [(62500,)]


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

# Add polynomial features

In [6]:
# Edge-aware features expanded with degree-3 polynomial terms, then rescaled
# per sample with sklearn's normalize before fitting the classifier.
X4, Y4 = build_model_data(
    root_dir,
    extract_features_edge,
    patch_size=patch_size,
    n_img=n_img,
)
# `poly` stays a notebook-level name so the same expansion can be reused.
poly = PolynomialFeatures(3)
X4 = normalize(poly.fit_transform(X4))
# fit() returns the estimator itself, so chaining keeps neigh4 bound to the
# fitted classifier.
neigh4 = KNeighborsClassifier(n_neighbors=5).fit(X4, Y4)
# Echo the fitted estimator as the cell output.
neigh4

Original loaded dataset size: 10
X [(62500, 7)] and Y [(62500,)]


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

# Comparison

In [7]:
# One (fitted classifier, feature matrix, labels) triple per feature set.
models = [
    (neigh1, X1, Y1),
    (neigh2, X2, Y2),
    (neigh3, X3, Y3),
    (neigh4, X4, Y4),
]

for clf, feats, labels in models:
    # Score each classifier on the very data it was fitted on.
    Z = clf.predict(feats)

    TPR = compute_true_positive_rate(labels, Z)
    print('True positive rate = ' + str(TPR))
    print('F1-score:', f1_score(labels, Z))
    pretty_confusion(["road", "bg"], labels, Z)
    print('\n')

True positive rate = 0.089648
F1-score: 0.450510573289
            t/p           road             bg
           road          43229           3080
             bg          10588           5603


True positive rate = 0.152768
F1-score: 0.646598720076
            t/p           road             bg
           road          42515           3794
             bg           6643           9548


True positive rate = 0.153264
F1-score: 0.647995941147
            t/p           road             bg
           road          42514           3795
             bg           6612           9579


True positive rate = 0.160624
F1-score: 0.670697487974
            t/p           road             bg
           road          42603           3706
             bg           6152          10039




# Cross-validation without post-processing

In [8]:
# 10-fold cross-validated accuracy and F1 for every model, printed per fold
# together with the mean.
for clf, feats, labels in models:
    for scoring, title, mean_label in (
        ('accuracy', 'ACCURACY', 'mean accuracy:'),
        ('f1', 'F1-SCORE', 'mean f1:'),
    ):
        cr_val = cross_val_score(clf, feats, labels, cv=10, scoring=scoring)
        print(title)
        print(cr_val)
        print(mean_label, cr_val.mean())
    print('\n')

ACCURACY
[ 0.70276756  0.68896     0.6848      0.6864      0.70336     0.69728
  0.69456     0.68128     0.70192     0.68138902]
mean accuracy: 0.692271657943
F1-SCORE
[ 0.23539095  0.20523303  0.22805643  0.22098569  0.23261589  0.22075783
  0.21148286  0.25056433  0.22600748  0.21706646]
mean f1: 0.224816094488


ACCURACY
[ 0.78403455  0.75472     0.74416     0.70688     0.76688     0.77248
  0.74096     0.75104     0.73808     0.66074572]
mean accuracy: 0.741998027379
F1-SCORE
[ 0.53956344  0.45269547  0.4836939   0.41282051  0.48715241  0.48215586
  0.46479339  0.52990937  0.41598288  0.43193998]
mean f1: 0.470070719621


ACCURACY
[ 0.78131499  0.75504     0.74816     0.70656     0.7656      0.77376
  0.7408      0.75408     0.74016     0.66170587]
mean accuracy: 0.742718086254
F1-SCORE
[ 0.53582343  0.45496618  0.48862898  0.41851617  0.48506151  0.48693759
  0.46534653  0.53466546  0.41833811  0.43080237]
mean f1: 0.471908633021


ACCURACY
[ 0.79331307  0.75648     0.77296     0.

# Post-processing

In [9]:
from skimage.morphology import *
def postprocessing(Z, w, h, patch_size):
    """Apply `post_image` to each image-sized group of predictions.

    The flat prediction array is reshaped so that every row holds the
    predictions for one (w / patch_size) x (h / patch_size) grid of patches,
    `post_image` is applied to each row, and the result is returned as a
    column vector.

    Parameters
    ----------
    Z : numpy.ndarray
        Per-patch predictions. Its size must be a multiple of the number of
        patches per image.
    w, h : int
        Image width and height.
    patch_size : int
        Patch size used to split the image along both axes.

    Returns
    -------
    numpy.ndarray
        Array of shape (Z.size, 1) with the post-processed predictions.

    Raises
    ------
    ValueError
        Raised by `reshape` when Z.size is not a multiple of the number of
        patches per image.
    """
    # The builtin int() replaces np.int, which was removed from NumPy.
    n_patch_w = int(w / patch_size)
    # Use the height as well, so non-square images get the right patch count.
    n_patch_h = int(h / patch_size)

    Z = Z.reshape(-1, n_patch_w * n_patch_h)

    # NOTE(review): apply_along_axis hands post_image one flat 1-D row at a
    # time, so the closing runs along the patch sequence rather than on a 2-D
    # patch grid -- confirm this is intended.
    Z = np.apply_along_axis(post_image, arr=Z, axis=1)
    Z = Z.reshape(-1, 1)
    return Z

def post_image(Z):
    """Run `binary_closing` on Z, writing the result back into Z.

    `binary_closing` is expected to come from the `skimage.morphology` star
    import in this cell.

    Parameters
    ----------
    Z : numpy.ndarray
        Array to close; it is overwritten in place.

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in.
    """
    # out=Z makes the operation in-place instead of allocating a new array.
    binary_closing(image=Z, out=Z)
    return Z

# Cross-validation with post-processing

In [10]:
from sklearn.model_selection import KFold
def cross_validation(X, Y, neigh, cv=10, post=False, verbose=False):
    """Run unshuffled K-fold cross-validation and print mean accuracy and F1.

    For every fold a copy of `neigh` is fitted on the training split and
    scored on the held-out split. When `post` is true the predictions go
    through `postprocessing` first, using the notebook-level `width`,
    `height` and `patch_size` values.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, indexed by row with the fold indices.
    Y : numpy.ndarray
        Labels aligned with the rows of X.
    neigh : estimator
        Object exposing `fit` and `predict`. It is copied for each fold and
        is never refitted itself.
    cv : int, default 10
        Number of folds.
    post : bool, default False
        Whether to run `postprocessing` on the predictions before scoring.
    verbose : bool, default False
        Whether to print the per-fold metrics.

    Returns
    -------
    tuple of float
        (average accuracy, average F1) over the folds. The same values are
        printed.
    """
    # Imported locally so the function brings its own dependency into scope.
    from sklearn.base import clone

    fold_f1 = []
    fold_acc = []

    # KFold is deliberately left unshuffled: postprocessing regroups
    # consecutive predictions into fixed-size rows, so test rows must stay in
    # their original order.
    kf = KFold(n_splits=cv)

    for k, (train, test) in enumerate(kf.split(X)):
        x_train, x_test = X[train], X[test]
        y_train, y_test = Y[train], Y[test]

        # Fit a copy so the caller's estimator keeps whatever it was trained
        # on, instead of ending up fitted on the last fold only.
        model = clone(neigh, safe=False)
        model.fit(x_train, y_train)

        z = model.predict(x_test)

        if post:
            z = postprocessing(z, width, height, patch_size)

        # postprocessing returns a column vector; flatten it so both code
        # paths score arrays of the same shape.
        z = np.ravel(z)

        f1 = f1_score(y_test, z)
        acc = accuracy_score(y_test, z)

        fold_f1.append(f1)
        fold_acc.append(acc)

        if verbose:
            # NOTE: this is (samples that are non-zero in both prediction and
            # truth) / (all samples in the fold), not TP / (TP + FN). The
            # formula and label are kept as they were.
            both_positive = (z != 0) & (np.ravel(y_test) != 0)
            TPR = np.count_nonzero(both_positive) / float(len(z))

            print('K: ', k)
            print('True positive rate = ' + str(TPR))
            print('F1-score:', f1)
            print('accuracy score:', acc)
            print('\n')

    avg_f1 = sum(fold_f1) / cv
    avg_acc = sum(fold_acc) / cv

    print('Average Accuracy score:', avg_acc)
    print('Average F1-score:', avg_f1)

    return avg_acc, avg_f1

In [11]:
# For every model, report cross-validation scores first without and then with
# post-processing of the predictions.
for clf, feats, labels in models:
    print()
    for use_post in (False, True):
        cross_validation(feats, labels, clf, post=use_post)


Average Accuracy score: 0.692768
Average F1-score: 0.22384039764
Average Accuracy score: 0.677056
Average F1-score: 0.267398999642

Average Accuracy score: 0.742448
Average F1-score: 0.468462493508
Average Accuracy score: 0.736432
Average F1-score: 0.522067768977

Average Accuracy score: 0.743232
Average F1-score: 0.470380558997
Average Accuracy score: 0.737024
Average F1-score: 0.523654567472

Average Accuracy score: 0.763648
Average F1-score: 0.503290265225
Average Accuracy score: 0.757888
Average F1-score: 0.550368795358
