In [13]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.base import clone
from sklearn.preprocessing import PolynomialFeatures, normalize
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score

from feature_extractors import extract_features, extract_features_2d, extract_features_edge, extract_features_cogrey
from metric_helpers import compute_true_positive_rate
from visualization_helpers import pretty_confusion, label_to_img
from dataset_preprocessing import load_image, create_dataset, extract_patches, compute_input_features, compute_output_features
from new_helpers import *

# Kept after the star import, as before, so this binding of Image takes precedence.
from PIL import Image


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
# --- Experiment configuration -------------------------------------------------
# Dataset location and split / labelling parameters.
# NOTE(review): FOREGROUND_THRESHOLD is presumably the fraction of foreground
# pixels above which a patch gets the positive label -- confirm against the helpers.
ROOT_DIR = "training/"
TRAIN_FRACTION = 0.8
FOREGROUND_THRESHOLD = 0.25

# Patch side length and image dimensions (the images handled here are square).
patch_size = 16
width = height = 400

# Number of images requested from build_model_data in the experiments below.
n_img = 10

# Extract 2d features

In [91]:
# Model 1: 5-nearest-neighbours on the features produced by extract_features_2d
# (the recorded run reports X of shape (6250, 2)).
X1, Y1 = build_model_data(extract_features_2d, patch_size=patch_size, n_img=n_img)
# sklearn's normalize() with default arguments rescales every sample (row) to unit
# L2 norm; it does not standardise the feature columns.
# NOTE(review): with only two features this keeps just their ratio -- confirm that
# row-wise normalisation (rather than per-feature scaling) is intended.
X1 = normalize(X1)
neigh1 = KNeighborsClassifier(n_neighbors=5)
# Last expression of the cell: fits the model and displays the estimator repr.
neigh1.fit(X1, Y1)

X [(6250, 2)] and Y [(6250,)]


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

# Extract 6d features

In [92]:
# Model 2: 5-nearest-neighbours on the features produced by extract_features
# (the recorded run reports X of shape (6250, 6)).
X2, Y2 = build_model_data(extract_features, patch_size=patch_size, n_img=n_img)
# Row-wise normalisation: each sample is rescaled to unit L2 norm (normalize() default).
X2 = normalize(X2)
neigh2 = KNeighborsClassifier(n_neighbors=5)
# Last expression of the cell: fits the model and displays the estimator repr.
neigh2.fit(X2, Y2)

X [(6250, 6)] and Y [(6250,)]


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

# Add Canny edge detector

In [93]:
# Model 3: 5-nearest-neighbours on the features produced by extract_features_edge
# (the recorded run reports X of shape (6250, 7)).
X3, Y3 = build_model_data(extract_features_edge, patch_size=patch_size, n_img=n_img)
# Row-wise normalisation: each sample is rescaled to unit L2 norm (normalize() default).
X3 = normalize(X3)
neigh3 = KNeighborsClassifier(n_neighbors=5)
# Last expression of the cell: fits the model and displays the estimator repr.
neigh3.fit(X3, Y3)

X [(6250, 7)] and Y [(6250,)]


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

# Add polynomial features

In [6]:
# Model 4: same base features as model 3 (extract_features_edge), expanded with
# polynomial terms up to degree 3 before fitting the 5-nearest-neighbours classifier.
X4, Y4 = build_model_data(extract_features_edge, patch_size=patch_size, n_img=n_img)
poly = PolynomialFeatures(3)
X4 = poly.fit_transform(X4)
# Row-wise normalisation is applied after the expansion, i.e. to the expanded rows.
X4 = normalize(X4)
neigh4 = KNeighborsClassifier(n_neighbors=5)
# Last expression of the cell: fits the model and displays the estimator repr.
neigh4.fit(X4, Y4)

[[[ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  ..., 
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]]

 [[ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  ..., 
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]]

 [[ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  ..., 
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]]

 ..., 
 [[ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  ..., 
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]]

 [[ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  0.  0.  0.]
  ..., 
  [ 0.  0.  0. ...,  0.  0.  0.]
  [ 0.  0.  0. ...,  

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

# Comparison

In [95]:
# Each entry bundles a fitted classifier with the feature matrix and labels it was
# fitted on; later cells iterate over this list.
models = [(neigh1, X1, Y1), (neigh2, X2, Y2), (neigh3, X3, Y3), (neigh4, X4, Y4)]

for clf, feats, labels in models:
    # Scores below are measured on the very data the model was fitted on, so they
    # are optimistic; the cross-validation cells give the held-out figures.
    Z = clf.predict(feats)

    # NOTE(review): in the recorded output this value equals the bottom-right
    # confusion cell divided by the number of samples (e.g. 834 / 6250 = 0.13344),
    # i.e. a fraction of all samples rather than TP / (TP + FN). That cell sits in
    # the row/column labelled "bg" -- confirm the label order passed to
    # pretty_confusion matches the class encoding.
    TPR = compute_true_positive_rate(labels, Z)
    print('True positive rate = ' + str(TPR))
    print('F1-score:', f1_score(labels, Z))
    pretty_confusion(["road", "bg"], labels, Z)
    print('\n')

True positive rate = 0.13344
F1-score: 0.544564152791
            t/p           road             bg
           road           4021            401
             bg            994            834


True positive rate = 0.20944
F1-score: 0.746719908728
            t/p           road             bg
           road           4053            369
             bg            519           1309


True positive rate = 0.208
F1-score: 0.745199197478
            t/p           road             bg
           road           4061            361
             bg            528           1300


True positive rate = 0.20416
F1-score: 0.733966062698
            t/p           road             bg
           road           4049            373
             bg            552           1276




# Cross-validation without post-processing

In [27]:
# 10-fold cross-validation of every model, reported once per metric.
# Each tuple is (sklearn scoring name, printed header, label of the mean line).
for clf, feats, labels in models:
    for scoring, header, mean_label in (
        ('accuracy', 'ACCURACY', 'mean accuracy:'),
        ('f1', 'F1-SCORE', 'mean f1:'),
    ):
        scores = cross_val_score(clf, feats, labels, cv=10, scoring=scoring)
        print(header)
        print(scores)
        print(mean_label, scores.mean())
    print('\n')

ACCURACY
[ 0.71405751  0.70447284  0.7152      0.6912      0.6752      0.6688
  0.7328      0.6912      0.65865385  0.69551282]
mean accuracy: 0.69470970181
F1-SCORE
[ 0.33948339  0.4012945   0.36879433  0.34129693  0.35962145  0.29351536
  0.38827839  0.32752613  0.30163934  0.34931507]
mean f1: 0.347076489069


ACCURACY
[ 0.80031949  0.78594249  0.8352      0.7376      0.7488      0.7504
  0.7872      0.7984      0.78525641  0.80929487]
mean accuracy: 0.783841326288
F1-SCORE
[ 0.64589235  0.64736842  0.70144928  0.53142857  0.56986301  0.54117647
  0.61449275  0.61349693  0.5988024   0.67036011]
mean f1: 0.613433029556


ACCURACY
[ 0.78594249  0.7715655   0.8336      0.7456      0.7472      0.7488
  0.7968      0.8032      0.78685897  0.78525641]
mean accuracy: 0.780482337184
F1-SCORE
[ 0.62569832  0.62857143  0.70621469  0.56198347  0.53254438  0.54227405
  0.61398176  0.62613982  0.61891117  0.59146341]
mean f1: 0.604778251408


ACCURACY
[ 0.78594249  0.7715655   0.8336      0.7456

# Post-processing

In [9]:
from skimage.morphology import *
def postprocessing(Z, w, h, patch_size):
    """Run post_image over the predictions of each image and return a column vector.

    Parameters
    ----------
    Z : numpy array
        Per-patch predictions of one or more images laid out back to back. Its
        size must be a multiple of the number of patches per image, otherwise the
        reshape below raises ValueError.
    w, h : number
        Image width and height.
    patch_size : number
        Side length of a (square) patch.

    Returns
    -------
    numpy array of shape (Z.size, 1) holding the post-processed predictions.

    Notes
    -----
    post_image writes its result into the row it receives, so the array passed in
    as Z may be modified as well.
    """
    # Built-in int() replaces np.int, which was deprecated in NumPy 1.20 and removed
    # in 1.24. The height is now honoured as well: the previous n_patch * n_patch
    # was only right for square images. Same formula as the image export cell.
    patches_per_image = int(w / patch_size) * int(h / patch_size)

    # One row per image, so that post_image never mixes patches of different images.
    Z = Z.reshape(-1, patches_per_image)
    Z = np.apply_along_axis(post_image, arr=Z, axis=1)
    return Z.reshape(-1, 1)

def post_image(Z):
    """Apply a morphological binary closing to Z in place and return Z.

    binary_closing comes from the `skimage.morphology` star import. The result is
    written into Z itself (out=Z), and that same array object is returned.

    NOTE(review): postprocessing calls this through np.apply_along_axis(axis=1), so
    Z is a 1-D row here and the closing runs along the flattened patch sequence,
    not on the 2-D patch grid of the image -- confirm that this is intended.
    """
    binary_closing(Z, out=Z)
    return Z

# Cross-validation with post-processing

In [97]:
from sklearn.model_selection import KFold
def cross_validation(X, Y, neigh, cv=10, post=False, verbose=False):
    """K-fold cross-validation of a classifier, optionally with post-processing.

    The folds come from KFold(n_splits=cv) without shuffling, i.e. they are
    contiguous blocks of samples.

    Parameters
    ----------
    X, Y : numpy arrays
        Feature matrix and labels, indexed by the fold index arrays.
    neigh : scikit-learn estimator
        Classifier to evaluate. A fresh unfitted copy is trained in every fold, so
        the object passed in (and any fit it already carries) is left untouched.
    cv : int
        Number of folds.
    post : bool
        If True, predictions go through postprocessing() before scoring. This
        reads the notebook-level `width`, `height` and `patch_size`, and requires
        every test fold to hold a whole number of images.
    verbose : bool
        If True, print the per-fold scores.

    Returns
    -------
    (avg_acc, avg_f1) : tuple of float
        Mean accuracy and mean F1 over the folds. Both are also printed.
    """
    kf = KFold(n_splits=cv)
    fold_f1 = []
    fold_acc = []

    for k, (train, test) in enumerate(kf.split(X)):
        y_test = Y[test]

        # Train a clone: refitting `neigh` itself would silently replace the model
        # the caller had fitted on the full data set.
        model = clone(neigh)
        model.fit(X[train], Y[train])
        z = model.predict(X[test])

        if post:
            z = postprocessing(z, width, height, patch_size)
        # postprocessing returns a column vector; the metrics expect 1-D labels.
        z = np.ravel(z)

        f1 = f1_score(y_test, z)
        acc = accuracy_score(y_test, z)
        fold_f1.append(f1)
        fold_acc.append(acc)

        if verbose:
            # Samples where both the label and the prediction are nonzero, as a
            # fraction of ALL samples in the fold (same quantity as before).
            hits = np.count_nonzero(np.logical_and(y_test != 0, z != 0))
            TPR = hits / float(len(z))
            print('K: ', k)
            print('True positive rate = ' + str(TPR))
            print('F1-score:', f1)
            print('accuracy score:', acc)
            print('\n')

    avg_f1 = sum(fold_f1) / len(fold_f1)
    avg_acc = sum(fold_acc) / len(fold_acc)

    print('Average Accuracy score:', avg_acc)
    print('Average F1-score:', avg_f1)

    return avg_acc, avg_f1

In [98]:
# For every model: cross-validate first without, then with post-processing.
# A blank line separates the output of consecutive models.
for clf, feats, labels in models:
    print()
    for use_post in (False, True):
        cross_validation(feats, labels, clf, post=use_post)


Average Accuracy score: 0.69056
Average F1-score: 0.368168735297
Average Accuracy score: 0.68576
Average F1-score: 0.449350944601

Average Accuracy score: 0.77952
Average F1-score: 0.607947219531
Average Accuracy score: 0.7704
Average F1-score: 0.633157527579

Average Accuracy score: 0.7768
Average F1-score: 0.60366873956
Average Accuracy score: 0.76848
Average F1-score: 0.63180251541

Average Accuracy score: 0.78
Average F1-score: 0.608494182914
Average Accuracy score: 0.76864
Average F1-score: 0.636761499171


# Cross-validation for n_neighbors and degree

In [None]:
# Grid search over the polynomial degree and the number of neighbours, scored by
# 10-fold cross-validated F1 on the features produced by extract_features_cogrey.
X0, Y0 = build_model_data(extract_features_cogrey, patch_size=patch_size, n_img=n_img)
degrees = [2, 3, 5, 7]
n_neighbors = [3, 5, 7, 10]

# (degree, n_neighbors) -> mean F1, so the best setting can be reported at the end.
grid_f1 = {}

for d in degrees:
    # The expansion only depends on the degree, so it is computed once per degree.
    poly = PolynomialFeatures(d)
    X = poly.fit_transform(X0)
    X = normalize(X)
    for n in n_neighbors:
        neigh = KNeighborsClassifier(n_neighbors=n)

        cr_val = cross_val_score(neigh, X, Y0, cv=10, scoring='f1')
        grid_f1[(d, n)] = cr_val.mean()

        # Label every result with the setting that produced it; without this line
        # the printed scores cannot be attributed to a (degree, k) combination.
        print('degree = {}, n_neighbors = {}'.format(d, n))
        print('F1-SCORE')
        print(cr_val)
        print('mean f1:', cr_val.mean())
        print('\n')

best_d, best_n = max(grid_f1, key=grid_f1.get)
print('best mean f1: {} (degree = {}, n_neighbors = {})'.format(
    grid_f1[(best_d, best_n)], best_d, best_n))

# Predictions

In [7]:
from sklearn.model_selection import train_test_split

def predict(neigh=None):
    """Fit a classifier on the first part of the data and predict the remainder.

    Features come from build_model_data(extract_features_edge, ...), are expanded
    with polynomial terms up to degree 3 and normalised row-wise. The samples are
    split in order (no shuffling): the first TRAIN_FRACTION go to training, the rest
    is predicted and post-processed. Reads the notebook-level `patch_size`,
    `width`, `height` and `TRAIN_FRACTION`.

    Parameters
    ----------
    neigh : scikit-learn estimator, optional
        Classifier to use. An unfitted copy of it is trained, so the caller's
        object is not modified. When omitted, KNeighborsClassifier(n_neighbors=5)
        is used. Previously the argument was overwritten with that default and
        therefore silently ignored.

    Returns
    -------
    z : numpy array of shape (n_test, 1)
        Post-processed predictions for the held-out samples.
    Y_test : numpy array
        Ground-truth labels of the held-out samples.
    """
    # NOTE(review): the recorded run reports 62500 rows for n_img=1000, so the
    # helper presumably caps n_img at the number of available images -- confirm.
    X, Y = build_model_data(extract_features_edge, patch_size=patch_size, n_img=1000)
    X = PolynomialFeatures(3).fit_transform(X)
    X = normalize(X)

    if neigh is None:
        model = KNeighborsClassifier(n_neighbors=5)
    else:
        model = clone(neigh)

    # Put the split on an image boundary: postprocessing reshapes the predictions
    # into whole images and fails on a partial one.
    patches_per_image = int(width / patch_size) * int(height / patch_size)
    cut = int(TRAIN_FRACTION * Y.shape[0])
    cut -= cut % patches_per_image

    X_train, X_test = X[:cut], X[cut:]
    Y_train, Y_test = Y[:cut], Y[cut:]
    model.fit(X_train, Y_train)

    z = model.predict(X_test)
    z = postprocessing(z, width, height, patch_size)

    return z, Y_test

In [10]:
# Run the hold-out experiment: z receives the post-processed predictions and Y_test
# the ground-truth labels of the held-out part of the data (see predict above).
z, Y_test = predict(neigh4)

X [(62500, 7)] and Y [(62500,)]


In [22]:
# Export every held-out image twice: the post-processed prediction and the
# matching ground truth, one file per image, numbered in data order.
pred_size = int(width/patch_size)*int(height/patch_size)

# Create the output folders up front; both writers fail if the folder is missing.
for out_dir in ('post/images', 'post/groundtruth'):
    os.makedirs(out_dir, exist_ok=True)

for ix, i in enumerate(range(0,z.shape[0],pred_size)):
    pred = z[i:i+pred_size]
    im = label_to_img(width, height, patch_size, patch_size, pred)
    # Explicit .png extension: without one the file was written as PNG data under
    # an extension-less name, unlike the ground-truth files below.
    plt.imsave('post/images/pred_{}.png'.format(ix), im, cmap='Greys_r')

for ix, i in enumerate(range(0,Y_test.shape[0],pred_size)):
    pred = Y_test[i:i+pred_size]
    im = label_to_img(width, height, patch_size, patch_size, pred)
    # Scale to 0..255 and convert to uint8 before handing the array to PIL.
    pimg = Image.fromarray((im*255.0).astype(np.uint8))
    pimg.save('post/groundtruth/pred_{}.png'.format(ix))

In [None]:
# NOTE(review): XX and YY are not referenced by any other cell visible in this
# notebook, and the cell was never executed in order (no execution count) -- this
# looks like a leftover scratch cell; confirm before relying on it.
XX, YY = build_model_data(extract_features_2d, patch_size=patch_size, n_img=n_img)

[[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 