In [1]:
import numpy as np
import pandas as pd
from skimage import io, feature, color, data
import matplotlib.pyplot as plt 
import cv2
import os
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
import time
from keras.preprocessing import image
from sklearn.metrics import accuracy_score


In [2]:
# Root folders of the dataset; each one holds one sub-folder per scene class.
train_img_path = './data/seg_train/seg_train/'
test_img_path = './data/seg_test/seg_test/'

# Feature vectors and integer class labels, filled in by jpeg_to_array().
X_train, y_train = [], []
X_test, y_test = [], []

# Class names; the position of a name in this list is its integer label.
labels = [
    'Buildings',
    'Forest',
    'Glacier',
    'Mountain',
    'Sea',
    'Street',
]


In [3]:
def get_hog(jpeg_path):
    """Return the Histogram of Oriented Gradients descriptor of one image.

    The file is read with OpenCV, resized to 150x150 pixels, converted from
    BGR to grayscale and passed to ``skimage.feature.hog`` with its default
    parameters. The resulting vector is divided by 255.0 before it is returned.

    Args:
        jpeg_path: Path of the image file to load.

    Returns:
        The HOG feature vector of the image, scaled by 1/255.

    Raises:
        ValueError: If OpenCV cannot read or decode the file.
    """
    jpg = cv2.imread(jpeg_path)
    # cv2.imread reports failure by returning None rather than raising; fail
    # here with the offending path instead of an opaque error inside resize().
    if jpg is None:
        raise ValueError('Could not read image: ' + str(jpeg_path))
    jpg = cv2.resize(jpg, (150, 150))
    gray = cv2.cvtColor(jpg, cv2.COLOR_BGR2GRAY)
    # NOTE(review): the HOG output is not a 0-255 pixel intensity, so this
    # division looks unnecessary; it is kept so feature values stay unchanged.
    hog = feature.hog(gray) / 255.0
    return hog

In [4]:
# Fetches all images of one scene type from the data folder, computes the HOG
# of each file and stores the matching label in a separate list.
# Works with both train and test data.
def jpeg_to_array (scene_type, img_root_path, data_type):
    """Append HOG features and labels for every JPEG of one scene type.

    Images are read from ``<img_root_path>/<scene_type lower-cased>``. Results
    are appended to the module-level lists ``X_train``/``y_train`` or
    ``X_test``/``y_test``; nothing is returned.

    Args:
        scene_type: Class name; must be an entry of the global ``labels`` list.
        img_root_path: Folder that contains one sub-folder per scene type.
        data_type: ``'Training'`` or ``'Testing'``; selects the target lists.

    Raises:
        ValueError: If ``data_type`` is neither ``'Training'`` nor
            ``'Testing'``, or if ``scene_type`` is not in ``labels``.
    """
    if data_type == 'Training':
        features, targets = X_train, y_train
    elif data_type == 'Testing':
        features, targets = X_test, y_test
    else:
        # Previously an unknown value scanned the folder and stored nothing.
        raise ValueError("data_type must be 'Training' or 'Testing', got "
                         + repr(data_type))

    # The label is the same for every image of this scene: look it up once.
    label_index = labels.index(str(scene_type))
    scene_path = os.path.join(img_root_path, scene_type.lower())
    print('Loading ' + data_type +' images for scene type '+scene_type)

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore tie-breaking in the classifier) reproducible.
    for img in sorted(os.listdir(scene_path)):
        # Skip anything that is not a JPEG (case-insensitive extension check).
        if not img.lower().endswith(('.jpg', '.jpeg')):
            continue
        features.append(get_hog(os.path.join(scene_path, img)))
        targets.append(label_index)


In [5]:
# Start from empty lists so re-running this cell does not append every
# training sample a second time.
X_train.clear()
y_train.clear()
# Plain loop: jpeg_to_array() is called only for its side effects, so a list
# comprehension would just display a useless [None, ...] result.
for scene in labels:
    jpeg_to_array(scene, train_img_path, 'Training')

Loading Training images for scene type Buildings
Loading Training images for scene type Forest
Loading Training images for scene type Glacier
Loading Training images for scene type Mountain
Loading Training images for scene type Sea
Loading Training images for scene type Street


[None, None, None, None, None, None]

In [6]:
# Start from empty lists so re-running this cell does not append every
# test sample a second time.
X_test.clear()
y_test.clear()
# Plain loop: jpeg_to_array() is called only for its side effects, so a list
# comprehension would just display a useless [None, ...] result.
for scene in labels:
    jpeg_to_array(scene, test_img_path, 'Testing')

Loading Testing images for scene type Buildings
Loading Testing images for scene type Forest
Loading Testing images for scene type Glacier
Loading Testing images for scene type Mountain
Loading Testing images for scene type Sea
Loading Testing images for scene type Street


[None, None, None, None, None, None]

In [7]:
#Create model and fit it:


In [9]:
# Train one distance-weighted KNN classifier (p=1) for each neighbour count
# from 4 to 9 and keep its predictions on the test set, in that order.
pred = []
for i in range(4, 10):
    neigh = KNeighborsClassifier(
        n_neighbors=i,
        weights='distance',
        p=1,
    )
    # fit() returns the estimator itself, so predict() can be chained onto it.
    pred.append(neigh.fit(X_train, y_train).predict(X_test))
    print(f'Done with n= {i}')
    

Done with n= 4
Done with n= 5
Done with n= 6
Done with n= 7
Done with n= 8
Done with n= 9


In [10]:
# For every stored prediction set, show the confusion matrix followed by the
# overall accuracy on the test labels.
for preds in pred:
    print(
        confusion_matrix(y_test, preds),
        accuracy_score(y_test, preds),
        sep='\n',
    )

[[187 110  16  30  73  21]
 [  0 465   0   4   5   0]
 [  2  51 128 128 244   0]
 [  2  19  34 220 250   0]
 [  3  12   9  27 456   3]
 [ 35 237  13  22  48 146]]
0.534
[[186 117  18  32  69  15]
 [  0 463   0   4   7   0]
 [  2  55 106 132 257   1]
 [  0  22  28 212 263   0]
 [  3  11   6  20 468   2]
 [ 31 250  10  20  54 136]]
0.5236666666666666
[[185 117  15  38  68  14]
 [  0 464   0   4   6   0]
 [  2  55 116 123 257   0]
 [  0  20  29 210 266   0]
 [  3  14   4  21 466   2]
 [ 31 247  12  17  56 138]]
0.5263333333333333
[[177 120  17  36  72  15]
 [  0 465   0   4   5   0]
 [  1  56 114 114 268   0]
 [  0  19  21 217 268   0]
 [  3  14   4  22 465   2]
 [ 20 262   9  17  56 137]]
0.525
[[174 126  16  37  70  14]
 [  0 464   0   4   6   0]
 [  1  56 114 116 266   0]
 [  0  19  25 210 271   0]
 [  3  15   2  17 471   2]
 [ 22 262   8  20  53 136]]
0.523
[[172 129  16  38  68  14]
 [  0 464   0   4   6   0]
 [  1  58 105 113 276   0]
 [  0  21  19 211 274   0]
 [  3  15   2  15 473