In [1]:
import numpy as np
import pandas as pd
from skimage import io, feature, color, data
import matplotlib.pyplot as plt 
import cv2
import os
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV 

In [2]:
# Root folders of the training and test image sets
train_img_path = './data/seg_train/seg_train/'
test_img_path = './data/seg_test/seg_test/'

# Feature vectors (X) and class labels (y); each pair starts out empty
X_train, y_train = [], []
X_test, y_test = [], []

# Names of the scene classes
labels = ['Buildings', 'Forest', 'Glacier', 'Mountain', 'Sea', 'Street']


In [3]:
# Extracts the Histogram of Oriented Gradients of one image, used for classification
def get_hog(jpeg_path, image_size=(150, 150)):
    """Return the HOG feature vector of the image stored at ``jpeg_path``.

    Parameters
    ----------
    jpeg_path : str
        Path of the image file to read.
    image_size : tuple of int, optional
        Target size handed to ``cv2.resize`` before the features are
        computed. Defaults to ``(150, 150)``.

    Returns
    -------
    The output of ``feature.hog`` for the resized image, divided by 255.0.

    Raises
    ------
    OSError
        If ``cv2.imread`` could not load the file.
    """
    jpg = cv2.imread(jpeg_path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; fail here with the offending path rather than with
    # an opaque error inside cv2.resize.
    if jpg is None:
        raise OSError('Could not read image: ' + str(jpeg_path))
    jpg = cv2.resize(jpg, image_size)
    # NOTE(review): the image is passed to feature.hog exactly as loaded by
    # OpenCV (presumably a 3-channel array) — confirm that the installed
    # scikit-image version accepts this without an explicit channel argument.
    hog = feature.hog(jpg) / 255.0
    return hog

In [4]:
# Fetches all images of one scene type from the data folder, computes the HOG
# of each file and stores the matching label in a separate list.
# Works with both train and test data.
def jpeg_to_array(scene_type, img_root_path, data_type):
    """Append HOG features and labels for every JPEG image of one scene type.

    Images are read from ``<img_root_path>/<scene_type in lower case>``. For
    each image the result of ``get_hog`` is appended to the module-level
    ``X_train`` (or ``X_test``) list, and the position of ``scene_type`` in
    the module-level ``labels`` list is appended to ``y_train`` (or
    ``y_test``).

    Parameters
    ----------
    scene_type : str
        Scene class name; must be an entry of ``labels``.
    img_root_path : str
        Folder that contains one sub-folder per scene type.
    data_type : str
        ``'Training'`` to fill the training lists, ``'Testing'`` to fill the
        test lists.

    Raises
    ------
    ValueError
        If ``data_type`` is neither ``'Training'`` nor ``'Testing'``, or if
        ``scene_type`` is not in ``labels``.
    """
    # Pick the destination lists once; an unknown data_type used to be
    # ignored silently, leaving the caller with no data and no error.
    if data_type == 'Training':
        features, targets = X_train, y_train
    elif data_type == 'Testing':
        features, targets = X_test, y_test
    else:
        raise ValueError(
            "data_type must be 'Training' or 'Testing', got " + repr(data_type)
        )

    # The label is the same for every image of the scene: look it up once.
    label_index = labels.index(str(scene_type))

    scene_path = os.path.join(img_root_path, scene_type.lower())
    print('Loading ' + data_type +' images for scene type '+scene_type)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order identical from one run to the next.
    for img in sorted(os.listdir(scene_path)):
        # Only JPEG files are images; match the extension case-insensitively
        # so that e.g. '.JPG' and '.jpeg' are not skipped.
        if not img.lower().endswith(('.jpg', '.jpeg')):
            continue
        features.append(get_hog(os.path.join(scene_path, img)))
        targets.append(label_index)


In [5]:
# Start from empty lists so that re-running this cell does not append the
# training samples a second time.
X_train.clear()
y_train.clear()

# jpeg_to_array is called only for its side effects (it fills X_train/y_train),
# so use a plain loop instead of building a throw-away list of return values.
for scene in labels:
    jpeg_to_array(scene, train_img_path, 'Training')

Loading Training images for scene type Buildings
Loading Training images for scene type Forest
Loading Training images for scene type Glacier
Loading Training images for scene type Mountain
Loading Training images for scene type Sea
Loading Training images for scene type Street


[None, None, None, None, None, None]

In [6]:
# Start from empty lists so that re-running this cell does not append the
# test samples a second time.
X_test.clear()
y_test.clear()

# jpeg_to_array is called only for its side effects (it fills X_test/y_test),
# so use a plain loop instead of building a throw-away list of return values.
for scene in labels:
    jpeg_to_array(scene, test_img_path, 'Testing')

Loading Testing images for scene type Buildings
Loading Testing images for scene type Forest
Loading Testing images for scene type Glacier
Loading Testing images for scene type Mountain
Loading Testing images for scene type Sea
Loading Testing images for scene type Street


[None, None, None, None, None, None]

In [7]:
# Baseline: an SVC with default settings, trained on the training set.
# fit() returns the estimator itself, so construction and training are chained.
model = SVC().fit(X_train, y_train)

# Classify the test set and show the per-class precision / recall / F1 report
predictions = model.predict(X_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.83      0.82      0.82       437
           1       0.90      0.94      0.92       474
           2       0.62      0.66      0.64       553
           3       0.60      0.58      0.59       525
           4       0.74      0.71      0.72       510
           5       0.86      0.82      0.84       501

    accuracy                           0.75      3000
   macro avg       0.76      0.76      0.76      3000
weighted avg       0.75      0.75      0.75      3000



In [None]:
# Search space for the RBF kernel: 5 values of C x 5 values of gamma
param_grid = dict(
    C=[0.1, 1, 10, 100, 1000],
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
    kernel=['rbf'],
)

# Cross-validated search over param_grid with progress messages enabled
grid = GridSearchCV(estimator=SVC(), param_grid=param_grid, refit=True, verbose=3)

# Run the search on the training set
grid.fit(X_train, y_train)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


In [None]:
# Classify the test set with the fitted grid-search object
grid_predictions = grid.predict(X_test)

# Build the per-class report first, then display it
tuned_report = classification_report(y_test, grid_predictions)
print(tuned_report)