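In this notebook, an SVM classifier is fitted to pre-extracted features: its hyperparameters are tuned with a cross-validated grid search, and the best model is saved to disk.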

In [1]:
# organize imports
from __future__ import print_function

import json
import os
import pickle
from pprint import pprint

import h5py
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm
from sklearn.externals import joblib
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [2]:
# Read the user configuration for the SVM experiment from its JSON file.
with open('room_recognition/conf/conf_svm.json') as config_file:
    config = json.load(config_file)

# Unpack every setting into its own module-level name; a missing key raises
# KeyError here, before any data is loaded.
seed = config["seed"]
features_path = config["features_path"]
labels_path = config["labels_path"]
results = config["results"]
classifier_path = config["classifier_path"]
train_path = config["train_path"]
num_classes = config["num_classes"]
model_path = config["model_path"]

In [3]:
# import features and labels
# Both HDF5 files are opened through context managers so they are closed even
# if reading a dataset raises. np.array() copies the data into memory while
# the files are still open, so the arrays remain usable after the block.
with h5py.File(features_path, 'r') as h5f_data, \
        h5py.File(labels_path, 'r') as h5f_label:
    features_string = h5f_data['dataset_1']
    labels_string = h5f_label['dataset_1']

    trainfeatures = np.array(features_string)
    trainlabels = np.array(labels_string)

# Fail early, with a readable message, if the two files are misaligned
# (one label is expected per feature row).
if trainfeatures.shape[0] != trainlabels.shape[0]:
    raise ValueError(
        "features and labels disagree on the number of samples: {} vs {}".format(
            trainfeatures.shape[0], trainlabels.shape[0]
        )
    )

In [4]:
# Sanity check: report the dimensions of the loaded feature matrix and label vector.
features_shape = trainfeatures.shape
labels_shape = trainlabels.shape
print("[INFO] features shape: {}".format(features_shape))
print("[INFO] labels shape: {}".format(labels_shape))



[INFO] features shape: (1040, 2048)
[INFO] labels shape: (1040,)


In [5]:
 param = [
     # Sub-grid 1: linear kernel, only the regularisation strength C is tuned.
     {"kernel": ["linear"], "C": [1, 10, 100, 1000]},
     # Sub-grid 2: RBF kernel, C is tuned jointly with the kernel width gamma.
     {
         "kernel": ["rbf"],
         "C": [1, 10, 100, 1000],
         "gamma": [1e-2, 1e-3, 1e-4, 1e-5],
     },
 ]

In [6]:
# Pretty-print the hyperparameter search space to double-check it before the search is started.
pprint(param)

[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
 {'C': [1, 10, 100, 1000],
  'gamma': [0.01, 0.001, 0.0001, 1e-05],
  'kernel': ['rbf']}]


Instantiate the grid search and fit it.

In [7]:
# Create the base model whose hyperparameters the grid search will tune.
#
# SVC is imported directly instead of being reached through `svm.SVC`: this
# assignment rebinds the name `svm` from the sklearn module to the estimator
# instance, so `svm.SVC(...)` would raise AttributeError if the cell were run
# a second time. The variable keeps the name `svm` because the next cell
# passes it to GridSearchCV.
#
# probability=True enables probability estimates, which are fitted with an
# internal, randomly shuffled cross-validation; seeding it with the configured
# `seed` makes the fitted model reproducible.
# NOTE(review): assumes config["seed"] is an int (or null) -- confirm.
svm = SVC(probability=True, random_state=seed)

In [8]:
# Exhaustive search over `param`, each candidate scored with 10-fold cross-validation.
# Every (candidate, fold) fit is independent of the others, so n_jobs=-1 lets
# them be trained in parallel on all available cores; verbose=3 logs each fit.
clf = GridSearchCV(
    estimator=svm,
    param_grid=param,
    cv=10,
    n_jobs=-1,
    verbose=3,
)
 

In [9]:
# Fit the grid search model: every candidate in `param` is cross-validated on the training data.
# The call is the cell's last expression, so the notebook displays the object it returns.
clf.fit(trainfeatures, trainlabels)



Fitting 10 folds for each of 20 candidates, totalling 200 fits
[CV] C=1, kernel=linear ..............................................
[CV] C=1, kernel=linear ..............................................
[CV] C=1, kernel=linear ..............................................
[CV] C=1, kernel=linear ..............................................
[CV] ..... C=1, kernel=linear, score=0.8461538461538461, total=  12.0s
[CV] C=1, kernel=linear ..............................................
[CV] ..... C=1, kernel=linear, score=0.8365384615384616, total=  12.3s
[CV] C=1, kernel=linear ..............................................
[CV] ..... C=1, kernel=linear, score=0.8557692307692307, total=  12.3s
[CV] C=1, kernel=linear ..............................................
[CV] ..... C=1, kernel=linear, score=0.7980769230769231, total=  12.7s
[CV] C=1, kernel=linear ..............................................
[CV] ..... C=1, kernel=linear, score=0.8942307692307693, total=  13.0s
[CV] C=1, kern

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:  1.5min


[CV] ... C=100, kernel=linear, score=0.8942307692307693, total=  12.8s
[CV] C=100, kernel=linear ............................................
[CV] ... C=100, kernel=linear, score=0.8942307692307693, total=  13.1s
[CV] C=100, kernel=linear ............................................
[CV] ................ C=100, kernel=linear, score=0.875, total=  12.9s
[CV] C=1000, kernel=linear ...........................................
[CV] ... C=100, kernel=linear, score=0.8173076923076923, total=  12.9s
[CV] C=1000, kernel=linear ...........................................
[CV] ... C=100, kernel=linear, score=0.8846153846153846, total=  18.5s
[CV] C=1000, kernel=linear ...........................................
[CV] .. C=1000, kernel=linear, score=0.8365384615384616, total=  18.4s
[CV] C=1000, kernel=linear ...........................................
[CV] ... C=100, kernel=linear, score=0.8846153846153846, total=  19.0s
[CV] C=1000, kernel=linear ...........................................
[CV] .

[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV]  C=10, gamma=0.01, kernel=rbf, score=0.8365384615384616, total=  37.1s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV]  C=10, gamma=0.01, kernel=rbf, score=0.8557692307692307, total=  36.7s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV]  C=10, gamma=0.01, kernel=rbf, score=0.8557692307692307, total=  36.9s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV]  C=10, gamma=0.01, kernel=rbf, score=0.8846153846153846, total=  37.4s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV]  C=10, gamma=0.01, kernel=rbf, score=0.8846153846153846, total=  37.2s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV]  C=10, gamma=0.01, kernel=rbf, score=0.8942307692307693, total=  37.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, sco

[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed: 19.3min


[CV]  C=100, gamma=0.01, kernel=rbf, score=0.7884615384615384, total=  55.1s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV]  C=100, gamma=0.01, kernel=rbf, score=0.8365384615384616, total=  55.9s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV]  C=100, gamma=0.01, kernel=rbf, score=0.8557692307692307, total=  54.3s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV]  C=100, gamma=0.01, kernel=rbf, score=0.8557692307692307, total=  55.2s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV]  C=100, gamma=0.01, kernel=rbf, score=0.8846153846153846, total=  55.8s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV]  C=100, gamma=0.01, kernel=rbf, score=0.8846153846153846, total=  56.5s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV]  C=100, gamma=0.01, kernel=rbf, score=0.8942307692307693, total=  54.4s
[CV] C=100, gamma=0.001, kernel=rbf

[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.875, total=  22.9s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV]  C=1000, gamma=0.001, kernel=rbf, score=0.8076923076923077, total=  22.2s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV]  C=1000, gamma=0.001, kernel=rbf, score=0.8846153846153846, total=  21.9s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.875, total=  22.6s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV]  C=1000, gamma=0.0001, kernel=rbf, score=0.8365384615384616, total=  20.5s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV]  C=1000, gamma=0.0001, kernel=rbf, score=0.7980769230769231, total=  20.4s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV]  C=1000, gamma=0.0001, kernel=rbf, sco

[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed: 30.9min finished


GridSearchCV(cv=10, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'kernel': ['linear'], 'C': [1, 10, 100, 1000]}, {'kernel': ['rbf'], 'C': [1, 10, 100, 1000], 'gamma': [0.01, 0.001, 0.0001, 1e-05]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=3)

In [10]:
# Show the hyperparameter combination selected by the grid search.
clf.best_params_


{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}

In [12]:
 # Persist the best estimator found by the grid search to `model_path`.
 # The target directory is created first so that the dump cannot fail (and
 # waste the long search above) when the output folder does not exist yet.
 model_dir = os.path.dirname(model_path)
 if model_dir and not os.path.isdir(model_dir):
     os.makedirs(model_dir)
 joblib.dump(clf.best_estimator_, model_path)

['room_recognition/output/svm/model']