#### Notebook 3A
This notebook fits a logistic regression model on the features extracted from the training images, tuning its hyperparameters with a 10-fold cross-validated grid search.


In [1]:
# organize imports
from __future__ import print_function

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
import numpy as np
import h5py
import os
import json
import pickle
import matplotlib.pyplot as plt
from sklearn.externals import joblib
from pprint import pprint

In [2]:
# read the run settings from the JSON configuration file
with open('room_recognition/conf/conf_logistic.json') as conf_file:
    config = json.load(conf_file)

# unpack the individual settings into module-level names
seed = config["seed"]                        # passed as random_state to the estimator
features_path = config["features_path"]      # HDF5 file holding the feature vectors
labels_path = config["labels_path"]          # HDF5 file holding the labels
results = config["results"]                  # read here; not referenced in the cells shown
classifier_path = config["classifier_path"]  # read here; not referenced in the cells shown
train_path = config["train_path"]            # read here; not referenced in the cells shown
num_classes = config["num_classes"]          # read here; not referenced in the cells shown
model_path = config["model_path"]            # destination of the serialized best estimator

In [3]:
# import features and labels
# Both HDF5 files are opened through a context manager so that they are closed
# even if reading 'dataset_1' raises. np.array() copies each dataset into
# memory, so the arrays stay usable after the files are closed.
with h5py.File(features_path, 'r') as h5f_data, \
        h5py.File(labels_path, 'r') as h5f_label:
    trainfeatures = np.array(h5f_data['dataset_1'])
    trainlabels = np.array(h5f_label['dataset_1'])

In [4]:
# sanity check: print the dimensions of the loaded feature and label arrays
print(
    "[INFO] features shape: {}".format(trainfeatures.shape)
)
print(
    "[INFO] labels shape: {}".format(trainlabels.shape)
)



[INFO] features shape: (1040, 2048)
[INFO] labels shape: (1040,)


In [5]:
# hyperparameter grid: every (penalty, C) combination is evaluated
param = {
    "penalty": ["l1", "l2"],   # regularization type
    "C": [0.01, 0.1, 1, 10],   # inverse regularization strength
}

In [6]:
# show the hyperparameter grid that will be searched
pprint(param)

{'C': [0.01, 0.1, 1, 10], 'penalty': ['l1', 'l2']}


Instantiate the grid search and fit it.

In [7]:
# Use the param grid to search for best hyperparameters
# First create the base model to tune.
# The grid contains penalty='l1', so the solver is pinned to 'liblinear'
# (which handles both 'l1' and 'l2') instead of relying on the library default:
# newer scikit-learn releases default to 'lbfgs', which rejects the L1 penalty.
logistic = LogisticRegression(solver='liblinear', random_state=seed)

In [8]:
# exhaustive grid search over `param`, scoring each candidate with
# 10-fold cross validation; verbose=3 logs every individual fit
clf = GridSearchCV(
    estimator=logistic,
    param_grid=param,
    cv=10,
    n_jobs=-1,
    verbose=3,
)
 

In [9]:
# Fit the grid search model (clf is a GridSearchCV, not a randomized search)
clf.fit(trainfeatures, trainlabels)



Fitting 10 folds for each of 8 candidates, totalling 80 fits
[CV] C=0.01, penalty=l1 ..............................................
[CV] C=0.01, penalty=l1 ..............................................
[CV] C=0.01, penalty=l1 ..............................................
[CV] C=0.01, penalty=l1 ..............................................
[CV] ..... C=0.01, penalty=l1, score=0.5480769230769231, total=   5.7s
[CV] C=0.01, penalty=l1 ..............................................
[CV] .... C=0.01, penalty=l1, score=0.47115384615384615, total=   5.9s
[CV] C=0.01, penalty=l1 ..............................................
[CV] ..... C=0.01, penalty=l1, score=0.4519230769230769, total=   5.9s
[CV] C=0.01, penalty=l1 ..............................................
[CV] ..... C=0.01, penalty=l1, score=0.5288461538461539, total=   5.9s
[CV] C=0.01, penalty=l1 ..............................................
[CV] ..... C=0.01, penalty=l1, score=0.5288461538461539, total=   5.5s
[CV] C=0.01, pen

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   40.3s


[CV] ...... C=0.1, penalty=l1, score=0.8365384615384616, total=   5.9s
[CV] C=0.1, penalty=l1 ...............................................
[CV] ...... C=0.1, penalty=l1, score=0.8461538461538461, total=   6.0s
[CV] C=0.1, penalty=l1 ...............................................
[CV] ...... C=0.1, penalty=l1, score=0.8846153846153846, total=   5.7s
[CV] C=0.1, penalty=l2 ...............................................
[CV] ...... C=0.1, penalty=l1, score=0.8461538461538461, total=   5.8s
[CV] C=0.1, penalty=l2 ...............................................
[CV] ...... C=0.1, penalty=l1, score=0.8365384615384616, total=   6.1s
[CV] C=0.1, penalty=l2 ...............................................
[CV] ...... C=0.1, penalty=l1, score=0.8846153846153846, total=   6.0s
[CV] C=0.1, penalty=l2 ...............................................
[CV] ...... C=0.1, penalty=l2, score=0.7788461538461539, total=  11.2s
[CV] C=0.1, penalty=l2 ...............................................
[CV] .

[Parallel(n_jobs=-1)]: Done  80 out of  80 | elapsed:  3.4min finished


GridSearchCV(cv=10, error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=9, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'penalty': ['l1', 'l2'], 'C': [0.01, 0.1, 1, 10]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=3)

In [10]:
# hyperparameter combination selected by the grid search
clf.best_params_


{'C': 0.1, 'penalty': 'l2'}

In [11]:
# Persist the best estimator found by the grid search.
# The target directory is created first so that a missing output folder does
# not make the dump fail after the lengthy search has already completed.
model_dir = os.path.dirname(model_path)
if model_dir and not os.path.isdir(model_dir):
    os.makedirs(model_dir)
joblib.dump(clf.best_estimator_, model_path)

['room_recognition/output/logistic/model']