In [1]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow_hub as hub
import os
import PIL.Image as Image
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

os.environ["CUDA_VISIBLE_DEVICES"]= "-1" # Force Tensorflow on CPU instead of GPU (seems to avoid an error with my CUDA compatible GPU)

  from ._conv import register_converters as _register_converters


In [2]:
# Load training data form NPZ file

train_data = "train.npz"
X_tr = np.load(train_data)["features"]
y_tr = np.load(train_data)["labels"]
images_tr = np.load(train_data)["pixels"]

valid_data = "valid.npz"
X_val= np.load(valid_data)["features"]
y_val = np.load(valid_data)["labels"]
images_val = np.load(valid_data)["pixels"]

test_data = "test.npz"
X_te= np.load(test_data)["features"]
y_te = np.load(test_data)["labels"]
images_te = np.load(test_data)["pixels"]

# Create a dictionnary for labels
labels_dict = {0: 'bike', 1 : 'car', 2: 'motorcycle', 3: 'other', 4:'truck', 5: 'van'}

# Create a pipeline for the Logistic Regression

In [30]:

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

logreg_pipe = Pipeline([
    ('scaler', StandardScaler()), # With standardization works better here
    ('logreg', LogisticRegression(solver='saga', C=1000, multi_class= 'multinomial'))
])



TypeError: __init__() got an unexpected keyword argument 'iid'

In [4]:
logreg_pipe.fit(X_tr, y_tr)



Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('logreg', LogisticRegression(C=1000, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=None, penalty='l2', random_state=None, solver='saga',
          tol=0.0001, verbose=0, warm_start=False))])

In [5]:
# Accuracy on test set
accuracy = logreg_pipe.score(X_te, y_te)
print('Accuracy: {:.3f}'.format(accuracy))

Accuracy: 0.940


# Cross Validation, Grid Search and tuning hyperparameters

In [6]:
### Let's use a bigger dataset for training.

X_gs_cv = np.concatenate((X_tr,X_val), axis=0, out=None)  # X for Cross Validation and Grid Search
y_gs_cv = np.concatenate((y_tr,y_val), axis=0, out=None) 

In [7]:
X_gs_cv.shape 

(419, 2048)

In [25]:

from sklearn.model_selection import GridSearchCV

grid_params = {'logreg__solver' : ['saga'],
               'logreg__multi_class' : ['ovr','multinomial'],
               'logreg__C' : [0.01,0.1,1,10,100,1000]}

In [31]:
gs = GridSearchCV(logreg_pipe, grid_params, verbose = -1, cv = 8, n_jobs = 4, scoring='accuracy', refit = True, iid = True) # use 4 cores

In [32]:
gs_results = gs.fit(X_gs_cv, y_gs_cv)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:   49.0s
[Parallel(n_jobs=4)]: Done  96 out of  96 | elapsed:  1.2min finished


In [34]:
print('Best Params :',gs_results.best_params_)
print('Best Score :',gs_results.best_score_)

Best Params : {'logreg__C': 0.1, 'logreg__multi_class': 'multinomial', 'logreg__solver': 'saga'}
Best Score : 0.9379474940334129


In [35]:
# Calculate accuracy with the best estimator with GridSearch Cross Validation
best_estimator = gs_results.best_estimator_
best_estimator.score(X_te,y_te)

0.96