In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

from main import plotData, readData, TRAIN_SET, TEST_SET

In [2]:
# Read the training and test splits via readData (flattened, non-discrete)
# and convert each returned component to a NumPy array.
X_train, y_train = map(np.array, readData(TRAIN_SET, flatten=True, discrete=False))

X_test, y_test = map(np.array, readData(TEST_SET, flatten=True, discrete=False))

## Hyperparameter search

In [3]:
# Candidate settings for the logistic-regression grid search: a range of
# C values crossed with three solvers.
lr_param_grid = dict(
    C=[0.01, 0.03, 0.1, 0.3, 1.0],
    solver=['newton-cg', 'lbfgs', 'liblinear'],
)

In [4]:
# Grid search over lr_param_grid, scored by accuracy with 5-fold
# cross-validation. GridSearchCV is imported in the notebook's import cell.
clf = GridSearchCV(
    estimator=LogisticRegression(),
    param_grid=lr_param_grid,
    scoring='accuracy',
    cv=5,
)
clf.fit(X_train, y_train)

# Use the search object's own record of the winning candidate instead of
# recomputing it with np.argmax over mean_test_score: argmax lands on a NaN
# entry if any candidate's score is NaN, which would make the reported
# score/std disagree with best_params_ and best_estimator_.
best_clf_index = clf.best_index_
best_clf_score = clf.cv_results_['mean_test_score'][best_clf_index]
best_clf_std = clf.cv_results_['std_test_score'][best_clf_index]

# Model for the winning configuration; the evaluation cells below use it.
estimator = clf.best_estimator_

print("Best hyperparameter setting: {}".format(clf.best_params_))
print("Best cv score: {:f}, standard deviation: {:f}".format(best_clf_score, best_clf_std))

Best hyperparameter setting: {'C': 0.3, 'solver': 'lbfgs'}
Best cv score: 0.947058, standard deviation: 0.003233


## Evaluate on the test set

In [5]:
# Score the selected model on the held-out test split and report it.
score = estimator.score(X=X_test, y=y_test)
print(score)

0.913303437967


In [9]:
# Print the selected model's repr, which lists its full parameter settings.
print(estimator)

LogisticRegression(C=0.3, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='lbfgs', tol=0.0001,
          verbose=0, warm_start=False)
