In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
import pandas as pd
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the pre-split feature matrices and target vectors saved by an earlier step.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
X_train, X_test, y_train, y_test = map(
    pd.read_pickle,
    (
        './data/X_train_pickle.pkl',
        './data/X_test_pickle.pkl',
        './data/y_train_pickle.pkl',
        './data/y_test_pickle.pkl',
    ),
)

# Logistic Regression

In [None]:
# Settings shared by the hyper-parameter search for logistic regression.
log_scale = [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]  # candidate values for C, one per power of ten
scoring = ['accuracy', 'f1']                      # metrics computed for every candidate
cv = 5                                            # number of cross-validation folds

In [None]:
# Cross-validated grid search over the candidate C values for a default LogisticRegression.
lr = LogisticRegression()
param_grid = [dict(C=log_scale)]

grid_search = GridSearchCV(
    estimator=lr,
    param_grid=param_grid,
    cv=cv,                    # fold count declared in the settings cell
    scoring=scoring,          # several metrics are scored for each candidate
    refit='f1',               # with multiple metrics, names the one used to pick and refit the best model
    n_jobs=-2,                # run in parallel on all CPUs but one
    return_train_score=True,  # also record training-fold scores in cv_results_
    verbose=50,               # high verbosity: print progress messages while fitting
)
grid_search.fit(X_train, y_train)

In [None]:
# Show the estimator refit with the best-scoring parameters.
# The fitted attribute is `best_estimator_` (singular); `best_estimators_` does not exist.
grid_search.best_estimator_

In [None]:
# End of GridSearch

In [3]:
LR = LogisticRegression(random_state=0,           # Just like np.random.seed 
                        solver='lbfgs',           # diminishes *ALL* warnings. Different solvers diminish different warnings. Andy did it this way.
                        C=1e9,                    # 1 x 10 ^ 9th power. Why is this the value of C?
                        multi_class='multinomial' # indicates we're using multiple features and not just one vs target. 
                       ).fit(X_train, y_train)    # run of the mill fit method with training data.

# Checking Accuracy 
LR_accuracy = round(LR.score(X_test,y_test)*100, 2)

# Checking F1 Score
pred = LR.predict(X_test) # Use LR to predict y values via X_test values.
LR_f1 = round(f1_score(y_test, pred)*100, 2)

lr = {
    'accuracy': LR_accuracy,
    'f1': LR_f1
}
%store lr
print("Logistic Regression correctly identified {}% of the True Values".format(LR_accuracy))
print('The F1 score determined that the LR correctly identified {}% of the True Positives'.format(LR_f1))

Stored 'lr' (dict)
Logistic Regression correctly identified 84.66% of the True Values
The F1 score determined that the LR correctly identified 66.13% of the True Positives
