In [1]:
import pandas as pd
import joblib
from sklearn.linear_model import LogisticRegression
from utils import find_optimal_hyperparameters, fit_and_evaluate, load_model_from_json
from dataloader import load_and_split_data

In [2]:
X_train, X_test, y_train, y_test = load_and_split_data("data/preprocessed_dataset.csv",
                                                       "increase_stock",
                                                       "low_bike_demand",
                                                       test_size=0.2,
                                                       random_state=0)

In [3]:
model = LogisticRegression(random_state=0, max_iter=1000)
acc, precision, recall, f1, roc_auc, cm = fit_and_evaluate(model,
                                                           X_train,
                                                           y_train,
                                                           X_test,
                                                           y_test,
                                                           verbose=True)

Accuracy: 0.8688
Precision: 0.6600
Recall: 0.5690
F1: 0.6111
ROC AUC: 0.8939
Confusion Matrix: 
[[245  17]
 [ 25  33]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [4]:
param_space = {
    'penalty': ['l1', 'l2', 'elasticnet', None],  
    'C': [0.01, 0.1, 1, 10, 100],                
    'solver': ['liblinear', 'saga'],             
    'max_iter': [100, 200, 500],                 
    'l1_ratio': [0.1, 0.5, 0.9]                  
}


rf = LogisticRegression(random_state=0, max_iter=2000)

best_params = find_optimal_hyperparameters(model,
                                           param_space, 
                                           X_train, 
                                           y_train,
                                           cv=5, 
                                           scoring="recall", 
                                           n_jobs=-1, 
                                           save_dir="output/best_params", 
                                           save_file="lr_best.json")

450 fits failed out of a total of 1800.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
225 fits failed with the following error:
Traceback (most recent call last):
  File "/home/markus/.local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/markus/.local/lib/python3.9/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/home/markus/.local/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py", line 1194, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/home/markus/.local/lib/python3.9/site-packages/sklearn/linear_model/_logistic

Best parameters found:  {'C': 100, 'l1_ratio': 0.1, 'max_iter': 100, 'penalty': 'l1', 'solver': 'liblinear'}
Saving best parameters to 'output/best_params/lr_best.json'


In [7]:
model = LogisticRegression

model = load_model_from_json(model, "output/best_params/lr_best.json", extra_parms={"random_state" : 0})

acc, precision, recall, f1, roc_auc, cm = fit_and_evaluate(model, 
                                                           X_train, 
                                                           y_train, 
                                                           X_test, 
                                                           y_test, 
                                                           verbose=True)



Accuracy: 0.8688
Precision: 0.6538
Recall: 0.5862
F1: 0.6182
ROC AUC: 0.9029
Confusion Matrix: 
[[244  18]
 [ 24  34]]
