# Import variables

In [1]:
!pip install hyperopt



In [2]:
from hyperopt import tpe
from hyperopt import STATUS_OK
from hyperopt import Trials
from hyperopt import hp
from hyperopt import fmin
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
import pandas
import mlflow

# Load Data

In [3]:
pandas_df = pandas.read_csv("training_data.csv")
X=pandas_df.iloc[:,:-1]
y=pandas_df.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4284, stratify=y)

# Define objective function

In [4]:
N_FOLDS = 4
MAX_EVALS = 10

def objective(params, n_folds = N_FOLDS):
    """Objective function for Logistic Regression Hyperparameter Tuning"""

    # Perform n_fold cross validation with hyperparameters
    # Use early stopping and evaluate based on ROC AUC
    mlflow.sklearn.autolog()
    with mlflow.start_run(nested=True):
        clf = LogisticRegression(**params,random_state=0,verbose =0)
        scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='f1_macro')

        # Extract the best score
        best_score = max(scores)

        # Loss must be minimized
        loss = 1 - best_score

        # Dictionary with information for evaluation
        return {'loss': loss, 'params': params, 'status': STATUS_OK}

# Define parameter space

In [5]:

space = {
    'warm_start' : hp.choice('warm_start', [True, False]),
    'fit_intercept' : hp.choice('fit_intercept', [True, False]),
    'tol' : hp.uniform('tol', 0.00001, 0.0001),
    'C' : hp.uniform('C', 0.05, 3),
    'solver' : hp.choice('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
    'max_iter' : hp.choice('max_iter', range(5,1000))
}

# Create experiment

In [6]:
mlflow.set_experiment("Hyperopt_Optimization")

INFO: 'Hyperopt_Optimization' does not exist. Creating a new experiment


# Define Optimization Trials

In [7]:
# Algorithm
tpe_algorithm = tpe.suggest

# Trials object to track progress
bayes_trials = Trials()




with mlflow.start_run():
    best = fmin(fn = objective, space = space, algo = tpe.suggest, max_evals = MAX_EVALS, trials = bayes_trials)

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]

2021/07/02 20:37:02 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'c36d6f57bcbc40a58662c4f68a7ba67e'
2021/07/02 20:37:03 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'c36d6f57bcbc40a58662c4f68a7ba67e'
2021/07/02 20:37:04 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'c36d6f57bcbc40a58662c4f68a7ba67e'
2021/07/02 20:37:04 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 10%|█         | 1/10 [00:04<00:41,  4.66s/trial, best loss: 0.4753470735734817]

2021/07/02 20:37:06 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '184a086e73b9452a8cc70827761e9f09'
2021/07/02 20:37:08 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '184a086e73b9452a8cc70827761e9f09'
2021/07/02 20:37:08 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '184a086e73b9452a8cc70827761e9f09'
2021/07/02 20:37:09 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 20%|██        | 2/10 [00:09<00:40,  5.03s/trial, best loss: 0.4753470735734817]

2021/07/02 20:37:12 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '2aa8e313ae3c4d5b8fabca1c16b9015d'
2021/07/02 20:37:13 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '2aa8e313ae3c4d5b8fabca1c16b9015d'
2021/07/02 20:37:14 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '2aa8e313ae3c4d5b8fabca1c16b9015d'
2021/07/02 20:37:15 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 30%|███       | 3/10 [00:17<00:41,  5.97s/trial, best loss: 0.4753470735734817]

2021/07/02 20:37:19 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '655808ca91234c68be87466d1571f17e'
2021/07/02 20:37:20 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '655808ca91234c68be87466d1571f17e'
2021/07/02 20:37:21 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '655808ca91234c68be87466d1571f17e'
2021/07/02 20:37:22 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 40%|████      | 4/10 [00:22<00:34,  5.67s/trial, best loss: 0.4753470735734817]

2021/07/02 20:37:24 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '4035675933ba4cdd848b1f18575624cc'
2021/07/02 20:37:25 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '4035675933ba4cdd848b1f18575624cc'
2021/07/02 20:37:25 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '4035675933ba4cdd848b1f18575624cc'
2021/07/02 20:37:26 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 50%|█████     | 5/10 [00:26<00:25,  5.05s/trial, best loss: 0.4753470735734817]

2021/07/02 20:37:28 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'f9381d81b6da46babc2f74777dccecd3'
2021/07/02 20:37:29 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'f9381d81b6da46babc2f74777dccecd3'
2021/07/02 20:37:29 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'f9381d81b6da46babc2f74777dccecd3'
2021/07/02 20:37:31 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 60%|██████    | 6/10 [00:30<00:19,  4.86s/trial, best loss: 0.4753470735734817]

2021/07/02 20:37:33 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'fa2134f53f8448e7a9463340ec3ecdfb'
2021/07/02 20:37:33 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'fa2134f53f8448e7a9463340ec3ecdfb'
2021/07/02 20:37:34 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'fa2134f53f8448e7a9463340ec3ecdfb'
2021/07/02 20:37:34 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 70%|███████   | 7/10 [00:34<00:13,  4.51s/trial, best loss: 0.4753470735734817]

2021/07/02 20:37:36 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '3b0ef856cf674d32aec5c0fc7688ef39'
2021/07/02 20:37:37 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '3b0ef856cf674d32aec5c0fc7688ef39'
2021/07/02 20:37:38 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '3b0ef856cf674d32aec5c0fc7688ef39'
2021/07/02 20:37:39 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 80%|████████  | 8/10 [00:38<00:09,  4.51s/trial, best loss: 0.4753470735734817]

2021/07/02 20:37:41 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'fea1f3f3861840e8baf41285e4bd3f81'
2021/07/02 20:37:41 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'fea1f3f3861840e8baf41285e4bd3f81'
2021/07/02 20:37:42 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'fea1f3f3861840e8baf41285e4bd3f81'
2021/07/02 20:37:43 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 90%|█████████ | 9/10 [00:42<00:04,  4.22s/trial, best loss: 0.4753470735734817]

2021/07/02 20:37:44 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'de7e78d338fa49558931dce5d68158f6'
2021/07/02 20:37:45 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'de7e78d338fa49558931dce5d68158f6'
2021/07/02 20:37:45 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'de7e78d338fa49558931dce5d68158f6'
2021/07/02 20:37:46 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

100%|██████████| 10/10 [00:46<00:00,  4.64s/trial, best loss: 0.4753470735734817]


In [8]:
best

{'C': 1.7015010837141984,
 'fit_intercept': 0,
 'max_iter': 533,
 'solver': 3,
 'tol': 3.247189008512365e-05,
 'warm_start': 0}