# Import variables

In [2]:
!pip install hyperopt

Collecting hyperopt
  Downloading hyperopt-0.2.5-py2.py3-none-any.whl (965 kB)
[K     |████████████████████████████████| 965 kB 1.6 MB/s eta 0:00:01
Collecting future
  Downloading future-0.18.2.tar.gz (829 kB)
[K     |████████████████████████████████| 829 kB 1.6 MB/s eta 0:00:01
Building wheels for collected packages: future
  Building wheel for future (setup.py) ... [?25ldone
[?25h  Created wheel for future: filename=future-0.18.2-py3-none-any.whl size=491059 sha256=c50f7ebbefb93f8cff5fe6cdb1ec7bcc9151307d8695e43fb037e4fbb7076cf8
  Stored in directory: /home/admin/.cache/pip/wheels/56/b0/fe/4410d17b32f1f0c3cf54cdfb2bc04d7b4b8f4ae377e2229ba0
Successfully built future
Installing collected packages: future, hyperopt
Successfully installed future-0.18.2 hyperopt-0.2.5


In [3]:
from hyperopt import tpe
from hyperopt import STATUS_OK
from hyperopt import Trials
from hyperopt import hp
from hyperopt import fmin
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
import pandas
import mlflow

# Load Data

In [6]:
pandas_df = pandas.read_csv("training_data.csv")
X=pandas_df.iloc[:,:-1]
y=pandas_df.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4284, stratify=y)

# Define objective function

In [7]:
N_FOLDS = 4
MAX_EVALS = 10

def objective(params, n_folds = N_FOLDS):
    """Objective function for Logistic Regression Hyperparameter Tuning"""

    # Perform n_fold cross validation with hyperparameters
    # Use early stopping and evaluate based on ROC AUC
    mlflow.sklearn.autolog()
    with mlflow.start_run(nested=True):
        clf = LogisticRegression(**params,random_state=0,verbose =0)
        scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='f1_macro')

        # Extract the best score
        best_score = max(scores)

        # Loss must be minimized
        loss = 1 - best_score

        # Dictionary with information for evaluation
        return {'loss': loss, 'params': params, 'status': STATUS_OK}

# Define parameter space

In [8]:

space = {
    'warm_start' : hp.choice('warm_start', [True, False]),
    'fit_intercept' : hp.choice('fit_intercept', [True, False]),
    'tol' : hp.uniform('tol', 0.00001, 0.0001),
    'C' : hp.uniform('C', 0.05, 3),
    'solver' : hp.choice('solver', ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
    'max_iter' : hp.choice('max_iter', range(5,1000))
}

# Create experiment

In [12]:
mlflow.set_experiment("Hyperopt_Optimization")

INFO: 'Hyperopt_Optimization' does not exist. Creating a new experiment


# Define Optimization Trials

In [13]:
# Algorithm
tpe_algorithm = tpe.suggest

# Trials object to track progress
bayes_trials = Trials()




with mlflow.start_run():
    best = fmin(fn = objective, space = space, algo = tpe.suggest, max_evals = MAX_EVALS, trials = bayes_trials)

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]

2021/02/17 17:46:00 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'd124e0956f2a41a98a108d77439be27d'
2021/02/17 17:46:04 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'd124e0956f2a41a98a108d77439be27d'
2021/02/17 17:46:05 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'd124e0956f2a41a98a108d77439be27d'
2021/02/17 17:46:05 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 10%|█         | 1/10 [00:06<01:01,  6.84s/trial, best loss: 0.48659700136301676]

2021/02/17 17:46:07 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '09da40c242ad4e1ca0f8ea908bf94621'
2021/02/17 17:46:08 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '09da40c242ad4e1ca0f8ea908bf94621'
2021/02/17 17:46:08 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '09da40c242ad4e1ca0f8ea908bf94621'
2021/02/17 17:46:09 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 20%|██        | 2/10 [00:10<00:38,  4.78s/trial, best loss: 0.4753470735734817] 

2021/02/17 17:46:10 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'c5f7e29a70da473186ca9f48f5ea3a27'
2021/02/17 17:46:11 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'c5f7e29a70da473186ca9f48f5ea3a27'
2021/02/17 17:46:12 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'c5f7e29a70da473186ca9f48f5ea3a27'
2021/02/17 17:46:12 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 30%|███       | 3/10 [00:13<00:29,  4.20s/trial, best loss: 0.4753470735734817]

2021/02/17 17:46:14 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'cb553235461241e387b9c284a6c618e9'
2021/02/17 17:46:15 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'cb553235461241e387b9c284a6c618e9'
2021/02/17 17:46:15 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'cb553235461241e387b9c284a6c618e9'
2021/02/17 17:46:16 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 40%|████      | 4/10 [00:17<00:23,  3.90s/trial, best loss: 0.4753470735734817]

2021/02/17 17:46:17 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'c57733e2d54946c19284891967a7b8ce'
2021/02/17 17:46:18 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'c57733e2d54946c19284891967a7b8ce'
2021/02/17 17:46:19 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'c57733e2d54946c19284891967a7b8ce'
2021/02/17 17:46:19 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 50%|█████     | 5/10 [00:20<00:18,  3.72s/trial, best loss: 0.4753470735734817]

2021/02/17 17:46:21 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '19601d1efadf4e77942b80c0fc0dbc17'
2021/02/17 17:46:21 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '19601d1efadf4e77942b80c0fc0dbc17'
2021/02/17 17:46:22 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '19601d1efadf4e77942b80c0fc0dbc17'
2021/02/17 17:46:22 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 60%|██████    | 6/10 [00:24<00:14,  3.64s/trial, best loss: 0.4753470735734817]

2021/02/17 17:46:24 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'ca70851c4ada4c13905866b2c3de4953'
2021/02/17 17:46:25 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'ca70851c4ada4c13905866b2c3de4953'
2021/02/17 17:46:25 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'ca70851c4ada4c13905866b2c3de4953'
2021/02/17 17:46:26 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 70%|███████   | 7/10 [00:27<00:10,  3.50s/trial, best loss: 0.4753470735734817]

2021/02/17 17:46:27 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '3d53dec075b74ec996f349a30e6769de'
2021/02/17 17:46:28 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '3d53dec075b74ec996f349a30e6769de'
2021/02/17 17:46:29 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '3d53dec075b74ec996f349a30e6769de'
2021/02/17 17:46:29 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 80%|████████  | 8/10 [00:30<00:06,  3.49s/trial, best loss: 0.4753470735734817]

2021/02/17 17:46:31 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '8d5c72ce6f564d46bcd631d13d9009ac'
2021/02/17 17:46:31 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '8d5c72ce6f564d46bcd631d13d9009ac'
2021/02/17 17:46:32 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID '8d5c72ce6f564d46bcd631d13d9009ac'
2021/02/17 17:46:33 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

 90%|█████████ | 9/10 [00:33<00:03,  3.41s/trial, best loss: 0.4753470735734817]

2021/02/17 17:46:34 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'bc880203cd934e44b68dbbc0d0b4701c'
2021/02/17 17:46:35 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'bc880203cd934e44b68dbbc0d0b4701c'
2021/02/17 17:46:35 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run with ID 'bc880203cd934e44b68dbbc0d0b4701c'
2021/02/17 17:46:36 INFO mlflow.utils.autologging_utils: sklearn autologging will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow to the MLflow run

100%|██████████| 10/10 [00:37<00:00,  3.74s/trial, best loss: 0.4753470735734817]


In [14]:
best

{'C': 1.852303246830638,
 'fit_intercept': 0,
 'max_iter': 944,
 'solver': 1,
 'tol': 8.551889162947398e-05,
 'warm_start': 0}