In [1]:
#from google.colab import drive
#drive.mount('/content/drive')
import sys
import os
#PATH = '/content/drive/My Drive/PPM_Stability/'
PATH = os.getcwd()
sys.path.append(PATH)

In [2]:
#!pip install xgboost==1.0.0

In [3]:
import EncoderFactory
from DatasetManager import DatasetManager
import BucketFactory

import pandas as pd
import numpy as np

from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample


import time
import os
import sys
from sys import argv
import pickle
from collections import defaultdict

from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
#import catboost

from tensorflow.keras.backend import print_tensor
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers.core import Dense, Activation, Dropout
from keras.preprocessing import sequence
from keras.models import Sequential, Model
from keras.layers import Dense, Embedding, Flatten, Input
from keras.layers import LSTM
from keras.optimizers import Nadam, RMSprop
from keras.layers.normalization import BatchNormalization


from hyperopt import Trials, STATUS_OK, tpe, fmin, hp
import hyperopt
from hyperopt.pyll.base import scope
from hyperopt.pyll.stochastic import sample

In [4]:
def create_and_evaluate_model(args):    
    print(args)
    global trial_nr
    trial_nr += 1
    
    start = time.time()
    score = 0
    for cv_iter in range(n_splits):
        
        dt_test_prefixes = dt_prefixes[cv_iter]
        dt_train_prefixes = pd.DataFrame()
        for cv_train_iter in range(n_splits): 
            if cv_train_iter != cv_iter:
                dt_train_prefixes = pd.concat([dt_train_prefixes, dt_prefixes[cv_train_iter]], axis=0, sort=False)
                
        #Balance unbalanced data
        if balanced_data == False:
            y = dataset_manager.get_label_numeric(dt_train_prefixes)
            case_ids = dataset_manager.get_case_ids(dt_train_prefixes)

            neg_cases = [case_ids[i] for i in range(len(case_ids)) if y[i] == 0]
            pos_cases = [case_ids[i] for i in range(len(case_ids)) if y[i] == 1]

            if len(neg_cases) > len(pos_cases):
                neg_cases = resample(neg_cases, replace = False, n_samples = len(pos_cases))
            elif len(neg_cases) < len(pos_cases):
                pos_cases = resample(pos_cases, replace = False, n_samples = len(neg_cases))

            bal_data = dt_train_prefixes.loc[dt_train_prefixes[dataset_manager.case_id_col].isin(neg_cases)]
            bal_data = bal_data.append(dt_train_prefixes.loc[dt_train_prefixes[dataset_manager.case_id_col].isin(pos_cases)])
            dt_train_prefixes = bal_data
                
        dt_test_bucket = dt_test_prefixes
        dt_train_bucket = dt_train_prefixes
        
        test_y = dataset_manager.get_label_numeric(dt_test_bucket)
        train_y = dataset_manager.get_label_numeric(dt_train_bucket)

        if len(set(train_y)) < 2:
            preds = [train_y[0]] * len(relevant_test_cases_bucket)
        else:
            feature_combiner = FeatureUnion([(method, EncoderFactory.get_encoder(method, **cls_encoder_args)) for method in methods])

            if cls_method == "xgboost":
                cls = xgb.XGBClassifier(objective='binary:logistic',
                                        n_estimators=args['n_estimators'],
                                        learning_rate= args['learning_rate'],
                                        subsample=args['subsample'],
                                        max_depth=int(args['max_depth']),
                                        colsample_bytree=args['colsample_bytree'],
                                        min_child_weight=int(args['min_child_weight']),
                                        seed=random_state)
            elif cls_method == "logit":
                cls = LogisticRegression(C=2**args['C'],
                                         random_state=random_state)

            if cls_method == "logit":
                pipeline = Pipeline([('encoder', feature_combiner), ('scaler', StandardScaler()), ('cls', cls)])
            else:
                pipeline = Pipeline([('encoder', feature_combiner), ('cls', cls)])
            pipeline.fit(dt_train_bucket, train_y)

            preds = pipeline.predict(dt_test_bucket)

        if balanced_data==True:
            acc = roc_auc_score(test_y, preds)
            score += acc
        else:
            acc = f1_score(test_y, preds)
            score += acc
        
        print('Accuracy:', acc)
        
    for k, v in args.items():
        fout_all.write("%s;%s;%s;%s;%s;%s;%s\n" % (trial_nr, dataset_name, cls_method, method_name, k, v, score / n_splits))   
    fout_all.write("%s;%s;%s;%s;%s;%s;%s\n" % (trial_nr, dataset_name, cls_method, method_name, "processing_time", time.time() - start, 0))   
    fout_all.flush()
    return {'loss': -score / n_splits, 'status': STATUS_OK, 'model': cls}

In [None]:
dataset_ref = "production"
params_dir = "params"
n_iter = 10
bucket_method = "prefix"
cls_encoding = "index"
cls_method = "logit"
balanced_data = True

if bucket_method == "state":
    bucket_encoding = "last"
else:
    bucket_encoding = "agg"

method_name = "%s_%s"%(bucket_method, cls_encoding)

dataset_ref_to_datasets = {
    "bpic2011": ["bpic2011_f%s"%formula for formula in range(1,5)],
    "bpic2015": ["bpic2015_%s_f2"%(municipality) for municipality in range(5,6)],
    "insurance": ["insurance_activity", "insurance_followup"],
    "bpic2012" : ["bpic2012_accepted"],
    "sepsis_cases": ["sepsis_cases_1"],#, "sepsis_cases_2", "sepsis_cases_4"],
    "production": ["production"]
}

encoding_dict = {
    "laststate": ["static", "last"],
    "agg": ["static", "agg"],
    "index": ["static", "index"],
    "combined": ["static", "last", "agg"],
    "3d" : []
}

datasets = [dataset_ref] if dataset_ref not in dataset_ref_to_datasets else dataset_ref_to_datasets[dataset_ref]
methods = encoding_dict[cls_encoding]
print(datasets)
    
train_ratio = 0.8
n_splits = 3
random_state = 22

# create results directory
if not os.path.exists(os.path.join(params_dir)):
    os.makedirs(os.path.join(params_dir))
    
for dataset_name in datasets:
    
    # read the data
    dataset_manager = DatasetManager(dataset_name)
    data = dataset_manager.read_dataset()
    data = dataset_manager.balance_data(data)

    cls_encoder_args = {'case_id_col': dataset_manager.case_id_col, 
                        'static_cat_cols': dataset_manager.static_cat_cols,
                        'static_num_cols': dataset_manager.static_num_cols, 
                        'dynamic_cat_cols': dataset_manager.dynamic_cat_cols,
                        'dynamic_num_cols': dataset_manager.dynamic_num_cols, 
                        'fillna': True}

    # determine min and max (truncated) prefix lengths
    min_prefix_length = 1
    if "traffic_fines" in dataset_name:
        max_prefix_length = 10
    else:
        max_prefix_length = 20

    # split into training and test
    print("splitting data")
    train, _ = dataset_manager.split_data_strict(data, train_ratio, split="temporal")
    train_prefixes = dataset_manager.generate_prefix_data(train, min_prefix_length, max_prefix_length)
    
    # Bucketing prefixes based on control flow
    bucketer_args = {'encoding_method':bucket_encoding, 
                     'case_id_col':dataset_manager.case_id_col, 
                     'cat_cols':[dataset_manager.activity_col], 
                     'num_cols':[], 
                     'random_state':random_state}
    if bucket_method == "cluster":
        bucketer_args["n_clusters"] = args["n_clusters"]
    bucketer = BucketFactory.get_bucketer(bucket_method, **bucketer_args)
    bucket_assignments_train = bucketer.fit_predict(train_prefixes)
    
    for bucket in set(bucket_assignments_train):
        print("Optimising %s of %s buckets" % (bucket, len(set(bucket_assignments_train))))
        
        relevant_train_cases_bucket = dataset_manager.get_indexes(train_prefixes)[bucket_assignments_train == bucket]
        dt_train_bucket = dataset_manager.get_relevant_data_by_indexes(train_prefixes, relevant_train_cases_bucket)
    
        # prepare chunks for CV
        dt_prefixes = []
        class_ratios = []
        for train_chunk, test_chunk in dataset_manager.get_stratified_split_generator(dt_train_bucket, n_splits=n_splits):
            class_ratios.append(dataset_manager.get_class_ratio(train_chunk))
            dt_prefixes.append(train_chunk)
        #del train
        
        # set up search space
        if cls_method == "xgboost":
            space = {'n_estimators': scope.int(hp.quniform('n_estimators', 1, 175, 5)),
                    'learning_rate': hp.uniform("learning_rate", 0, 5),
                     'subsample': hp.uniform("subsample", 0.5, 1),
                     'max_depth': scope.int(hp.quniform('max_depth', 1, 30, 1)),
                     'colsample_bytree': hp.uniform("colsample_bytree", 0, 1),
                     'min_child_weight': scope.int(hp.quniform('min_child_weight', 0, 6, 1))}
        elif cls_method == "logit":
            space = {'C': hp.uniform('C', -15, 15)}

        # optimize parameters
        trial_nr = 1
        trials = Trials()
        fout_all = open(os.path.join(PATH, params_dir, "param_optim_all_trials_%s_%s_%s_bucket_%s.csv" % (cls_method, dataset_name, method_name, bucket)), "w")
        if "prefix" in method_name:
            fout_all.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % ("iter", "dataset", "cls", "method", "nr_events", "param", "value", "score"))   
        else:
            fout_all.write("%s;%s;%s;%s;%s;%s;%s\n" % ("iter", "dataset", "cls", "method", "param", "value", "score"))   
        best = fmin(create_and_evaluate_model, space, algo=tpe.suggest, max_evals=n_iter, trials=trials, verbose=True)
        fout_all.close()

        # write the best parameters
        best_params = hyperopt.space_eval(space, best)
        outfile = os.path.join(PATH, params_dir, "optimal_params_%s_%s_%s_bucket_%s.pickle" % (cls_method, dataset_name, method_name, bucket))
        # write to file
        with open(outfile, "wb") as fout:
            pickle.dump(best_params, fout)


['production']
splitting data
prefix
Optimising 1 of 20 buckets
{'C': 10.285716541280703}                                                                                              
Accuracy:                                                                                                              
0.9443877551020408                                                                                                     
Accuracy:                                                                                                              
0.9173497267759564                                                                                                     
  0%|                                                                           | 0/10 [00:00<?, ?trial/s, best loss=?]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
0.963198394111743                                                                                                      
{'C': -4.816356745128603}                                                                                              
Accuracy:                                                                                                              
0.8767006802721088                                                                                                     
 10%|████▊                                           | 1/10 [00:00<00:03,  2.65trial/s, best loss: -0.9416452919965801]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
0.8697062841530054                                                                                                     
Accuracy:                                                                                                              
0.8671796587487455                                                                                                     
{'C': 9.147914907873162}                                                                                               
Accuracy:                                                                                                              
0.9443877551020408                                                                                                     
 20%|█████████▌                                      | 2/10 [00:00<00:02,  2.81trial/s, best loss: -0.9416452919965801]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
0.9069330601092898                                                                                                     
Accuracy:                                                                                                              
0.963198394111743                                                                                                      
{'C': 4.523236101123739}                                                                                               
 30%|██████████████▍                                 | 3/10 [00:01<00:02,  2.59trial/s, best loss: -0.9416452919965801]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
0.9258503401360544                                                                                                     
Accuracy:                                                                                                              
0.9009562841530054                                                                                                     
 30%|██████████████▍                                 | 3/10 [00:01<00:02,  2.59trial/s, best loss: -0.9416452919965801]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
0.9345935095349616                                                                                                     
{'C': 11.297998541406383}                                                                                              
Accuracy:                                                                                                              
0.9443877551020408                                                                                                     
Accuracy:                                                                                                              
0.9381830601092898                                                                                                     
 40%|███████████████████▏                            | 4/10 [00:01<00:02,  2.51trial/s, best loss: -0.9416452919965801]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
0.963198394111743                                                                                                      
{'C': -6.60907353658817}                                                                                               
Accuracy:                                                                                                              
0.8627551020408163                                                                                                     
 50%|████████████████████████                        | 5/10 [00:02<00:01,  2.56trial/s, best loss: -0.9485897364410244]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
0.8630464480874317                                                                                                     
Accuracy:                                                                                                              
0.8447641351622616                                                                                                     
{'C': 7.79212459577532}                                                                                                
Accuracy:                                                                                                              
0.9443877551020408                                                                                                     
Accuracy:                                                                                                              
0.9173497267759564                      

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
0.963198394111743                                                                                                      
{'C': 4.394470149862951}                                                                                               
 70%|█████████████████████████████████▌              | 7/10 [00:02<00:01,  2.75trial/s, best loss: -0.9485897364410244]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
0.9258503401360544                                                                                                     
Accuracy:                                                                                                              
0.9009562841530054                                                                                                     
Accuracy:                                                                                                              
0.9345935095349616                                                                                                     
{'C': -12.207863412397213}                                                                                             
 80%|██████████████████████████████████████▍         | 8/10 [00:03<00:00,  2.61trial/s, best loss: -0.9485897364410244]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
0.5                                                                                                                    
Accuracy:                                                                                                              
0.5                                                                                                                    
Accuracy:                                                                                                              
0.5                                                                                                                    
{'C': -13.183883850869528}                                                                                             
Accuracy:                                                                                                              
0.5                                     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
1.0                                                                                                                    
Accuracy:                                                                                                              
1.0                                                                                                                    
{'C': 5.178604528327806}                                                                                               
 10%|██████▎                                                        | 1/10 [00:00<00:04,  1.80trial/s, best loss: -1.0]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
1.0                                                                                                                    
 10%|██████▎                                                        | 1/10 [00:00<00:04,  1.80trial/s, best loss: -1.0]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy:                                                                                                              
1.0                                                                                                                    
 10%|██████▎                                                        | 1/10 [00:00<00:04,  1.80trial/s, best loss: -1.0]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
1.0                                                                                                                    
{'C': 2.2076455718328134}                                                                                              
Accuracy:                                                                                                              
1.0                                                                                                                    
 20%|████████████▌                                                  | 2/10 [00:01<00:04,  1.76trial/s, best loss: -1.0]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
1.0                                                                                                                    
Accuracy:                                                                                                              
1.0                                                                                                                    
{'C': 7.874075827116354}                                                                                               
Accuracy:                                                                                                              
1.0                                                                                                                    
 30%|██████████████████▉                                            | 3/10 [00:01<00:03,  1.81trial/s, best loss: -1.0]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
1.0                                                                                                                    
Accuracy:                                                                                                              
1.0                                                                                                                    
{'C': 1.2697939808192054}                                                                                              
 40%|█████████████████████████▏                                     | 4/10 [00:02<00:03,  1.86trial/s, best loss: -1.0]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
0.9891304347826088                                                                                                     
Accuracy:                                                                                                              
1.0                                                                                                                    
Accuracy:                                                                                                              
0.9913793103448276                                                                                                     
{'C': -10.761288211204938}                                                                                             
Accuracy:                                                                                                              
0.5869565217391304                      

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(



Accuracy:                                                                                                              
1.0                                                                                                                    
Accuracy:                                                                                                              
1.0                                                                                                                    
{'C': 3.2501176882363048}                                                                                              
 80%|██████████████████████████████████████████████████▍            | 8/10 [00:04<00:01,  1.93trial/s, best loss: -1.0]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy:                                                                                                              
1.0                                                                                                                    
 80%|██████████████████████████████████████████████████▍            | 8/10 [00:04<00:01,  1.93trial/s, best loss: -1.0]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy:                                                                                                              
1.0                                                                                                                    
 80%|██████████████████████████████████████████████████▍            | 8/10 [00:04<00:01,  1.93trial/s, best loss: -1.0]




Accuracy:                                                                                                              
1.0                                                                                                                    
{'C': 7.7949103543957605}                                                                                              
Accuracy:                                                                                                              
1.0                                                                                                                    
 90%|████████████████████████████████████████████████████████▋      | 9/10 [00:04<00:00,  1.87trial/s, best loss: -1.0]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy:                                                                                                              
1.0                                                                                                                    
 90%|████████████████████████████████████████████████████████▋      | 9/10 [00:05<00:00,  1.87trial/s, best loss: -1.0]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy:                                                                                                              
1.0                                                                                                                    
100%|██████████████████████████████████████████████████████████████| 10/10 [00:05<00:00,  1.87trial/s, best loss: -1.0]
Optimising 3 of 20 buckets
{'C': -9.661078230543083}                                                                                              
  0%|                                                                           | 0/10 [00:00<?, ?trial/s, best loss=?]




Accuracy:                                                                                                              
0.7686688311688311                                                                                                     
Accuracy:                                                                                                              
0.7621031746031744                                                                                                     
Accuracy:                                                                                                              
0.7932539682539683                                                                                                     
{'C': -2.1264683527647605}                                                                                             
Accuracy:                                                                                                              
0.9886363636363636                      

Accuracy:                                                                                                              
1.0                                                                                                                    
Accuracy:                                                                                                              
1.0                                                                                                                    
100%|██████████████████████████████████████████████████████████████| 10/10 [00:08<00:00,  1.15trial/s, best loss: -1.0]
Optimising 5 of 20 buckets
{'C': 10.623093419938346}                                                                                              
Accuracy:                                                                                                              
1.0                                                                                                                    
Accuracy:    

1.0                                                                                                                    
Accuracy:                                                                                                              
1.0                                                                                                                    
{'C': 4.583859123451706}                                                                                               
Accuracy:                                                                                                              
1.0                                                                                                                    
Accuracy:                                                                                                              
1.0                                                                                                                    
Accuracy:                               

Accuracy:                                                                                                              
0.9225490196078431                                                                                                     
{'C': -8.084848176535182}                                                                                              
Accuracy:                                                                                                              
0.9519607843137255                                                                                                     
Accuracy:                                                                                                              
0.9372549019607843                                                                                                     
Accuracy:                                                                                                              
0.9372549019607843                      

In [None]:
for bucket in set(bucket_assignments_train):
    params_path = os.path.join(PATH, params_dir, "optimal_params_%s_%s_%s_bucket_%s.pickle" % 
                               (cls_method, dataset_name, method_name, bucket))
    with open(params_path, 'rb') as f:
        best_params = pickle.load(f)
    
    print ("Bucket %s" % bucket)
    print(best_params)