# Hyperparameter tuning 

#### This procedure based on: https://docs.ray.io/en/latest/tune/examples/tune-sklearn.html  and https://towardsdatascience.com/hyperparameter-tuning-the-random-forest-in-python-using-scikit-learn-28d2aa77dd74

pip install "ray[tune]" tune-sklearn

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import average_precision_score
from imblearn.metrics import geometric_mean_score
    

### Importing data and train test split

In [2]:
data = pd.read_csv("OSA_complete_patients.csv", index_col = 0)

In [3]:
features = ['Sex', 'Age', 'Current_smoker', 'Former_smoker',
       'Sedentary', 'Height', 'Weight', 'Cervical_perimeter',
       'Abdominal_perimeter', 'Systolic_BP', 'Diastolic_BP',
       'Maxillofacial_profile', 'BMI', 'High_BP', 'Asthma', 'Rhinitis', 'COPD',
       'Respiratory_fail', 'Myocardial_infarct', 'Coronary_fail',
       'Arrhythmias', 'Stroke', 'Heart_fail', 'Arteriopathy', 'Gastric_reflux',
       'Glaucoma', 'Diabetes', 'Hypercholesterolemia', 'Hypertriglyceridemia',
       'Hypo(er)thyroidism', 'Depression', 'Obesity', 'Dysmorphology',
       'Restless_Leg_Syndrome', 'Snoring', 'Diurnal_somnolence',
       'Driving_drowsiness', 'Morning_fatigue', 'Morning_headache',
       'Memory_problem', 'Nocturnal_perspiration',
       'Shortness_of_breath_on_exertion', 'Nocturia', 'Drowsiness_accident',
       'Near_miss_accident', 'Respiratory_arrest', 'Epworth_scale',
       'Pichots_scale', 'Depression_scale']
X=data[features]  # Features  
y=data['Severity']  # Labels

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.2, 
                                                    random_state=0, 
                                                    shuffle=True, 
                                                    stratify=y)

## Random Forest 

In [5]:
rf = RandomForestClassifier(random_state = 42)
rf.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 42,
 'verbose': 0,
 'warm_start': False}

#### Will focus on the following parameters: 
    
n_estimators = number of trees in the foreset
max_features = max number of features considered for splitting a node
max_depth = max number of levels in each decision tree
min_samples_split = min number of data points placed in a node before the node is split
min_samples_leaf = min number of data points allowed in a leaf node
bootstrap = method for sampling data points (with or without replacement)

### Create a parameter grid

In [6]:
n_estimators = [int(x) for x in np.linspace(start = 100, stop = 1000, num = 10)]  # will test numbers 100-1000 counting by 10
max_features = ['auto', 'sqrt']
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]

# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
random_grid

{'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
 'max_features': ['auto', 'sqrt'],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
 'min_samples_split': [2, 5, 10],
 'min_samples_leaf': [1, 2, 4],
 'bootstrap': [True, False]}

### Random grid

In [7]:
# Random search of parameters, using 5 fold cross validation, 10 iterations.  
# search across 100 different combinations
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 10, verbose = 2, scoring = 'f1_weighted', random_state=42) # could add the 'verbose = 1-3' parameter to know what's going on

# Fit the random search model
rf_random.fit(X_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   4.8s
[CV] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   7.4s
[CV] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   7.1s
[CV] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   6.2s
[CV] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   8.7s
[CV] END bootstrap=False, max_depth=90, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=300; total time=  30.6s
[CV] END bootstrap=False, max_depth=90, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=30

  warn(


[CV] END bootstrap=False, max_depth=60, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=300; total time=  47.0s


  warn(


[CV] END bootstrap=False, max_depth=60, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=300; total time=  45.1s


  warn(


[CV] END bootstrap=False, max_depth=60, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=300; total time=  35.8s


  warn(


[CV] END bootstrap=False, max_depth=60, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=300; total time=  39.0s


  warn(


[CV] END bootstrap=False, max_depth=60, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=300; total time=  40.7s
[CV] END bootstrap=True, max_depth=30, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=700; total time= 1.0min
[CV] END bootstrap=True, max_depth=30, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=700; total time= 1.0min
[CV] END bootstrap=True, max_depth=30, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=700; total time= 1.0min
[CV] END bootstrap=True, max_depth=30, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=700; total time=  49.0s
[CV] END bootstrap=True, max_depth=30, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=700; total time=  19.2s


  warn(


[CV] END bootstrap=False, max_depth=80, max_features=auto, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=  18.6s


  warn(


[CV] END bootstrap=False, max_depth=80, max_features=auto, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=  18.9s


  warn(


[CV] END bootstrap=False, max_depth=80, max_features=auto, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=  18.4s


  warn(


[CV] END bootstrap=False, max_depth=80, max_features=auto, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=  18.1s


  warn(


[CV] END bootstrap=False, max_depth=80, max_features=auto, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=  18.3s
[CV] END bootstrap=False, max_depth=60, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=200; total time=   7.8s
[CV] END bootstrap=False, max_depth=60, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=200; total time=   7.2s
[CV] END bootstrap=False, max_depth=60, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=200; total time=   7.5s
[CV] END bootstrap=False, max_depth=60, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=200; total time=   7.1s
[CV] END bootstrap=False, max_depth=60, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=200; total time=   7.1s


  warn(


[CV] END bootstrap=False, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=1000; total time=  40.4s


  warn(


[CV] END bootstrap=False, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=1000; total time=  40.7s


  warn(


[CV] END bootstrap=False, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=1000; total time=  40.3s


  warn(


[CV] END bootstrap=False, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=1000; total time=  40.4s


  warn(


[CV] END bootstrap=False, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=1000; total time=  40.1s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   1.5s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   1.5s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   1.5s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   2.0s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   1.7s


  warn(


[CV] END bootstrap=True, max_depth=100, max_features=auto, min_samples_leaf=4, min_samples_split=2, n_estimators=600; total time=  13.7s


  warn(


[CV] END bootstrap=True, max_depth=100, max_features=auto, min_samples_leaf=4, min_samples_split=2, n_estimators=600; total time=  13.2s


  warn(


[CV] END bootstrap=True, max_depth=100, max_features=auto, min_samples_leaf=4, min_samples_split=2, n_estimators=600; total time=  13.8s


  warn(


[CV] END bootstrap=True, max_depth=100, max_features=auto, min_samples_leaf=4, min_samples_split=2, n_estimators=600; total time=  13.7s


  warn(


[CV] END bootstrap=True, max_depth=100, max_features=auto, min_samples_leaf=4, min_samples_split=2, n_estimators=600; total time=  13.7s


  warn(


[CV] END bootstrap=True, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=5, n_estimators=1000; total time=  26.2s


  warn(


[CV] END bootstrap=True, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=5, n_estimators=1000; total time=  26.0s


  warn(


[CV] END bootstrap=True, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=5, n_estimators=1000; total time=  26.2s


  warn(


[CV] END bootstrap=True, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=5, n_estimators=1000; total time=  26.1s


  warn(


[CV] END bootstrap=True, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=5, n_estimators=1000; total time=  25.8s


  warn(


### View the best parameters from fitting the random search

In [8]:
rf_random.best_params_   # best parameters based on F1 score

{'n_estimators': 300,
 'min_samples_split': 2,
 'min_samples_leaf': 2,
 'max_features': 'auto',
 'max_depth': 60,
 'bootstrap': False}

### Comparing to base model

In [9]:
# base model with default settings
base_model = RandomForestClassifier(random_state = 42)
base_model.fit(X_train, y_train)
base_pred=base_model.predict(X_test) 
print("Model Performance")
print("Base Model Accuracy:", accuracy_score(y_test, base_pred))
print("Base Model F1:", f1_score(y_test, base_pred, average='weighted'))


Model Performance
Base Model Accuracy: 0.45944087992667276
Base Model F1: 0.4149768035190259


In [10]:
# optimized model
random_model= RandomForestClassifier(random_state = 42, n_estimators= 300, min_samples_split= 2,min_samples_leaf= 2,
                                     max_features='auto', max_depth= 60, bootstrap =False)  # settings based on best params
random_model.fit(X_train, y_train)
random_pred = random_model.predict(X_test)
print("Model Performance")
print("Best Random Model Accuracy:", accuracy_score(y_test, random_pred))
print("Best Random Model F1:", f1_score(y_test, random_pred, average='weighted'))



  warn(


Model Performance
Best Random Model Accuracy: 0.46310724106324475
Best Random Model F1: 0.4173338524484995


### Increasing iterations. Only 2-fold cross-validation

In [11]:
rf_random_nocv = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 20, verbose = 2, scoring = 'f1_weighted', random_state=42, cv = 2) 

# Fit the random search model
rf_random_nocv.fit(X_train, y_train)

Fitting 2 folds for each of 20 candidates, totalling 40 fits
[CV] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   1.5s
[CV] END bootstrap=True, max_depth=50, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   1.6s
[CV] END bootstrap=False, max_depth=90, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=300; total time=   6.6s
[CV] END bootstrap=False, max_depth=90, max_features=sqrt, min_samples_leaf=4, min_samples_split=10, n_estimators=300; total time=   6.6s


  warn(


[CV] END bootstrap=False, max_depth=60, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=300; total time=   7.3s


  warn(


[CV] END bootstrap=False, max_depth=60, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=300; total time=   7.7s
[CV] END bootstrap=True, max_depth=30, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=700; total time=  13.2s
[CV] END bootstrap=True, max_depth=30, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=700; total time=  12.6s


  warn(


[CV] END bootstrap=False, max_depth=80, max_features=auto, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=  11.8s


  warn(


[CV] END bootstrap=False, max_depth=80, max_features=auto, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=  12.5s
[CV] END bootstrap=False, max_depth=60, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=200; total time=   5.1s
[CV] END bootstrap=False, max_depth=60, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=200; total time=   5.7s


  warn(


[CV] END bootstrap=False, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=1000; total time=  26.3s


  warn(


[CV] END bootstrap=False, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=1000; total time=  25.6s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   1.1s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   1.1s


  warn(


[CV] END bootstrap=True, max_depth=100, max_features=auto, min_samples_leaf=4, min_samples_split=2, n_estimators=600; total time=   9.4s


  warn(


[CV] END bootstrap=True, max_depth=100, max_features=auto, min_samples_leaf=4, min_samples_split=2, n_estimators=600; total time=   8.6s


  warn(


[CV] END bootstrap=True, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=5, n_estimators=1000; total time=  17.6s


  warn(


[CV] END bootstrap=True, max_depth=50, max_features=auto, min_samples_leaf=2, min_samples_split=5, n_estimators=1000; total time=  17.5s


  warn(


[CV] END bootstrap=False, max_depth=20, max_features=auto, min_samples_leaf=2, min_samples_split=10, n_estimators=200; total time=   4.7s


  warn(


[CV] END bootstrap=False, max_depth=20, max_features=auto, min_samples_leaf=2, min_samples_split=10, n_estimators=200; total time=   4.1s
[CV] END bootstrap=False, max_depth=20, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=400; total time=  10.9s
[CV] END bootstrap=False, max_depth=20, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=400; total time=  11.2s
[CV] END bootstrap=True, max_depth=70, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=500; total time=  12.8s
[CV] END bootstrap=True, max_depth=70, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=500; total time=  11.4s
[CV] END bootstrap=False, max_depth=70, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=600; total time=  15.7s
[CV] END bootstrap=False, max_depth=70, max_features=sqrt, min_samples_leaf=2, min_samples_split=10, n_estimators=600; total time=  14.9s
[CV] END bootstrap=False, max_depth=110,

  warn(


[CV] END bootstrap=False, max_depth=50, max_features=auto, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=  12.8s


  warn(


[CV] END bootstrap=False, max_depth=50, max_features=auto, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=  12.9s
[CV] END bootstrap=True, max_depth=30, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=   9.1s
[CV] END bootstrap=True, max_depth=30, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=500; total time=   9.3s


  warn(


[CV] END bootstrap=True, max_depth=70, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=300; total time=   6.1s


  warn(


[CV] END bootstrap=True, max_depth=70, max_features=auto, min_samples_leaf=1, min_samples_split=2, n_estimators=300; total time=   7.0s


  warn(


[CV] END bootstrap=False, max_depth=30, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=900; total time=  24.8s


  warn(


[CV] END bootstrap=False, max_depth=30, max_features=auto, min_samples_leaf=2, min_samples_split=2, n_estimators=900; total time=  25.7s


  warn(


[CV] END bootstrap=True, max_depth=None, max_features=auto, min_samples_leaf=4, min_samples_split=2, n_estimators=800; total time=  13.7s


  warn(


[CV] END bootstrap=True, max_depth=None, max_features=auto, min_samples_leaf=4, min_samples_split=2, n_estimators=800; total time=  13.2s


  warn(


In [12]:
rf_random_nocv.best_params_   # best parameters based on F1 score

{'n_estimators': 500,
 'min_samples_split': 10,
 'min_samples_leaf': 1,
 'max_features': 'auto',
 'max_depth': 80,
 'bootstrap': False}

In [13]:
# optimized model
random_model_nocv= RandomForestClassifier(random_state = 42, n_estimators= 300, min_samples_split= 2,min_samples_leaf= 1,
                                     max_features='auto', max_depth= 70, bootstrap =True)  # settings based on best params
random_model_nocv.fit(X_train, y_train)
random_nocv_pred = random_model_nocv.predict(X_test)
print("Model Performance")
print("Best Random Model Accuracy:", accuracy_score(y_test, random_nocv_pred))
print("Best Random Model F1:", f1_score(y_test, random_nocv_pred, average='weighted'))


  warn(


Model Performance
Best Random Model Accuracy: 0.4605866177818515
Best Random Model F1: 0.4120066699154584


### Grid Search 

#### Testing multiple classifiers at once: https://towardsdatascience.com/how-to-tune-multiple-ml-models-with-gridsearchcv-at-once-9fcebfcc6c23

## HyperOpt

#### Basd on https://machinelearningmastery.com/hyperopt-for-automated-machine-learning-with-scikit-learn/
pip install hyperopt

pip install git+https://github.com/hyperopt/hyperopt-sklearn

In [24]:
from hpsklearn import HyperoptEstimator, random_forest_classifier, extra_trees_classifier, xgboost_classification, lightgbm_classification
from hpsklearn import any_classifier
from hpsklearn import any_preprocessing
from hyperopt import tpe


In [27]:
# define model
#testing any classifier
# any preprocessing
# Tree of Parzen Estimators (TPE) algorithm
#clf = [random_forest_classifier("RF"), extra_trees_classifier("ET"), xgboost_classification("XGB"), lightgbm_classification("LG")]
model = HyperoptEstimator(classifier=any_classifier('cla'), preprocessing=any_preprocessing('pre'), 
                          algo=tpe.suggest, max_evals=50, trial_timeout=30)

In [28]:
# perform the search
model.fit(X_train, y_train)
# summarize performance
acc = model.score(X_test, y_test)
print("Accuracy: %.3f" % acc)
# summarize the best model
print(model.best_model())

100%|██████████| 1/1 [00:14<00:00, 14.33s/trial, best loss: 0.5748270232449662]
100%|██████████| 2/2 [00:02<00:00,  2.78s/trial, best loss: 0.56044468630957]
100%|██████████| 3/3 [00:31<00:00, 31.71s/trial, best loss: 0.56044468630957]
100%|██████████| 4/4 [00:31<00:00, 31.78s/trial, best loss: 0.56044468630957]
100%|██████████| 5/5 [00:31<00:00, 31.76s/trial, best loss: 0.56044468630957]
100%|██████████| 6/6 [00:02<00:00,  2.08s/trial, best loss: 0.56044468630957]
100%|██████████| 7/7 [00:04<00:00,  4.40s/trial, best loss: 0.5431858819870947]
100%|██████████| 8/8 [00:02<00:00,  2.08s/trial, best loss: 0.5431858819870947]
100%|██████████| 9/9 [00:02<00:00,  2.73s/trial, best loss: 0.5431858819870947]
100%|██████████| 10/10 [00:31<00:00, 31.69s/trial, best loss: 0.5431858819870947]
100%|██████████| 11/11 [00:31<00:00, 31.80s/trial, best loss: 0.5431858819870947]
100%|██████████| 12/12 [00:02<00:00,  2.47s/trial, best loss: 0.5431858819870947]
100%|██████████| 13/13 [00:12<00:00, 12.01s/

  "X does not have valid feature names, but"


#### Results from hyperopt: 
    Accuracy: 0.470
{'learner': SGDClassifier(alpha=0.0002257465979246694, eta0=0.000573802785584797,
              l1_ratio=0.036808573518640926, learning_rate='invscaling',
              loss='modified_huber', max_iter=1209, n_jobs=1, penalty='l1',
              power_t=0.4423074559498752, random_state=3,
              tol=0.002940709069958797), 'preprocs': (StandardScaler(with_std=False),), 'ex_preprocs': ()}