In [1]:
import pandas as pd
import numpy as np

X1 = pd.read_csv('../data/features.csv', header=None).values
X2 = pd.read_csv('../data/raw_images.csv', header=None).values
y1 = pd.read_csv('../data/labels.csv', header=None).values.ravel().astype(int)

print('X1.shape:', X1.shape)
print('X2.shape:', X2.shape)
print('y1.shape:', y1.shape)

X1.shape: (4559, 512)
X2.shape: (4559, 30000)
y1.shape: (4559,)


In [2]:
from sklearn.model_selection import train_test_split

X1_train, X1_test, y1_train, y1_test = \
    train_test_split(X1, y1, test_size=0.3, random_state=123, shuffle=True, stratify=y1)

X1_train_sub, X1_valid, y1_train_sub, y1_valid = \
    train_test_split(X1_train, y1_train, test_size=0.2, random_state=123, stratify=y1_train)

print('Train/Valid/Test sizes:', y1_train.shape[0], y1_valid.shape[0], y1_test.shape[0])

Train/Valid/Test sizes: 3191 639 1368


In [3]:
X2_train, X2_test, y1_train, y1_test = \
    train_test_split(X2, y1, test_size=0.3, random_state=123, shuffle=True, stratify=y1)

X2_train_sub, X2_valid, y1_train_sub, y1_valid = \
    train_test_split(X2_train, y1_train, test_size=0.2, random_state=123, stratify=y1_train)

print('Train/Valid/Test sizes:', y1_train.shape[0], y1_valid.shape[0], y1_test.shape[0])

Train/Valid/Test sizes: 3191 639 1368


## XGBoost

### X1

In [13]:
import scipy.stats
from sklearn.model_selection import RandomizedSearchCV
from xgboost import XGBClassifier

xgb = XGBClassifier(random_state=123, use_label_encoder=False)

params = {
    'n_estimators':[30, 50, 100, 300, 500],
    'min_child_weight':[4,5], 
    "lambda": scipy.stats.uniform(1e-8, 1.0),
    "alpha": scipy.stats.uniform(1e-8, 1.0),
    'gamma':[i/10.0 for i in range(3,6)],  
    'subsample':[i/10.0 for i in range(6,11)],
    'colsample_bytree':[i/10.0 for i in range(6,11)], 
    'max_depth': [2,3,4,6,7],
    'objective': ['reg:squarederror', 'reg:tweedie'],
    'booster': ['gbtree', 'gblinear'],
    'eval_metric': ['rmse'],
    'eta': [i/10.0 for i in range(3,6)],
}

search = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=params,
    n_iter=15,
    cv=5,
    verbose=2,
    n_jobs=1,
    random_state=123)

search.fit(X1_train, y1_train)

search.best_score_

Fitting 5 folds for each of 15 candidates, totalling 75 fits
[CV] END alpha=0.6964691955978617, booster=gbtree, colsample_bytree=0.8, eta=0.3, eval_metric=rmse, gamma=0.5, lambda=0.5513147790828913, max_depth=6, min_child_weight=4, n_estimators=300, objective=reg:tweedie, subsample=0.7; total time=  14.8s
[CV] END alpha=0.6964691955978617, booster=gbtree, colsample_bytree=0.8, eta=0.3, eval_metric=rmse, gamma=0.5, lambda=0.5513147790828913, max_depth=6, min_child_weight=4, n_estimators=300, objective=reg:tweedie, subsample=0.7; total time=  14.9s
[CV] END alpha=0.6964691955978617, booster=gbtree, colsample_bytree=0.8, eta=0.3, eval_metric=rmse, gamma=0.5, lambda=0.5513147790828913, max_depth=6, min_child_weight=4, n_estimators=300, objective=reg:tweedie, subsample=0.7; total time=  15.5s
[CV] END alpha=0.6964691955978617, booster=gbtree, colsample_bytree=0.8, eta=0.3, eval_metric=rmse, gamma=0.5, lambda=0.5513147790828913, max_depth=6, min_child_weight=4, n_estimators=300, objective=re

[CV] END alpha=0.4920847867923423, booster=gblinear, colsample_bytree=1.0, eta=0.3, eval_metric=rmse, gamma=0.3, lambda=0.4172099580253376, max_depth=6, min_child_weight=4, n_estimators=50, objective=reg:squarederror, subsample=0.6; total time=   0.2s
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] END alpha=0.4920847867923423, booster=gblinear, colsample_bytree=1.0, eta=0.3, eval_metric=rmse, gamma=0.3, lambda=0.4172099580253376, max_depth=6, min_child_weight=4, n_estimators=50, objective=reg:squarederror, subsample=0.6; total time=   0.2s
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are onl

[CV] END alpha=0.48303427426270434, booster=gblinear, colsample_bytree=0.7, eta=0.5, eval_metric=rmse, gamma=0.3, lambda=0.7515430037886347, max_depth=7, min_child_weight=4, n_estimators=50, objective=reg:tweedie, subsample=0.9; total time=   0.2s
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] END alpha=0.48303427426270434, booster=gblinear, colsample_bytree=0.7, eta=0.5, eval_metric=rmse, gamma=0.3, lambda=0.7515430037886347, max_depth=7, min_child_weight=4, n_estimators=50, objective=reg:tweedie, subsample=0.9; total time=   0.2s
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used i

[CV] END alpha=0.21229811338998222, booster=gblinear, colsample_bytree=0.8, eta=0.3, eval_metric=rmse, gamma=0.4, lambda=0.01612921669501683, max_depth=2, min_child_weight=5, n_estimators=30, objective=reg:tweedie, subsample=0.9; total time=   0.1s
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] END alpha=0.21229811338998222, booster=gblinear, colsample_bytree=0.8, eta=0.3, eval_metric=rmse, gamma=0.4, lambda=0.01612921669501683, max_depth=2, min_child_weight=5, n_estimators=30, objective=reg:tweedie, subsample=0.9; total time=   0.1s
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used

[CV] END alpha=0.23130149131227545, booster=gblinear, colsample_bytree=1.0, eta=0.5, eval_metric=rmse, gamma=0.3, lambda=0.00413465274939046, max_depth=7, min_child_weight=4, n_estimators=500, objective=reg:squarederror, subsample=0.6; total time=   1.5s
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV] END alpha=0.23130149131227545, booster=gblinear, colsample_bytree=1.0, eta=0.5, eval_metric=rmse, gamma=0.3, lambda=0.00413465274939046, max_depth=7, min_child_weight=4, n_estimators=500, objective=reg:squarederror, subsample=0.6; total time=   1.5s
[CV] END alpha=0.15112746234808022, booster=gbtree, colsample_bytree=0.6, eta=0.3, eval_metric=rmse, gamma=0.5, lambda=0.3434560240483249, max_depth=2,

0.9229060885690318

In [14]:
search.best_params_

{'alpha': 0.6848297485848633,
 'booster': 'gbtree',
 'colsample_bytree': 0.7,
 'eta': 0.3,
 'eval_metric': 'rmse',
 'gamma': 0.5,
 'lambda': 0.6273170193376167,
 'max_depth': 3,
 'min_child_weight': 5,
 'n_estimators': 500,
 'objective': 'reg:squarederror',
 'subsample': 0.6}

In [15]:
print(f"Training Accuracy: {search.best_estimator_.score(X1_train, y1_train)*100: 0.2f}%") 
print(f"Test Accuracy: {search.best_estimator_.score(X1_test, y1_test)*100: 0.2f}%")

Training Accuracy:  99.94%
Test Accuracy:  91.23%


In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

cv_acc = cross_val_score(estimator=XGBClassifier(random_state=123, 
                                                 use_label_encoder=False,
                                                 alpha=0.6848297485848633,
                                                 booster='gbtree',
                                                 colsample_bytree=0.7,
                                                 eta=0.3,
                                                 eval_metric='rmse',
                                                 gamma=0.5,
                                                 'lambda'=0.6273170193376167,
                                                 max_depth= 3,
                                                 min_child_weight=5,
                                                 n_estimators=500,
                                                 objective='reg:squarederror',
                                                 subsample=0.6),
                         X=X1,
                         y=y1,
                         cv=StratifiedKFold(n_splits=10, random_state=123, shuffle=True),
                         n_jobs=-1)

print('Kfold Accuracy: %.2f%%' % (np.mean(cv_acc)*100))

### X2

In [9]:
search = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=params,
    n_iter=15,
    cv=5,
    verbose=10,
    n_jobs=1,
    random_state=123)

search.fit(X2_train, y1_train)

search.best_score_

Fitting 5 folds for each of 15 candidates, totalling 75 fits
[CV 1/5; 1/15] START alpha=0.6964691955978617, booster=gbtree, colsample_bytree=0.8, eta=0.3, eval_metric=rmse, gamma=0.5, lambda=0.5513147790828913, max_depth=6, min_child_weight=4, n_estimators=300, objective=reg:tweedie, subsample=0.7
[CV 1/5; 1/15] END alpha=0.6964691955978617, booster=gbtree, colsample_bytree=0.8, eta=0.3, eval_metric=rmse, gamma=0.5, lambda=0.5513147790828913, max_depth=6, min_child_weight=4, n_estimators=300, objective=reg:tweedie, subsample=0.7;, score=0.933 total time=11.3min
[CV 2/5; 1/15] START alpha=0.6964691955978617, booster=gbtree, colsample_bytree=0.8, eta=0.3, eval_metric=rmse, gamma=0.5, lambda=0.5513147790828913, max_depth=6, min_child_weight=4, n_estimators=300, objective=reg:tweedie, subsample=0.7
[CV 2/5; 1/15] END alpha=0.6964691955978617, booster=gbtree, colsample_bytree=0.8, eta=0.3, eval_metric=rmse, gamma=0.5, lambda=0.5513147790828913, max_depth=6, min_child_weight=4, n_estimators=

Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV 4/5; 3/15] END alpha=0.7379954157320358, booster=gblinear, colsample_bytree=0.9, eta=0.5, eval_metric=rmse, gamma=0.3, lambda=0.5315513838418384, max_depth=7, min_child_weight=5, n_estimators=30, objective=reg:tweedie, subsample=0.6;, score=0.928 total time=   8.1s
[CV 5/5; 3/15] START alpha=0.7379954157320358, booster=gblinear, colsample_bytree=0.9, eta=0.5, eval_metric=rmse, gamma=0.3, lambda=0.5315513838418384, max_depth=7, min_child_weight=5, n_estimators=30, objective=reg:tweedie, subsample=0.6
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters ar

[CV 2/5; 5/15] END alpha=0.7659959601929949, booster=gblinear, colsample_bytree=0.6, eta=0.5, eval_metric=rmse, gamma=0.5, lambda=0.4936851076503062, max_depth=4, min_child_weight=4, n_estimators=300, objective=reg:tweedie, subsample=1.0;, score=0.912 total time= 1.8min
[CV 3/5; 5/15] START alpha=0.7659959601929949, booster=gblinear, colsample_bytree=0.6, eta=0.5, eval_metric=rmse, gamma=0.5, lambda=0.4936851076503062, max_depth=4, min_child_weight=4, n_estimators=300, objective=reg:tweedie, subsample=1.0
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV 3/5; 5/15] END alpha=0.7659959601929949, booster=gblinear, colsample_bytree=0.6, eta=0.5, eval_metric=rmse, gamma=0.5, lambda=0.4936851076503062, 

[CV 3/5; 7/15] END alpha=0.48303427426270434, booster=gblinear, colsample_bytree=0.7, eta=0.5, eval_metric=rmse, gamma=0.3, lambda=0.7515430037886347, max_depth=7, min_child_weight=4, n_estimators=50, objective=reg:tweedie, subsample=0.9;, score=0.945 total time=  12.7s
[CV 4/5; 7/15] START alpha=0.48303427426270434, booster=gblinear, colsample_bytree=0.7, eta=0.5, eval_metric=rmse, gamma=0.3, lambda=0.7515430037886347, max_depth=7, min_child_weight=4, n_estimators=50, objective=reg:tweedie, subsample=0.9
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV 4/5; 7/15] END alpha=0.48303427426270434, booster=gblinear, colsample_bytree=0.7, eta=0.5, eval_metric=rmse, gamma=0.3, lambda=0.7515430037886347,

Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV 2/5; 9/15] END alpha=0.26828131771172453, booster=gblinear, colsample_bytree=0.9, eta=0.3, eval_metric=rmse, gamma=0.5, lambda=0.8423424476202573, max_depth=6, min_child_weight=5, n_estimators=500, objective=reg:squarederror, subsample=0.9;, score=0.923 total time= 5.1min
[CV 3/5; 9/15] START alpha=0.26828131771172453, booster=gblinear, colsample_bytree=0.9, eta=0.3, eval_metric=rmse, gamma=0.5, lambda=0.8423424476202573, max_depth=6, min_child_weight=5, n_estimators=500, objective=reg:squarederror, subsample=0.9
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some

[CV 5/5; 10/15] END alpha=0.21229811338998222, booster=gblinear, colsample_bytree=0.8, eta=0.3, eval_metric=rmse, gamma=0.4, lambda=0.01612921669501683, max_depth=2, min_child_weight=5, n_estimators=30, objective=reg:tweedie, subsample=0.9;, score=0.947 total time=   9.7s
[CV 1/5; 11/15] START alpha=0.695529538770911, booster=gblinear, colsample_bytree=0.7, eta=0.3, eval_metric=rmse, gamma=0.4, lambda=0.2183760235542644, max_depth=7, min_child_weight=4, n_estimators=500, objective=reg:tweedie, subsample=1.0
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV 1/5; 11/15] END alpha=0.695529538770911, booster=gblinear, colsample_bytree=0.7, eta=0.3, eval_metric=rmse, gamma=0.4, lambda=0.2183760235542644

Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV 4/5; 12/15] END alpha=0.23130149131227545, booster=gblinear, colsample_bytree=1.0, eta=0.5, eval_metric=rmse, gamma=0.3, lambda=0.00413465274939046, max_depth=7, min_child_weight=4, n_estimators=500, objective=reg:squarederror, subsample=0.6;, score=0.931 total time= 5.7min
[CV 5/5; 12/15] START alpha=0.23130149131227545, booster=gblinear, colsample_bytree=1.0, eta=0.5, eval_metric=rmse, gamma=0.3, lambda=0.00413465274939046, max_depth=7, min_child_weight=4, n_estimators=500, objective=reg:squarederror, subsample=0.6
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to 

[CV 2/5; 15/15] END alpha=0.17108183920509906, booster=gblinear, colsample_bytree=0.7, eta=0.4, eval_metric=rmse, gamma=0.3, lambda=0.578551478108833, max_depth=3, min_child_weight=4, n_estimators=100, objective=reg:squarederror, subsample=0.7;, score=0.928 total time=  28.7s
[CV 3/5; 15/15] START alpha=0.17108183920509906, booster=gblinear, colsample_bytree=0.7, eta=0.4, eval_metric=rmse, gamma=0.3, lambda=0.578551478108833, max_depth=3, min_child_weight=4, n_estimators=100, objective=reg:squarederror, subsample=0.7
Parameters: { colsample_bytree, gamma, max_depth, min_child_weight, subsample } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[CV 3/5; 15/15] END alpha=0.17108183920509906, booster=gblinear, colsample_bytree=0.7, eta=0.4, eval_metric=rmse, gamma=0.3, lambda=0.5785

0.9442187783615662

In [10]:
search.best_params_

{'alpha': 0.6848297485848633,
 'booster': 'gbtree',
 'colsample_bytree': 0.7,
 'eta': 0.3,
 'eval_metric': 'rmse',
 'gamma': 0.5,
 'lambda': 0.6273170193376167,
 'max_depth': 3,
 'min_child_weight': 5,
 'n_estimators': 500,
 'objective': 'reg:squarederror',
 'subsample': 0.6}

In [12]:
print(f"Training Accuracy: {search.best_estimator_.score(X2_train, y1_train)*100: 0.2f}%") 
print(f"Test Accuracy: {search.best_estimator_.score(X2_test, y1_test)*100: 0.2f}%")

Training Accuracy:  100.00%
Test Accuracy:  95.18%


In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

cv_acc = cross_val_score(estimator=XGBClassifier(random_state=123, 
                                                 use_label_encoder=False,
                                                 alpha=0.6848297485848633,
                                                 booster='gbtree',
                                                 colsample_bytree=0.7,
                                                 eta=0.3,
                                                 eval_metric='rmse',
                                                 gamma=0.5,
                                                 'lambda'=0.6273170193376167,
                                                 max_depth= 3,
                                                 min_child_weight=5,
                                                 n_estimators=500,
                                                 objective='reg:squarederror',
                                                 subsample=0.6),
                         X=X2,
                         y=y1,
                         cv=StratifiedKFold(n_splits=10, random_state=123, shuffle=True),
                         n_jobs=-1)

print('Kfold Accuracy: %.2f%%' % (np.mean(cv_acc)*100))

## Logistic Regression

### X1

In [8]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(random_state=123)

params = {
    "C": scipy.stats.expon(scale=.01),
    "fit_intercept": [True, False],
    "warm_start": [True,False]
}

search = RandomizedSearchCV(
    estimator=lr,
    param_distributions=params,
    n_iter=15,
    cv=10,
    verbose=2,
    n_jobs=1,
    random_state=123)

search.fit(X1_train, y1_train)

search.best_score_

Fitting 10 folds for each of 15 candidates, totalling 150 fits
[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=   0.1s
[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=   0.1s
[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=   0.1s
[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=   0.0s
[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=   0.2s
[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=   0.0s
[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=   0.0s
[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=   0.0s
[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=   0.0s
[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=   0.0s
[CV] END C=0.002572840801170508, fit_intercept=

[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=   0.0s
[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=   0.0s
[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=   0.0s
[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=   0.0s
[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=   0.0s
[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=   0.0s
[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=   0.0s
[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=   0.0s
[CV] END C=0.010062180612636784, fit_intercept=True, warm_start=False; total time=   0.0s
[CV] END C=0.010062180612636784, fit_intercept=True, warm_start=False; total time=   0.0s
[CV] END C=0.010062180612636784, fit_intercept=True, warm_start=False; total time=   0.0s
[CV] END C

0.759635579937304

In [9]:
search.best_params_

{'C': 0.03950982068814718, 'fit_intercept': True, 'warm_start': False}

In [10]:
print(f"Training Accuracy: {search.best_estimator_.score(X1_train, y1_train)*100: 0.2f}%") 
print(f"Test Accuracy: {search.best_estimator_.score(X1_test, y1_test)*100: 0.2f}%")

Training Accuracy:  76.68%
Test Accuracy:  78.22%


In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

cv_acc = cross_val_score(estimator=LogisticRegression(random_state=123, 
                                                      C=0.03950982068814718,
                                                      fit_intercept=True,
                                                      warm_start=False),
                         X=X1,
                         y=y1,
                         cv=StratifiedKFold(n_splits=10, random_state=123, shuffle=True),
                         n_jobs=-1)

print('Kfold Accuracy: %.2f%%' % (np.mean(cv_acc)*100))

### X2

In [13]:
search = RandomizedSearchCV(
    estimator=lr,
    param_distributions=params,
    n_iter=15,
    cv=5,
    verbose=2,
    n_jobs=1,
    random_state=123)

search.fit(X2_train, y1_train)

search.best_score_

Fitting 5 folds for each of 15 candidates, totalling 75 fits


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=  10.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=  11.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=  10.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=  10.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.011922721434811058, fit_intercept=True, warm_start=True; total time=  10.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.002572840801170508, fit_intercept=True, warm_start=False; total time=  10.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.002572840801170508, fit_intercept=True, warm_start=False; total time=  10.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.002572840801170508, fit_intercept=True, warm_start=False; total time=  10.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.002572840801170508, fit_intercept=True, warm_start=False; total time=  10.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.002572840801170508, fit_intercept=True, warm_start=False; total time=  10.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012710709354874424, fit_intercept=False, warm_start=False; total time=  10.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012710709354874424, fit_intercept=False, warm_start=False; total time=  10.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012710709354874424, fit_intercept=False, warm_start=False; total time=  10.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012710709354874424, fit_intercept=False, warm_start=False; total time=  10.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012710709354874424, fit_intercept=False, warm_start=False; total time=  11.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.03950982068814718, fit_intercept=True, warm_start=False; total time=  10.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.03950982068814718, fit_intercept=True, warm_start=False; total time=  10.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.03950982068814718, fit_intercept=True, warm_start=False; total time=  10.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.03950982068814718, fit_intercept=True, warm_start=False; total time=  10.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.03950982068814718, fit_intercept=True, warm_start=False; total time=  11.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.006557201934108684, fit_intercept=False, warm_start=True; total time=  10.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.006557201934108684, fit_intercept=False, warm_start=True; total time=  10.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.006557201934108684, fit_intercept=False, warm_start=True; total time=  10.9s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.006557201934108684, fit_intercept=False, warm_start=True; total time=  10.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.006557201934108684, fit_intercept=False, warm_start=True; total time=  10.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.004203422502573983, fit_intercept=False, warm_start=False; total time=  10.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.004203422502573983, fit_intercept=False, warm_start=False; total time=  10.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.004203422502573983, fit_intercept=False, warm_start=False; total time=  10.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.004203422502573983, fit_intercept=False, warm_start=False; total time=  11.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.004203422502573983, fit_intercept=False, warm_start=False; total time=  11.1s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.005772721768030297, fit_intercept=False, warm_start=True; total time=  11.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.005772721768030297, fit_intercept=False, warm_start=True; total time=  11.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.005772721768030297, fit_intercept=False, warm_start=True; total time=  11.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.005772721768030297, fit_intercept=False, warm_start=True; total time=  11.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.005772721768030297, fit_intercept=False, warm_start=True; total time=  11.1s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.005075713502135167, fit_intercept=True, warm_start=False; total time=  11.1s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.005075713502135167, fit_intercept=True, warm_start=False; total time=  11.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.005075713502135167, fit_intercept=True, warm_start=False; total time=  11.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.005075713502135167, fit_intercept=True, warm_start=False; total time=  11.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.005075713502135167, fit_intercept=True, warm_start=False; total time=  11.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.0020149426060868045, fit_intercept=True, warm_start=True; total time=  11.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.0020149426060868045, fit_intercept=True, warm_start=True; total time=  11.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.0020149426060868045, fit_intercept=True, warm_start=True; total time=  11.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.0020149426060868045, fit_intercept=True, warm_start=True; total time=  11.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.0020149426060868045, fit_intercept=True, warm_start=True; total time=  11.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=  11.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=  11.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=  11.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=  11.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.007583288393287821, fit_intercept=True, warm_start=False; total time=  11.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.010062180612636784, fit_intercept=True, warm_start=False; total time=  11.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.010062180612636784, fit_intercept=True, warm_start=False; total time=  11.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.010062180612636784, fit_intercept=True, warm_start=False; total time=  11.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.010062180612636784, fit_intercept=True, warm_start=False; total time=  11.9s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.010062180612636784, fit_intercept=True, warm_start=False; total time=  11.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012890055033202343, fit_intercept=True, warm_start=True; total time=  11.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012890055033202343, fit_intercept=True, warm_start=True; total time=  11.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012890055033202343, fit_intercept=True, warm_start=True; total time=  11.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012890055033202343, fit_intercept=True, warm_start=True; total time=  11.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012890055033202343, fit_intercept=True, warm_start=True; total time=  11.1s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012817303398323033, fit_intercept=False, warm_start=False; total time=  11.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012817303398323033, fit_intercept=False, warm_start=False; total time=  11.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012817303398323033, fit_intercept=False, warm_start=False; total time=  10.9s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012817303398323033, fit_intercept=False, warm_start=False; total time=  11.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.012817303398323033, fit_intercept=False, warm_start=False; total time=  11.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.004490857897120136, fit_intercept=False, warm_start=True; total time=  11.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.004490857897120136, fit_intercept=False, warm_start=True; total time=  11.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.004490857897120136, fit_intercept=False, warm_start=True; total time=  11.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.004490857897120136, fit_intercept=False, warm_start=True; total time=  11.9s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.004490857897120136, fit_intercept=False, warm_start=True; total time=  11.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.0034773509004921363, fit_intercept=True, warm_start=False; total time=  11.7s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.0034773509004921363, fit_intercept=True, warm_start=False; total time=  11.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.0034773509004921363, fit_intercept=True, warm_start=False; total time=  11.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.0034773509004921363, fit_intercept=True, warm_start=False; total time=  11.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=0.0034773509004921363, fit_intercept=True, warm_start=False; total time=  11.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.922281091635147

In [14]:
search.best_params_

{'C': 0.002572840801170508, 'fit_intercept': True, 'warm_start': False}

In [15]:
print(f"Training Accuracy: {search.best_estimator_.score(X2_train, y1_train)*100: 0.2f}%") 
print(f"Test Accuracy: {search.best_estimator_.score(X2_test, y1_test)*100: 0.2f}%")

Training Accuracy:  100.00%
Test Accuracy:  92.76%


In [1]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

cv_acc = cross_val_score(estimator=LogisticRegression(random_state=123, 
                                                      C=0.002572840801170508,
                                                      fit_intercept=True,
                                                      warm_start=False),
                         X=X2,
                         y=y1,
                         cv=StratifiedKFold(n_splits=10, random_state=123, shuffle=True),
                         n_jobs=-1)

print('Kfold Accuracy: %.2f%%' % (np.mean(cv_acc)*100))

NameError: name 'LogisticRegression' is not defined

## Decision Tree + Bagging

### X1

In [23]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RandomizedSearchCV

tree = DecisionTreeClassifier(random_state=123)

params =  {
    'min_samples_split': [2, 3, 4],
    'max_depth': [5, 7, 9, 11, None],
    'criterion':['entropy', 'gini']
}

search = RandomizedSearchCV(
    estimator=tree,
    param_distributions=params,
    n_iter=15,
    cv=5,
    verbose=2,
    n_jobs=1,
    random_state=123)

search.fit(X1_train_sub, y1_train_sub)

search.best_score_

Fitting 5 folds for each of 15 candidates, totalling 75 fits
[CV] END criterion=entropy, max_depth=9, min_samples_split=3; total time=   0.3s
[CV] END criterion=entropy, max_depth=9, min_samples_split=3; total time=   0.3s
[CV] END criterion=entropy, max_depth=9, min_samples_split=3; total time=   0.3s
[CV] END criterion=entropy, max_depth=9, min_samples_split=3; total time=   0.3s
[CV] END criterion=entropy, max_depth=9, min_samples_split=3; total time=   0.3s
[CV] END criterion=gini, max_depth=None, min_samples_split=4; total time=   0.4s
[CV] END criterion=gini, max_depth=None, min_samples_split=4; total time=   0.3s
[CV] END criterion=gini, max_depth=None, min_samples_split=4; total time=   0.4s
[CV] END criterion=gini, max_depth=None, min_samples_split=4; total time=   0.3s
[CV] END criterion=gini, max_depth=None, min_samples_split=4; total time=   0.3s
[CV] END criterion=entropy, max_depth=7, min_samples_split=4; total time=   0.2s
[CV] END criterion=entropy, max_depth=7, min_sam

0.8691232109282069

In [24]:
search.best_params_

{'min_samples_split': 3, 'max_depth': 5, 'criterion': 'gini'}

In [26]:
print(f"Training Accuracy: {search.best_estimator_.score(X1_train_sub, y1_train_sub)*100: 0.2f}%") 
print(f"Valid Accuracy: {search.best_estimator_.score(X1_valid, y1_valid)*100: 0.2f}%")

Training Accuracy:  91.77%
Valid Accuracy:  87.32%


In [28]:
from sklearn.ensemble import BaggingClassifier

bag = BaggingClassifier(base_estimator=search.best_estimator_, 
                        n_estimators=150, 
                        oob_score=True, 
                        bootstrap=True,
                        bootstrap_features=False, 
                        n_jobs=1, 
                        random_state=123)

bag.fit(X1_train_sub, y1_train_sub)
print(f"Train Accuracy: {bag.score(X1_train_sub, y1_train_sub)*100:0.3f}%")
print(f"Valid Accuracy: {bag.score(X1_valid, y1_valid)*100:0.3f}%")

Train Accuracy: 93.064%
Valid Accuracy: 88.576%


In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

cv_acc = cross_val_score(estimator=bag,
                         X=X1,
                         y=y1,
                         cv=StratifiedKFold(n_splits=10, random_state=123, shuffle=True),
                         n_jobs=-1)

print('Kfold Accuracy: %.2f%%' % (np.mean(cv_acc)*100))

### X2

In [34]:
search.best_estimator_ = DecisionTreeClassifier(max_depth=18, 
                                                min_impurity_decrease=0.017492913234125385,
                                                min_samples_split=2)

In [35]:
from sklearn.ensemble import BaggingClassifier

bag = BaggingClassifier(base_estimator=search.best_estimator_, 
                        n_estimators=50, 
                        oob_score=True, 
                        bootstrap=True,
                        bootstrap_features=False, 
                        n_jobs=1, 
                        random_state=123)

bag.fit(X2_train_sub, y1_train_sub)
print(f"Train Accuracy: {bag.score(X2_train_sub, y1_train_sub)*100:0.3f}%")
print(f"Valid Accuracy: {bag.score(X2_valid, y1_valid)*100:0.3f}%")

Train Accuracy: 91.066%
Valid Accuracy: 89.984%


In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

cv_acc = cross_val_score(estimator=bag,
                         X=X2,
                         y=y1,
                         cv=StratifiedKFold(n_splits=10, random_state=123, shuffle=True),
                         n_jobs=-1)

print('Kfold Accuracy: %.2f%%' % (np.mean(cv_acc)*100))