# Hyper-parameter tuning

In [4]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris

In [6]:
iris = load_iris()
df = pd.DataFrame(iris['data'], columns=['SL','SW','PL','PW'])
df['target'] = iris['target']
df.head()

Unnamed: 0,SL,SW,PL,PW,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [12]:
from sklearn.model_selection import train_test_split
xtr, xts, ytr, yts = train_test_split(
    df[['SL', 'SW', 'PL', 'PW']], df['target']
)
xtr.head()

Unnamed: 0,SL,SW,PL,PW
45,4.8,3.0,1.4,0.3
46,5.1,3.8,1.6,0.2
18,5.7,3.8,1.7,0.3
117,7.7,3.8,6.7,2.2
49,5.0,3.3,1.4,0.2


In [25]:
from sklearn.linear_model import LogisticRegression
modelAsli = LogisticRegression()
modelAsli.fit(xtr, ytr)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [26]:
modelAsli.score(xts, yts)

0.9473684210526315

In [27]:
from sklearn.model_selection import cross_val_score
print(cross_val_score(modelAsli, xtr, ytr))
print(np.mean(cross_val_score(modelAsli, xtr, ytr)))

[1.         0.86956522 0.95454545 0.95454545 1.        ]
0.9557312252964426


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

In [28]:
# param yang akan dituned + nilai yg mungkin
penalty = ['l1', 'l2', 'elasticnet', 'none']
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
max_iter = [1, 10, 100, 1000, 10000]

param = {
    'penalty': penalty, 'solver': solver, 'max_iter': max_iter
}
param

{'penalty': ['l1', 'l2', 'elasticnet', 'none'],
 'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
 'max_iter': [1, 10, 100, 1000, 10000]}

In [29]:
# model.get_params()

Hyperparameter Tuning:
- Randomized Search Cross Validation
- Grid Search Cross Validation = 4 * 5 * 5 = 100

# Randomized Search CV

In [30]:
from sklearn.model_selection import RandomizedSearchCV
model = LogisticRegression()
modelrs = RandomizedSearchCV(
    estimator = model, param_distributions = param, cv = 5
)

In [31]:
modelrs.fit(xtr, ytr)
modelrs.best_params_

ValueError: l1_ratio must be between 0 and 1; got (l1_ratio=None)

ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.



{'solver': 'sag', 'penalty': 'none', 'max_iter': 10000}

In [32]:
modelAsli.score(xts, yts)

0.9473684210526315

In [35]:
modelBaru = LogisticRegression(
    solver='sag', penalty = 'none', max_iter = 10000
)
modelBaru.fit(xtr, ytr)
modelBaru.score(xts, yts)

0.9736842105263158

# Grid Search CV

In [38]:
from sklearn.model_selection import GridSearchCV
model = LogisticRegression()
modelgs = GridSearchCV(
    model, param, cv = 5
)

In [42]:
modelgs.fit(xtr, ytr)
modelgs.best_params_

ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterati

{'max_iter': 100, 'penalty': 'l2', 'solver': 'sag'}

In [44]:
modelAsli.score(xts, yts)

0.9473684210526315

In [45]:
modelBaru = LogisticRegression(
    max_iter = 100, penalty = 'l2', solver = 'sag'
)
modelBaru.fit(xtr, ytr)
modelBaru.score(xts, yts)



0.9736842105263158