In [548]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_iris

### Data, x, y

In [549]:
iris = load_iris()

In [550]:
x = iris.data
y = iris.target

### Normalization

In [551]:
x = (x - np.min(x))/(np.max(x) - np.min(x))

### Train Test Split

In [552]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3)

### KNN Model

In [553]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors = 3)

### K Fold CV K = 10

In [554]:
from sklearn.model_selection import cross_val_score

accuracies =  cross_val_score(estimator = knn, X = x_train, y = y_train, cv = 10)


In [555]:
accuracies

array([0.90909091, 0.90909091, 1.        , 1.        , 1.        ,
       0.9       , 0.9       , 1.        , 1.        , 0.9       ])

In [556]:
print("average accuracy: ", np.mean(accuracies))
print("average std: ", np.std(accuracies))

average accuracy:  0.9518181818181818
average std:  0.04828462445414364


### KNN test accuracy

In [557]:
knn.fit(x_train, y_train)
print("knn test score: ", knn.score(x_test, y_test))

knn test score:  0.9777777777777777


### Grid Search Cross Validation 

- hangi k değerine göre sonuç en iyi olduğunu bulur

In [558]:
from sklearn.model_selection import GridSearchCV

grid = {"n_neighbors": np.arange(1, 50)}
knn_grid = KNeighborsClassifier()

knn_cv = GridSearchCV(knn_grid, grid, cv = 10)
knn_cv.fit(x, y)

print("tuned hyperparameter K: ", knn_cv.best_params_)
print("tuned prarametreye göre en iyi accuray: ", knn_cv.best_score_)

tuned hyperparameter K:  {'n_neighbors': 13}
tuned prarametreye göre en iyi accuray:  0.9800000000000001


In [559]:
knn2 = KNeighborsClassifier(n_neighbors = 13)
knn2.fit(x_train, y_train)

print("score: ", knn2.score(x_test, y_test))

score:  1.0


#### Logistic Regression göre Best değeri bulma

In [560]:
x_log = x[:100,:]
y_log = y[:100]

In [561]:
# %% normalization
x_log = (x_log-np.min(x_log))/(np.max(x_log)-np.min(x_log))

x_train_log, x_test_log, y_train_log, y_test_log = train_test_split(x_log, y_log, test_size = 0.3)

In [562]:
# %% Grid search CV with logistic regression

from sklearn.linear_model import LogisticRegression

log_grid = {"C":np.logspace(-3, 3, 7),"penalty":["l1","l2"]}  # l1 = lasso ve l2 = ridge

logreg = LogisticRegression()
logreg_cv = GridSearchCV(logreg, log_grid, cv = 10)
logreg_cv.fit(x_train_log, y_train_log)

Traceback (most recent call last):
  File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 1306, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\linear_model\_logistic.py", line 443, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\LENOVO\AppDat

GridSearchCV(cv=10, estimator=LogisticRegression(),
             param_grid={'C': array([1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02, 1.e+03]),
                         'penalty': ['l1', 'l2']})

In [563]:
print("tuned hyperparameters: (best parameters): ",logreg_cv.best_params_)
print("accuracy: ",logreg_cv.best_score_)

tuned hyperparameters: (best parameters):  {'C': 0.1, 'penalty': 'l2'}
accuracy:  1.0


In [564]:
logreg2 = LogisticRegression(C=1.0, penalty="l2")
logreg2.fit(x_train_log, y_train_log)
print("score: ", logreg2.score(x_test_log, y_test_log))

score:  1.0
