# Lojistik Regresyon üzerinde Optimizasyon

In [None]:
# Colab-only helper for attaching Google Drive to the runtime's file system.
from google.colab import drive
# Mount Drive at /content/drive; the paths defined below are rooted under this mount point.
drive.mount('/content/drive')

In [None]:
# Notebook folder on the mounted Google Drive; every other path is derived from it.
ROOT_DIR = (
    "/content/drive/MyDrive/CASGEM-Egitim/Egitim-Part1"
    "/Day8-Optimization/notebooks"
)
# Dataset folder; the trailing slash is kept so file names can be appended directly.
DATASET_PATH = f"{ROOT_DIR}/datasets/"

# Kütüphaneleri Yükleme

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so warnings raised while models are
# being fitted later in the notebook are hidden as well — consider narrowing it.
warnings.filterwarnings('ignore')

# Veri Kümesi Yükleme

In [None]:
# Read the breast-cancer diagnostic CSV from the dataset folder into a DataFrame.
data = pd.read_csv(f"{DATASET_PATH}breast_cancer_wisconsin_diagnostic/data.csv")
# Last expression of the cell: render the loaded table.
data

In [None]:
# Feature matrix: every column except the record id, the label, and the
# 'Unnamed: 32' column.
X = data.drop(columns=['id', 'diagnosis', 'Unnamed: 32'])

# Target vector: the 'diagnosis' column.
y = data['diagnosis']

# Özniteliklerde Eksik Değer Kontrolü

In [None]:
# Count missing entries per feature column.
X.isna().sum()
# Original author's note: no missing values were found.

In [None]:
# Preview the first five rows of the feature matrix.
X.head(n=5)

In [None]:
# Show the dimensions of the feature matrix as a (rows, columns) tuple.
X.shape

# Lojistik Regresyon Hiperparametrelerini Belirleme

In [None]:
# Base estimator with default settings; it is handed to the grid search below,
# which supplies the hyperparameter values to try.
logModel = LogisticRegression()

In [None]:
# Hyperparameter search space for LogisticRegression.
#
# The grid is split into one sub-grid per penalty because not every solver
# supports every penalty. A single crossed dict would generate many
# combinations that scikit-learn rejects at fit time (e.g. 'l1' with 'lbfgs',
# or 'elasticnet' with anything other than 'saga'); those candidates fail and
# only waste search time.
c_values = np.logspace(-4, 4, 20)        # inverse regularisation strengths
max_iter_values = [100, 1000, 2500, 5000]

param_grid = [
    # L2 is supported by every solver.
    {
        'penalty': ['l2'],
        'C': c_values,
        'solver': ['lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'],
        'max_iter': max_iter_values,
    },
    # L1 is only supported by liblinear and saga.
    {
        'penalty': ['l1'],
        'C': c_values,
        'solver': ['liblinear', 'saga'],
        'max_iter': max_iter_values,
    },
    # Elastic-net is only supported by saga and requires an explicit l1_ratio.
    {
        'penalty': ['elasticnet'],
        'C': c_values,
        'l1_ratio': [0.25, 0.5, 0.75],
        'solver': ['saga'],
        'max_iter': max_iter_values,
    },
    # No regularisation: C has no effect, so it is not swept here.
    # liblinear cannot fit an unpenalised model. Recent scikit-learn releases
    # expect None rather than the string 'none' (needs scikit-learn >= 1.2).
    {
        'penalty': [None],
        'solver': ['lbfgs', 'newton-cg', 'sag', 'saga'],
        'max_iter': max_iter_values,
    },
]

### Hiperparametreler ile ilgili daha fazla bilgi için:

* Solver: https://towardsdatascience.com/dont-sweat-the-solver-stuff-aea7cddc3451
* L1 and L2 Regularisation: https://towardsdatascience.com/l1-and-l2-regularization-methods-ce25e7fc831c
* Scikit-learn Logistic Regression: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

## Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Exhaustive search over param_grid with 3-fold cross-validation,
# using all available cores (n_jobs=-1) and progress output enabled.
clf = GridSearchCV(
    estimator=logModel,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=True,
)

In [None]:
# Run the grid search on the full feature matrix and labels, and keep the
# value returned by fit() for the inspection cells below.
best_clf = clf.fit(X,y)

In [None]:
# Display the estimator, with its hyperparameters, that the search selected.
best_clf.best_estimator_

# Başarımı Kontrol Et

In [None]:
# Accuracy on the very same samples the search was fitted on. This figure is
# optimistic because the selected model has already seen all of this data.
print (f'Accuracy - : {best_clf.score(X,y):.3f}')
# Mean cross-validated score of the selected configuration. It is computed on
# held-out folds during the search, so it is a fairer estimate of performance
# on unseen data.
print(f'Cross-validated accuracy: {best_clf.best_score_:.3f}')