# Parametreler Nasıl Optimize edilir?

Temel 3 problem var. 
* Hangi Algoritma?
* Verinin hangi kısmı
* Algoritmanın hangi parametreleri ne olmalı?

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [2]:
dataset = pd.read_csv("Social_Network_Ads.csv")

In [3]:
X= dataset.iloc[:,[2,3]].values
Y = dataset.iloc[:,4].values

In [4]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.33,random_state=0)

In [5]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

In [6]:
classifier = SVC(kernel="rbf",random_state=0)
classifier.fit(X_train,Y_train)

In [7]:
y_pred = classifier.predict(X_test)

In [8]:
cm = confusion_matrix(Y_test,y_pred)
cm

array([[76,  8],
       [ 4, 44]], dtype=int64)

# K Katmanlı Seçim
* 1.estimator
* X
* Y
* cv kaç katmanlı

In [9]:
from sklearn.model_selection import cross_val_score

In [10]:
basari = cross_val_score(estimator = classifier,
                         X=X_train,
                         y=Y_train,
                         cv=4)

In [11]:
basari.mean()

0.9067164179104478

In [12]:
basari.std()

0.030542361089076302

# Parametre Optimizasyonu ve Algoritma Seçimi

In [14]:
from sklearn.model_selection import GridSearchCV

In [25]:
parametreler = {"C":[1,2,3,4],
     "kernel":["rbf","linear"],
     "gamma":[1,0.9,0.8,0.7,0.6,0.5]}


* estimator Sınıflandırma algoritması
* param_grid : Verilecek parametreler
* scoring : neye göre skorlanacak
* cv  : Kaç katlamalı olacağı
* n_jobs : aynı anda çalışacak iş


In [26]:
gs = GridSearchCV(estimator=classifier,
                  param_grid=parametreler,
                  scoring="accuracy",
                  cv=10,
                  n_jobs=-1)

In [27]:
grid_seacrh = gs.fit(X_train,Y_train)

In [31]:
en_iyi_parametreler = grid_seacrh.best_params_
en_iyi_parametreler

{'C': 3, 'gamma': 0.7, 'kernel': 'rbf'}

In [29]:
grid_seacrh.best_score_

0.9257834757834758

In [33]:
svc_tuned = grid_seacrh.best_estimator_

In [34]:
y_pred = svc_tuned.predict(X_test)

In [36]:
cm = confusion_matrix(Y_test,y_pred)
cm

array([[78,  6],
       [ 6, 42]], dtype=int64)

In [38]:
svc_tuned.get_params()

{'C': 3,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 0.7,
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': 0,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}