# CLASSIFICATION
-------------

## 01 - Data Preprocessing

In [1]:
# Import Data

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load Data

veriler = pd.read_csv('veriler.csv')
veriler.dropna(inplace = True)

x = veriler.iloc[:, 1:4].values      # Independent Variable
y = veriler.iloc[:, 4].values       # Dependent Variables

# Split Data

from sklearn.model_selection import train_test_split

x_train, x_test,y_train,y_test = train_test_split(x,y,test_size=0.33, random_state=0)

# Scalling Data

from sklearn.preprocessing import StandardScaler

sc=StandardScaler()

X_train = sc.fit_transform(x_train)
X_test = sc.transform(x_test)

## 02 - Logistic Regressing

In [2]:
from sklearn.linear_model import LogisticRegression

# solver : ("newton_cg", "lbfgs", "liblinear", "sag", "saga") default = "lbfgs"
# random_state default: None

logr = LogisticRegression(random_state = 1, solver="saga")

logr.fit(X_train,y_train)  
y_pred = logr.predict(X_test)     

# Konfusion Matrix

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test,y_pred)
print("Logistic Regression")
print(cm)
print("*********************************************")

# K-Fold Cross Validation

from sklearn.model_selection import cross_val_score

basari = cross_val_score(estimator = logr, X = X_train, y = y_train , cv = 4)

print("Average success of model")
print(basari.mean())    
print("**********************************************") 

print("Average standard deviation of model")
print(basari.std())  
print("***********************************************")       

# Optimize Parameters on model

from sklearn.model_selection import GridSearchCV

p = [{"solver" : ["newton_cg", "lbfgs", "liblinear", "sag", "saga"]}]

gs = GridSearchCV(estimator=logr, param_grid=p, scoring="accuracy", cv=10, n_jobs=-1)
grid_search = gs.fit(x_train, y_train)

eniyisonuc = grid_search.best_score_
eniyiparametre = grid_search.best_params_

print("The best result")
print(eniyisonuc)
print("**********************************************")

print("The best parameter")
print(eniyiparametre)

Logistic Regression
[[0 1]
 [7 0]]
*********************************************
Average success of model
0.5625
**********************************************
Average standard deviation of model
0.16002386974726268
***********************************************




The best result
0.75
**********************************************
The best parameter
{'solver': 'saga'}




## 03 - K-NN
-----

In [3]:
from sklearn.neighbors import KNeighborsClassifier

# n_neighbors default = 5
# weights = ("uniform", "distance") default = "uniform"
# metric default = "minkowski"

knn = KNeighborsClassifier(n_neighbors = 5, metric='minkowski', weights="uniform")

knn.fit(X_train,y_train)
y_pred = knn.predict(X_test)

# Konfusion Matrix

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test,y_pred)
print("K-NN")
print(cm)
print("******************************************")

# K-Fold Cross Validaiton 

from sklearn.model_selection import cross_val_score

basari = cross_val_score(estimator = knn, X = X_train, y = y_train , cv = 4)

print("Avearge success of model")
print(basari.mean())  
print("******************************************")

print("Average standard deviation of model")
print(basari.std())
print("******************************************") 

# Optimize parameters on model

from sklearn.model_selection import GridSearchCV

p = [{"n_neighbors": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "weights": ["uniform", "distance"], "metric": ["minkowski"]}]

gs = GridSearchCV(estimator=knn, param_grid=p, scoring="accuracy", cv=4, n_jobs=-1)
grid_search = gs.fit(x_train, y_train)

eniyisonuc = grid_search.best_score_
eniyiparametre = grid_search.best_params_

print("The best result")
print(eniyisonuc)
print("*****************************************")

print("The best parameter")
print(eniyiparametre)

K-NN
[[0 1]
 [6 1]]
******************************************
Avearge success of model
0.7083333333333334
******************************************
Average standard deviation of model
0.2975595178559521
******************************************
The best result
0.7083333333333334
*****************************************
The best parameter
{'metric': 'minkowski', 'n_neighbors': 5, 'weights': 'uniform'}


## 04 - SVM
--------------

In [8]:
from sklearn.svm import SVC

"""
kernel: ("rbf", "poly", "linear", "sigmoid", "precomputed") default: "rbf"
degree default: "2"
gamma: ("auto", "scale") default: ("scale")
"""
               
svc = SVC(kernel = 'poly', degree = 3, gamma="auto")  

svc.fit(X_train,y_train)
y_pred = svc.predict(X_test)

# Confusion Matrix

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test,y_pred)
print('SVC')
print(cm)
print("****************************************************")

# K-Fold Cross Validation

from sklearn.model_selection import cross_val_score

basari = cross_val_score(estimator = svc, X = X_train, y = y_train , cv = 4)

print("Average success of model")
print(basari.mean())        
print("****************************************************")

print("Average standard deviation of model")
print(basari.std())     
print("****************************************************")

# Optimize parameters on model

from sklearn.model_selection import GridSearchCV

p = [{"kernel": ["rbf", "poly", "linear", "sigmoid"], "degree": [2, 3, 4, 5, 6, 7, 8, 9], "gamma": ["auto", "scale"]}]

gs = GridSearchCV(estimator=svc, param_grid=p, scoring="accuracy", cv=4, n_jobs=-1)
grid_search = gs.fit(x_train, y_train)

eniyisonuc = grid_search.best_score_
eniyiparametre = grid_search.best_params_

print("The best result")
print(eniyisonuc)
print("****************************************************")

print("The best parameter")
print(eniyiparametre)

SVC
[[1 0]
 [7 0]]
****************************************************
Average success of model
0.5625
****************************************************
Average standard deviation of model
0.16002386974726268
****************************************************
The best result
0.7708333333333334
****************************************************
The best parameter
{'degree': 3, 'gamma': 'auto', 'kernel': 'poly'}


## 05 - NAVIE BAYES
------------

In [6]:
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()

gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Confusion Matrix

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test,y_pred)
print('GNB')
print(cm)
print("****************************************************")

# K-Fold Cross Validation

from sklearn.model_selection import cross_val_score

basari = cross_val_score(estimator = gnb, X = X_train, y = y_train , cv = 4)

print("Average success of model")
print(basari.mean()) 
print("****************************************************")
       
print("Average standard deviation of mode")
print(basari.std())   

GNB
[[0 1]
 [6 1]]
****************************************************
Average success of model
0.7083333333333334
****************************************************
Average standard deviation of mode
0.2975595178559521


## 06 - DECISION TREE
------------

In [9]:
from sklearn.tree import DecisionTreeClassifier

# criterion = ("entropy", "gini") default = "gini"
# splitter = ("best", "random") default = "best"

dtc = DecisionTreeClassifier(random_state = 1, criterion = 'entropy', splitter = "best")

dtc.fit(X_train,y_train)
y_pred = dtc.predict(X_test)

# Confusion Matrix

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test,y_pred)
print('Decision Tree')
print(cm)
print("****************************************************")

# K-Fold Cross Validation

from sklearn.model_selection import cross_val_score

basari = cross_val_score(estimator = dtc, X = X_train, y = y_train , cv = 4)

print("Average success of model")
print(basari.mean())        
print("****************************************************")

print("Average standard deviation of model")
print(basari.std())
print("****************************************************")

# Optimize parameters on model

from sklearn.model_selection import GridSearchCV

p = [{"criterion": ["entropy", "gini"], "splitter": ["best", "random"]}]

gs = GridSearchCV(estimator=dtc, param_grid=p, scoring="accuracy", cv=4, n_jobs=-1)
grid_search = gs.fit(x_train, y_train)

eniyisonuc = grid_search.best_score_
eniyiparametre = grid_search.best_params_

print("The best result")
print(eniyisonuc)
print("****************************************************")

print("The best parameter")
print(eniyiparametre)

Decision Tree
[[1 0]
 [1 6]]
****************************************************
Average success of model
0.8541666666666666
****************************************************
Average standard deviation of model
0.14877975892797607
****************************************************
The best result
0.8541666666666666
****************************************************
The best parameter
{'criterion': 'entropy', 'splitter': 'best'}


## 07 - RANDOM FOREST
-----------------

In [11]:
from sklearn.ensemble import RandomForestClassifier

# n_estimators default = 100
# criterion = ("entropy", "gini") default = "gini"

rfc = RandomForestClassifier(random_state = 1, n_estimators=5, criterion = 'entropy')

rfc.fit(X_train,y_train)
y_pred = rfc.predict(X_test)

# Confusion Matrix

from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test,y_pred)

print('Random Forest')
print(cm)
print("****************************************************")

# K-Fold Cross Validation

from sklearn.model_selection import cross_val_score

basari = cross_val_score(estimator = rfc, X = X_train, y = y_train , cv = 4)

print("Average success of model")
print(basari.mean())   
print("****************************************************")

print("Average standard deviation of model")
print(basari.std())   
print("****************************************************")

# Optimize parameters on model

from sklearn.model_selection import GridSearchCV

p = [{"n_estimators": [1, 5, 10, 20, 40], "criterion": ["entropy", "gini"]}]

gs = GridSearchCV(estimator=rfc, param_grid=p, scoring="accuracy", cv=4, n_jobs=-1)
grid_search = gs.fit(x_train, y_train)

eniyisonuc = grid_search.best_score_
eniyiparametre = grid_search.best_params_

print("The best result")
print(eniyisonuc)

print("****************************************************")
print("The best parameter")
print(eniyiparametre)

Random Forest
[[0 1]
 [0 7]]
****************************************************
Average success of model
0.7916666666666666
****************************************************
Average standard deviation of model
0.21650635094610965
****************************************************
The best result
0.8333333333333334
****************************************************
The best parameter
{'criterion': 'entropy', 'n_estimators': 1}
