# Classification Model Selection

## Data Preprocessing

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read the whole table, then separate it column-wise: every column except the
# last becomes the feature matrix X, and the last column becomes the label
# vector y that the classifiers below are fitted against.
dataset = pd.read_csv('Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [5]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for evaluation; the fixed random_state keeps
# the shuffle (and therefore every score below) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [6]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# scaler to both splits so no test-set statistics leak into the scaling.
# Both X_train and X_test are overwritten with their scaled versions.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [7]:
# Sanity check: preview the scaled training features (NumPy abbreviates the
# middle of large arrays with "...").
print(X_train)

[[-1.38219432  0.91903747  0.9407658  ...  2.22576767  2.27129602
   0.24623928]
 [ 0.03390689  1.27578287 -0.04290763 ...  1.82407819  1.94996317
   3.74830911]
 [ 0.22797663  1.27578287  2.25233038 ...  2.62745714  2.27129602
  -0.33743902]
 ...
 [ 0.16939025 -1.22143494 -0.69868992 ... -0.98774815 -0.62069958
  -0.33743902]
 [ 0.29888258 -0.50794414 -0.69868992 ... -0.58605867 -0.62069958
  -0.33743902]
 [-1.04129794  1.98927367  1.92443923 ...  1.42238871  1.30729749
  -0.33743902]]


In [8]:
# Sanity check: preview the scaled test features (NumPy abbreviates the
# middle of large arrays with "...").
print(X_test)

[[ 0.11037076 -1.22143494 -0.69868992 ... -0.98774815 -0.62069958
  -0.33743902]
 [ 0.08526811 -0.50794414 -0.69868992 ... -0.58605867 -0.62069958
  -0.33743902]
 [-0.56596836  0.20554667  0.61287466 ...  0.21732028  0.02196611
  -0.33743902]
 ...
 [-0.48116108  0.20554667 -0.69868992 ... -0.18436919 -0.62069958
   0.24623928]
 [ 0.05794779 -0.86468954 -0.37079877 ...  1.42238871 -0.62069958
  -0.33743902]
 [ 0.09172701 -0.86468954 -0.69868992 ... -0.18436919 -0.62069958
  -0.33743902]]


## Logistic Regression

In [9]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with a fixed seed, fitted on the scaled training split.
classifier1 = LogisticRegression(random_state=0)
classifier1.fit(X=X_train, y=y_train)

In [10]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score logistic regression on the held-out split: print the confusion matrix
# (rows = true class, columns = predicted class) and leave the accuracy as the
# cell's displayed value.
y_pred1 = classifier1.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred1)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred1)

[[103   4]
 [  5  59]]


0.9473684210526315

## K-Nearest Neighbors (KNN)

In [11]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours vote; the Minkowski metric with p=2 is ordinary
# Euclidean distance, which is why the features were standardized first.
classifier2 = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
)
classifier2.fit(X=X_train, y=y_train)

In [13]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score KNN on the held-out split: print the confusion matrix
# (rows = true class, columns = predicted class) and leave the accuracy as the
# cell's displayed value.
y_pred2 = classifier2.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred2)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred2)

[[103   4]
 [  5  59]]


0.9473684210526315

## Support Vector Machine with Linear kernel

In [16]:
from sklearn.svm import SVC

# Support vector classifier with a linear kernel and a fixed seed, fitted on
# the scaled training split.
classifier3 = SVC(kernel='linear', random_state=0)
classifier3.fit(X=X_train, y=y_train)

In [17]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the linear-kernel SVM on the held-out split: print the confusion
# matrix (rows = true class, columns = predicted class) and leave the accuracy
# as the cell's displayed value.
y_pred3 = classifier3.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred3)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred3)

[[102   5]
 [  5  59]]


0.9415204678362573

## Support Vector Machine with RBF kernel

In [18]:
from sklearn.svm import SVC

# Support vector classifier with an RBF kernel and a fixed seed, fitted on
# the scaled training split.
classifier4 = SVC(kernel='rbf', random_state=0)
classifier4.fit(X=X_train, y=y_train)

In [22]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the RBF-kernel SVM on the held-out split: print the confusion matrix
# (rows = true class, columns = predicted class) and leave the accuracy as the
# cell's displayed value.
y_pred4 = classifier4.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred4)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred4)

[[102   5]
 [  3  61]]


0.9532163742690059

## Naive Bayes

In [21]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings, fitted on the scaled training
# split.
classifier5 = GaussianNB()
classifier5.fit(X=X_train, y=y_train)

In [23]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score naive Bayes on the held-out split: print the confusion matrix
# (rows = true class, columns = predicted class) and leave the accuracy as the
# cell's displayed value.
y_pred5 = classifier5.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred5)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred5)

[[99  8]
 [ 2 62]]


0.9415204678362573

## Decision Tree Classification

In [24]:
from sklearn.tree import DecisionTreeClassifier

# Single decision tree that chooses splits by the entropy criterion; the fixed
# seed keeps the fitted tree repeatable.
classifier6 = DecisionTreeClassifier(
    criterion='entropy',
    random_state=0,
)
classifier6.fit(X=X_train, y=y_train)

In [25]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the decision tree on the held-out split: print the confusion matrix
# (rows = true class, columns = predicted class) and leave the accuracy as the
# cell's displayed value.
y_pred6 = classifier6.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred6)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred6)

[[103   4]
 [  3  61]]


0.9590643274853801

## Random Forest Classification

In [33]:
from sklearn.ensemble import RandomForestClassifier

# Ensemble of 20 trees using the Gini split criterion; the fixed seed keeps
# the fitted forest repeatable.
classifier7 = RandomForestClassifier(
    n_estimators=20,
    criterion='gini',
    random_state=0,
)
classifier7.fit(X=X_train, y=y_train)

In [34]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the random forest on the held-out split: print the confusion matrix
# (rows = true class, columns = predicted class) and leave the accuracy as the
# cell's displayed value.
y_pred7 = classifier7.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred7)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred7)

[[103   4]
 [  5  59]]


0.9473684210526315