In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [15]:
# Data Preprocessing
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, 1:-1].values
y = dataset.iloc[:, -1].values

# Encoding dependent variable
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

# Splitting the dataset into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


In [16]:
# Traning the classification model on the training set

# Logistic Regression
from sklearn.linear_model import LogisticRegression
log_classifier = LogisticRegression(random_state=0)
log_classifier.fit(X_train, y_train)

# Euclidean KNN
from sklearn.neighbors import KNeighborsClassifier
euc_knn_classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
euc_knn_classifier.fit(X_train, y_train)

# Manhattan KNN
man_knn_classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=1)
man_knn_classifier.fit(X_train, y_train)

# Chebyshev KNN
che_knn_classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=float('inf'))
che_knn_classifier.fit(X_train, y_train)

# linear SVM
from sklearn.svm import SVC
lin_svm_classifier = SVC(kernel='linear', random_state=0)
lin_svm_classifier.fit(X_train, y_train)

# RBF SVM
rbf_svm_classifier = SVC(kernel='rbf', random_state=0)
rbf_svm_classifier.fit(X_train, y_train)

# polynomial SVM
poly_svm_classifier = SVC(kernel='poly', random_state=0)
poly_svm_classifier.fit(X_train, y_train)

# sigmoid SVM
sigmoid_svm_classifier = SVC(kernel='sigmoid', random_state=0)
sigmoid_svm_classifier.fit(X_train, y_train)

# Naive Bayes
from sklearn.naive_bayes import GaussianNB
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Decision Tree
from sklearn.tree import DecisionTreeClassifier
dt_classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
dt_classifier.fit(X_train, y_train)

# Random Forest
from sklearn.ensemble import RandomForestClassifier
rf_classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
rf_classifier.fit(X_train, y_train)

RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)

In [17]:
# Evaluating Model Performances on the test set

log_accuracy = log_classifier.score(X_test, y_test)
euc_knn_accuracy = euc_knn_classifier.score(X_test, y_test)
man_knn_accuracy = man_knn_classifier.score(X_test, y_test)
che_knn_accuracy = che_knn_classifier.score(X_test, y_test)
lin_svm_accuracy = lin_svm_classifier.score(X_test, y_test)
rbf_svm_accuracy = rbf_svm_classifier.score(X_test, y_test)
poly_svm_accuracy = poly_svm_classifier.score(X_test, y_test)
sigmoid_svm_accuracy = sigmoid_svm_classifier.score(X_test, y_test)
nb_accuracy = nb_classifier.score(X_test, y_test)
dt_accuracy = dt_classifier.score(X_test, y_test)
rf_accuracy = rf_classifier.score(X_test, y_test)

print('Accuracy of the models :')
print('Logistic          : ',log_accuracy)
print('Euclidean KNN     : ',euc_knn_accuracy)
print('Manhattan KNN     : ',man_knn_accuracy)
print('Chebyshev KNN     : ',che_knn_accuracy)
print('Linear SVM        : ',lin_svm_accuracy)
print('RBF SVM           : ',rbf_svm_accuracy)
print('Polynomial SVM    : ',poly_svm_accuracy)
print('Sigmoid SVM       : ',sigmoid_svm_accuracy)
print('Naive Bayes       : ',nb_accuracy)
print('Decision Tree     : ',dt_accuracy)
print('Random Forest     : ',rf_accuracy)

Accuracy of the models :
Logistic          :  0.9562043795620438
Euclidean KNN     :  0.948905109489051
Manhattan KNN     :  0.9562043795620438
Chebyshev KNN     :  0.948905109489051
Linear SVM        :  0.9635036496350365
RBF SVM           :  0.9562043795620438
Polynomial SVM    :  0.9562043795620438
Sigmoid SVM       :  0.948905109489051
Naive Bayes       :  0.948905109489051
Decision Tree     :  0.9708029197080292
Random Forest     :  0.9708029197080292


In [None]:
# Creating confusion matrix
from sklearn.metrics import confusion_matrix, accuracy_score
log_y_pred = log_classifier.predict(X_test)
euc_knn_y_pred = euc_knn_classifier.predict(X_test)
man_knn_y_pred = man_knn_classifier.predict(X_test)
che_knn_y_pred = che_knn_classifier.predict(X_test)
lin_svm_y_pred = lin_svm_classifier.predict(X_test)
rbf_svm_y_pred = rbf_svm_classifier.predict(X_test)
poly_svm_y_pred = poly_svm_classifier.predict(X_test)
sigmoid_svm_y_pred = sigmoid_svm_classifier.predict(X_test)
nb_y_pred = nb_classifier.predict(X_test)
dt_y_pred = dt_classifier.predict(X_test)
rf_y_pred = rf_classifier.predict(X_test)

