## Importing the libraries

In [140]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Importing the dataset

In [141]:
# Read the raw CSV into a DataFrame.
dataset = pd.read_csv('Data.csv')
# Features: everything except the first column (`Sample code number`, a record
# identifier) and the last column, which is kept aside as the target below.
X = dataset.iloc[:, 1:-1].to_numpy()
# Target: the last column of the file.
y = dataset.iloc[:, -1].to_numpy()

In [142]:
# Remove the column cap so the wide frame is printed with every column shown.
pd.options.display.max_columns = None
print(dataset.head())

   Sample code number  Clump Thickness  Uniformity of Cell Size  \
0             1000025                5                        1   
1             1002945                5                        4   
2             1015425                3                        1   
3             1016277                6                        8   
4             1017023                4                        1   

   Uniformity of Cell Shape  Marginal Adhesion  Single Epithelial Cell Size  \
0                         1                  1                            2   
1                         4                  5                            7   
2                         1                  1                            2   
3                         8                  1                            3   
4                         1                  3                            2   

   Bare Nuclei  Bland Chromatin  Normal Nucleoli  Mitoses  Class  
0            1                3                1       

## Splitting the dataset into the Training set and Testing set

In [143]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for testing; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

## Feature Scaling

In [144]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same fitted scaler
# to both splits so the test rows never influence the scaling statistics.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Training various models on the Training set

### 1. Logistic Regression

In [145]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with library defaults apart from the fixed seed.
clf_lr = LogisticRegression(
    random_state=0,
)
# Kept as the cell's last expression so the fitted estimator is echoed.
clf_lr.fit(X=X_train, y=y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [146]:
# Predict test-set labels with the fitted logistic regression model.
y_pred_lr = clf_lr.predict(X_test)

### 2. K-Nearest Neighbor (K-NN)

In [147]:
from sklearn.neighbors import KNeighborsClassifier

# 5 nearest neighbours; Minkowski metric with p=2.
clf_knn = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
)
# Kept as the cell's last expression so the fitted estimator is echoed.
clf_knn.fit(X=X_train, y=y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [148]:
# Predict test-set labels with the fitted K-NN model.
y_pred_knn = clf_knn.predict(X_test)

### 3. Support Vector Machine (SVM)

In [149]:
from sklearn.svm import SVC

# Support vector classifier with a linear kernel and a fixed seed.
clf_svc = SVC(
    kernel='linear',
    random_state=0,
)
# Kept as the cell's last expression so the fitted estimator is echoed.
clf_svc.fit(X=X_train, y=y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

In [150]:
# Predict test-set labels with the fitted linear-kernel SVM.
y_pred_svc = clf_svc.predict(X_test)

### 4. Kernel SVM

In [151]:
from sklearn.svm import SVC

# Support vector classifier with an RBF kernel and a fixed seed.
clf_kernelSVC = SVC(
    kernel='rbf',
    random_state=0,
)
# Kept as the cell's last expression so the fitted estimator is echoed.
clf_kernelSVC.fit(X=X_train, y=y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

In [152]:
# Predict test-set labels with the fitted RBF-kernel SVM.
y_pred_kernelSVC = clf_kernelSVC.predict(X_test)

### 5. Naïve Bayes

In [153]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes with library defaults.
clf_nb = GaussianNB()
# Kept as the cell's last expression so the fitted estimator is echoed.
clf_nb.fit(X=X_train, y=y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [154]:
# Predict test-set labels with the fitted Gaussian Naive Bayes model.
y_pred_nb = clf_nb.predict(X_test)

### 6. Decision Tree

  #### 6.1 with **GINI**

In [155]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree that scores candidate splits with the Gini criterion; fixed seed.
clf_dtGINI = DecisionTreeClassifier(
    criterion='gini',
    random_state=0,
)
# Kept as the cell's last expression so the fitted estimator is echoed.
clf_dtGINI.fit(X=X_train, y=y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')

In [156]:
# Predict test-set labels with the fitted Gini decision tree.
y_pred_dtGINI = clf_dtGINI.predict(X_test)

  #### 6.2 with **ENTROPY**

In [157]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree that scores candidate splits with the entropy criterion; fixed seed.
clf_dtENTROPY = DecisionTreeClassifier(
    criterion='entropy',
    random_state=0,
)
# Kept as the cell's last expression so the fitted estimator is echoed.
clf_dtENTROPY.fit(X=X_train, y=y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='entropy',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')

In [158]:
# Predict test-set labels with the fitted entropy decision tree.
y_pred_dtENTROPY = clf_dtENTROPY.predict(X_test)

### 7. Random Forest Classifier

  #### 7.1 with **GINI**

In [159]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 10 trees using the Gini criterion; fixed seed.
clf_rfcGINI = RandomForestClassifier(
    n_estimators=10,
    criterion='gini',
    random_state=0,
)
# Kept as the cell's last expression so the fitted estimator is echoed.
clf_rfcGINI.fit(X=X_train, y=y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

In [160]:
# Predict test-set labels with the fitted Gini random forest.
y_pred_rfcGINI = clf_rfcGINI.predict(X_test)

  #### 7.2 with **ENTROPY**

In [161]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 10 trees using the entropy criterion; fixed seed.
clf_rfcENTROPY = RandomForestClassifier(
    n_estimators=10,
    criterion='entropy',
    random_state=0,
)
# Kept as the cell's last expression so the fitted estimator is echoed.
clf_rfcENTROPY.fit(X=X_train, y=y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

In [172]:
# Predict test-set labels with the fitted entropy random forest.
y_pred_rfcENTROPY = clf_rfcENTROPY.predict(X_test)

## Evaluating the model performance with Confusion Matrix

In [173]:
pip install -U prettytable



In [174]:
from prettytable import PrettyTable
from sklearn.metrics import confusion_matrix, accuracy_score

In [175]:
# Combined report: one row per model holding its confusion matrix and accuracy,
# both computed against the held-out test labels.
evaluataionTable = PrettyTable()
evaluataionTable.field_names = ["Model", "Confusion Matrix", "Accuracy"]

# Filler row of dashes inserted between consecutive models in the printout.
separator_row = ["------------------------------", "------------------", "--------------------"]

# (display label, predicted test labels) in the order the rows are printed.
model_predictions = [
    ("Logistic Regression", y_pred_lr),
    ("K Nearest Neighbor", y_pred_knn),
    ("Support Vector Machine", y_pred_svc),
    ("SVM Kernel", y_pred_kernelSVC),
    ("Naïve Bayes", y_pred_nb),
    ("Decision Tree (with GINI)", y_pred_dtGINI),
    ("Decision Tree (with Entropy)", y_pred_dtENTROPY),
    ("Random Forest (with GINI)", y_pred_rfcGINI),
    ("Random Forest (with ENTROPY)", y_pred_rfcENTROPY),
]

for position, (label, predicted) in enumerate(model_predictions):
    # A separator goes before every model except the first, so none trails the last.
    if position > 0:
        evaluataionTable.add_row(list(separator_row))
    evaluataionTable.add_row([
        label,
        confusion_matrix(y_test, predicted),
        accuracy_score(y_test, predicted),
    ])

print(evaluataionTable)

+--------------------------------+--------------------+----------------------+
|             Model              |  Confusion Matrix  |       Accuracy       |
+--------------------------------+--------------------+----------------------+
|      Logistic Regression       |     [[103   4]     |  0.9473684210526315  |
|                                |     [  5  59]]     |                      |
| ------------------------------ | ------------------ | -------------------- |
|       K Nearest Neighbor       |     [[103   4]     |  0.9473684210526315  |
|                                |     [  5  59]]     |                      |
| ------------------------------ | ------------------ | -------------------- |
|     Support Vector Machine     |     [[102   5]     |  0.9415204678362573  |
|                                |     [  5  59]]     |                      |
| ------------------------------ | ------------------ | -------------------- |
|           SVM Kernel           |     [[101   6]   

### Confusion Matrix : 

In [176]:
# Confusion matrix of every model on the held-out test labels, one row per model.
confusionMatrixTable = PrettyTable()
# The second column holds confusion matrices, so it is labelled accordingly
# (it was previously mislabelled "Accuracy").
confusionMatrixTable.field_names = ["Model", "Confusion Matrix"]

# (display label, predicted test labels) in the order the rows are printed.
model_predictions = [
    ("Logistic Regression", y_pred_lr),
    ("K Nearest Neighbor", y_pred_knn),
    ("Support Vector Machine", y_pred_svc),
    ("SVM Kernel", y_pred_kernelSVC),
    ("Naïve Bayes", y_pred_nb),
    ("Decision Tree (with GINI)", y_pred_dtGINI),
    ("Decision Tree (with Entropy)", y_pred_dtENTROPY),
    ("Random Forest (with GINI)", y_pred_rfcGINI),
    ("Random Forest (with ENTROPY)", y_pred_rfcENTROPY),
]

for label, predicted in model_predictions:
    confusionMatrixTable.add_row([label, confusion_matrix(y_test, predicted)])

print(confusionMatrixTable)

+------------------------------+-------------+
|            Model             |   Accuracy  |
+------------------------------+-------------+
|     Logistic Regression      |  [[103   4] |
|                              |  [  5  59]] |
|      K Nearest Neighbor      |  [[103   4] |
|                              |  [  5  59]] |
|    Support Vector Machine    |  [[102   5] |
|                              |  [  5  59]] |
|          SVM Kernel          |  [[101   6] |
|                              |  [  3  61]] |
|         Naïve Bayes          |   [[99  8]  |
|                              |   [ 2 62]]  |
|  Decision Tree (with GINI)   |  [[105   2] |
|                              |  [  6  58]] |
| Decision Tree (with Entropy) |  [[104   3] |
|                              |  [  4  60]] |
|  Random Forest (with GINI)   |  [[104   3] |
|                              |  [  8  56]] |
| Random Forest (with ENTROPY) |  [[104   3] |
|                              |  [  5  59]] |
+------------

### Accuracy Table : 

In [177]:
# Accuracy of every model on the held-out test labels, one row per model.
AccuracyTable = PrettyTable()
AccuracyTable.field_names = ["Model", "Accuracy"]

# (display label, predicted test labels) in the order the rows are printed.
model_predictions = [
    ("Logistic Regression", y_pred_lr),
    ("K Nearest Neighbor", y_pred_knn),
    ("Support Vector Machine", y_pred_svc),
    ("SVM Kernel", y_pred_kernelSVC),
    ("Naïve Bayes", y_pred_nb),
    ("Decision Tree (with GINI)", y_pred_dtGINI),
    ("Decision Tree (with Entropy)", y_pred_dtENTROPY),
    ("Random Forest (with GINI)", y_pred_rfcGINI),
    ("Random Forest (with ENTROPY)", y_pred_rfcENTROPY),
]

for label, predicted in model_predictions:
    AccuracyTable.add_row([label, accuracy_score(y_test, predicted)])

print(AccuracyTable)

+------------------------------+--------------------+
|            Model             |      Accuracy      |
+------------------------------+--------------------+
|     Logistic Regression      | 0.9473684210526315 |
|      K Nearest Neighbor      | 0.9473684210526315 |
|    Support Vector Machine    | 0.9415204678362573 |
|          SVM Kernel          | 0.9473684210526315 |
|         Naïve Bayes          | 0.9415204678362573 |
|  Decision Tree (with GINI)   | 0.9532163742690059 |
| Decision Tree (with Entropy) | 0.9590643274853801 |
|  Random Forest (with GINI)   | 0.935672514619883  |
| Random Forest (with ENTROPY) | 0.9532163742690059 |
+------------------------------+--------------------+
