**IMPORTING LIBRARIES**

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.model_selection import train_test_split,cross_val_score, KFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from pycobra.classifiercobra import ClassifierCobra
from pycobra.diagnostics import Diagnostics

**LOADING DATASET**

In [2]:
data = pd.read_csv("haberman.csv")
data.head()

Unnamed: 0,Age,Year_of_operation,Axillary_nodes_detected,Survival_status
0,30,64,1,1
1,30,62,3,1
2,30,65,0,1
3,31,59,2,1
4,31,65,4,1


In [3]:
data['Survival_status'] = data['Survival_status'].replace([2],0)
data

Unnamed: 0,Age,Year_of_operation,Axillary_nodes_detected,Survival_status
0,30,64,1,1
1,30,62,3,1
2,30,65,0,1
3,31,59,2,1
4,31,65,4,1
...,...,...,...,...
301,75,62,1,1
302,76,67,0,1
303,77,65,3,1
304,78,65,1,0


**SPLITTING THE DATASET**

In [4]:
X = data.drop('Survival_status', axis = 1)
Y = data['Survival_status']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=10)

**TRAINING USING DIFFERENT CLASSIFIER**

In [5]:
algorithms = [('TREE', DecisionTreeClassifier()),('KNN', KNeighborsClassifier()) , ('SVM', SVC())]
# evaluate each model in turn
for name, algorithm in algorithms:
    kfold = KFold(n_splits=20, random_state=10,shuffle=True) 
    cv_results = cross_val_score(algorithm, X_train, Y_train, cv=kfold, scoring='accuracy')
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

TREE: 0.639091 (0.168197)
KNN: 0.722273 (0.141260)
SVM: 0.717273 (0.116097)


In [6]:
for name, algorithm in algorithms:
    model =  algorithm
    Y_pred = model.fit(X_train, Y_train).predict(X_test)
    print(name)
    print(classification_report(Y_test, Y_pred))

TREE
              precision    recall  f1-score   support

           0       0.32      0.45      0.38        20
           1       0.83      0.74      0.78        72

    accuracy                           0.67        92
   macro avg       0.57      0.59      0.58        92
weighted avg       0.72      0.67      0.69        92

KNN
              precision    recall  f1-score   support

           0       0.36      0.25      0.29        20
           1       0.81      0.88      0.84        72

    accuracy                           0.74        92
   macro avg       0.58      0.56      0.57        92
weighted avg       0.71      0.74      0.72        92

SVM
              precision    recall  f1-score   support

           0       0.33      0.10      0.15        20
           1       0.79      0.94      0.86        72

    accuracy                           0.76        92
   macro avg       0.56      0.52      0.51        92
weighted avg       0.69      0.76      0.71        92



**TRAINING USING COBRA**

In [7]:
cc = ClassifierCobra(machine_list='basic')
Y_pred_cob = cc.fit(X_train, Y_train).predict(X_test)

In [8]:
cc_diag = Diagnostics(cc, X_test, Y_test)
cc_diag.load_errors()
cc_diag.machine_error

{'ClassifierCobra': 0.21739130434782605,
 'tree': 0.25,
 'knn': 0.21739130434782605,
 'svm': 0.21739130434782605}

In [9]:
print("accuracy score = ", accuracy_score(Y_test, Y_pred_cob))
print("\nconfusion matrix =", confusion_matrix(Y_test, Y_pred_cob))
print("\nclassification report\n", classification_report(Y_test, Y_pred_cob))

accuracy score =  0.782608695652174

confusion matrix = [[ 4 16]
 [ 4 68]]

classification report
               precision    recall  f1-score   support

           0       0.50      0.20      0.29        20
           1       0.81      0.94      0.87        72

    accuracy                           0.78        92
   macro avg       0.65      0.57      0.58        92
weighted avg       0.74      0.78      0.74        92

