## ML Evaluation Metrics

Modified from: https://www.datacourses.com/classification-model-evaluation-metrics-in-scikit-learn-924/

### Step 1: Import pandas, numpy, datasets from sklearn

In [4]:
import numpy as np
import pandas as pd
from sklearn import datasets

### Step 2: Load diabetes dataset

In [5]:
diabetes_df=pd.read_csv('./Data/diabetes.csv')
diabetes_df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
diabetes_df.Outcome.value_counts()

0    500
1    268
Name: Outcome, dtype: int64

In [7]:
x = diabetes_df.drop('Outcome',axis=1).values
y = diabetes_df['Outcome'].values

In [8]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1, stratify=y)

In [9]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors = 15)
knn.fit(x_train,y_train)
y_pred = knn.predict(x_test)

In [10]:
pd.DataFrame(data={'Predicted': y_pred, 'Actual': y_test}).head(15)

Unnamed: 0,Predicted,Actual
0,0,0
1,0,0
2,0,0
3,1,0
4,0,0
5,0,0
6,0,0
7,0,0
8,1,1
9,0,0


In [11]:
from sklearn.metrics import accuracy_score
print ('accuracy =', accuracy_score(y_test, y_pred))

accuracy = 0.7402597402597403


In [12]:
from sklearn.metrics import recall_score
recall_score(y_test, y_pred)

0.4444444444444444

In [13]:
from sklearn.metrics import precision_score
precision_score(y_test, y_pred)

0.7058823529411765

In [14]:
from sklearn.metrics import f1_score
f1_score(y_test, y_pred)

0.5454545454545455

In [15]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.75      0.90      0.82       100
           1       0.71      0.44      0.55        54

    accuracy                           0.74       154
   macro avg       0.73      0.67      0.68       154
weighted avg       0.73      0.74      0.72       154



In [16]:
import matplotlib.pyplot as plt  
from sklearn import metrics
metrics.plot_roc_curve(knn, x_test, y_test)
plt.plot([0,1],[0,1],'r--')

AttributeError: module 'sklearn.metrics' has no attribute 'plot_roc_curve'

In [17]:
metrics.__version

AttributeError: module 'sklearn.metrics' has no attribute '__version'

In [28]:
y_probas=knn.predict_proba(x_test)

In [32]:
from sklearn.metrics import roc_auc_score
probs = y_probas[:, 1]
print ('ROC AUC =', roc_auc_score(y_test, probs))

ROC AUC = 0.7864814814814814
