In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# scikit-learn ships with several small "toy" datasets
# (https://scikit-learn.org/stable/datasets/toy_dataset.html)
from sklearn import datasets

# Load the wine dataset; inspect `wine` to see how the dataset object is laid out
wine = datasets.load_wine()

# Keep the data points in X and the corresponding targets in y
X, y = wine.data, wine.target


In [3]:
# We import the function train_test_split from sklearn and use this to split the data
from sklearn.model_selection import train_test_split

# The function returns a train part and a test part for each array passed in.
# test_size is the proportion of samples held out as the TEST set (0.2 -> 20%);
# the remaining samples form the training set.
# random_state fixes the shuffle, so re-running the notebook from a fresh kernel
# reproduces the same split and therefore the same metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# We first import the classifier
from sklearn.neighbors import KNeighborsClassifier

# We instantiate the classifier with 5 neighbours
knn = KNeighborsClassifier(n_neighbors=5)

# We fit the model using our training data
knn.fit(X_train, y_train)

# Finally, we generate predictions on the test data
ypred_test = knn.predict(X_test)

In [4]:
# Import the confusion-matrix helper from scikit-learn
from sklearn.metrics import confusion_matrix

# Compare the true test labels (y_test) with the model's predictions (ypred_test)
cm = confusion_matrix(y_true=y_test, y_pred=ypred_test)

In [5]:
# Display the confusion matrix computed in the previous cell
print(cm)

[[ 9  1  0]
 [ 1 10  3]
 [ 1  4  7]]


In [6]:
'''row=True  
column=Predict   
If A is what we want   

    A     B   
A   TP    FN   
B   FP    TN   

ps:   
TP=True Positive   
TN=True Negative  
FP=False Positive   
FN=False Negative'''

'row=True  \ncolumn=Predict   \nIf A is what we want   \n\n    A     B   \nA   TP    FN   \nB   FP    TN   \n\nps:   \nTP=True Positive   \nTN=True Negative  \nFP=False Positive   \nFN=False Negative'

In [7]:
def my_accuracy(y, pred):
    """Return the accuracy of ``pred`` against the true labels ``y``.

    Parameters
    ----------
    y : array-like
        True labels.
    pred : array-like
        Predicted labels, in the same order as ``y``.

    Returns
    -------
    The sum of the confusion-matrix diagonal (correct predictions)
    divided by the sum of all its entries (all predictions).
    """
    matrix = confusion_matrix(y, pred)
    n_correct = np.diagonal(matrix).sum()
    n_total = matrix.sum()
    return n_correct / n_total

### np.diagonal() returns the main diagonal of the matrix; .sum() adds up its elements ###

In [8]:
'''Recall_i=True Positives/(TP+FN)   
Recall=1/k*sum of recall_i'''

'Recall_i=True Positives/(TP+FN)   \nRecall=1/k*sum of recall_i'

In [9]:
def my_recall_macro(y, pred):
    """Return the macro-averaged recall of ``pred`` against the true labels ``y``.

    Per-class recall is TP / (TP + FN); the macro average is the unweighted
    mean of the per-class values.

    Parameters
    ----------
    y : array-like
        True labels.
    pred : array-like
        Predicted labels, in the same order as ``y``.

    Returns
    -------
    Mean of the per-class recalls. A class with no true samples (it occurs
    only in ``pred``) contributes 0.0 instead of NaN, which is the value
    sklearn's ``recall_score`` assigns by default in that case.
    """
    cm = confusion_matrix(y, pred)
    true_pos = np.diagonal(cm)
    # Sum over each row: in sklearn's layout a row holds all samples of one
    # true class, so the row total is TP + FN for that class.
    row_sum = cm.sum(axis=1)

    # Guard the division: an empty row would otherwise give 0/0 = NaN and
    # turn the whole macro average into NaN.
    recalls = [
        tp / total if total > 0 else 0.0
        for tp, total in zip(true_pos, row_sum)
    ]

    return np.mean(recalls)

In [10]:
'''Precision_i=TP/(TP+FP)     
Precision=1/k*sum of precision_i'''

'Precision_i=TP/(TP+FP)     \nPrecision=1/k*sum of precision_i'

In [11]:
def my_precision_macro(y, pred):
    """Return the macro-averaged precision of ``pred`` against the true labels ``y``.

    Per-class precision is TP / (TP + FP); the macro average is the
    unweighted mean of the per-class values.

    Parameters
    ----------
    y : array-like
        True labels.
    pred : array-like
        Predicted labels, in the same order as ``y``.

    Returns
    -------
    Mean of the per-class precisions. A class that is never predicted
    contributes 0.0 instead of NaN, which is the value sklearn's
    ``precision_score`` assigns by default in that case.
    """
    cm = confusion_matrix(y, pred)
    true_pos = np.diagonal(cm)
    # Sum over each column: in sklearn's layout a column holds all samples
    # predicted as one class, so the column total is TP + FP for that class.
    col_sum = cm.sum(axis=0)

    # Guard the division: a class the model never predicts has an empty
    # column, which would otherwise give 0/0 = NaN and poison the average.
    precs = [
        tp / total if total > 0 else 0.0
        for tp, total in zip(true_pos, col_sum)
    ]

    return np.mean(precs)

Check whether these functions match the corresponding ones in sklearn.

In [12]:
from sklearn.metrics import precision_score, recall_score, accuracy_score
# Compare with a floating-point tolerance instead of ==, so a last-bit rounding
# difference between the two implementations is not reported as a mismatch.
np.isclose(my_accuracy(y_test, ypred_test), accuracy_score(y_test, ypred_test))

True

In [13]:
# Tolerance-based comparison: exact == on floats can fail on a last-bit rounding difference.
np.isclose(my_recall_macro(y_test, ypred_test), recall_score(y_test, ypred_test, average='macro'))

True

In [14]:
# Tolerance-based comparison: exact == on floats can fail on a last-bit rounding difference.
np.isclose(my_precision_macro(y_test, ypred_test), precision_score(y_test, ypred_test, average='macro'))

True