## Gathering data

In [1]:
import pandas as pd

In [2]:
import seaborn as sns

# Load the "iris" example dataset through seaborn into the DataFrame `iris`
# and preview its first rows.
iris = sns.load_dataset('iris')
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
# List the distinct values of the target column `species`.
iris['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [4]:
# (number of rows, number of columns) of the dataset.
iris.shape

(150, 5)

## Separating data

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Replace the species names with integer class labels.
encoder = LabelEncoder()
iris['species'] = encoder.fit_transform(iris['species'])

# Features are every column except the target.
X = iris.drop('species', axis=1)
y = iris['species']

# Split BEFORE scaling: the scaler must learn its mean/std from the training
# rows only, otherwise statistics of the test rows leak into training and the
# evaluation is optimistic. The split itself depends only on the number of
# rows and random_state, so the same rows end up in each partition.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # fit on training data only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

# Whole feature matrix on the training-fitted scale, kept so that any later
# cell that still refers to `X_scaled` keeps working.
X_scaled = scaler.transform(X)

## Basic implementation of machine learning

In [8]:
from sklearn.neighbors import KNeighborsClassifier as knn

# k-nearest-neighbours classifier: each prediction is decided by the 3 closest
# training samples, using a ball tree as the neighbour-search structure.
model = knn(n_neighbors=3, algorithm='ball_tree')
model.fit(X_train, y_train)

In [9]:
# Predict a class label for every sample of the held-out test set.
y_predicted = model.predict(X_test)

In [10]:
# Display the predicted labels.
y_predicted

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

## Implementing the performance metrics

In [24]:
# This problem has 3 classes, so a full confusion matrix would hold 9 counters (AA, AB, AC, BA, ...).
# To keep the exercise simple only 2 counters per class are tracked: true positives (VP)
# and false positives (FP).

def get_numbers(y_test, y_predicted):
    """Count true positives and false positives for the classes 0, 1 and 2.

    Args:
        y_test: iterable with the actual class labels.
        y_predicted: iterable with the predicted class labels, aligned with
            ``y_test``. The two are paired with ``zip``, so trailing items of
            a longer input are ignored.

    Returns:
        Tuple ``(VP1, VP2, VP3, FP1, FP2, FP3)``. ``VP1``/``VP2``/``VP3`` are
        the numbers of correctly predicted samples of class 0/1/2.
        ``FP1``/``FP2``/``FP3`` are the numbers of samples predicted as class
        0/1/2 whose actual class is a different one. Every pair increments
        exactly one counter, so the six values add up to the number of pairs.

    Raises:
        ValueError: if an actual or predicted label is not 0, 1 or 2.
    """
    classes = (0, 1, 2)
    true_positives = dict.fromkeys(classes, 0)
    false_positives = dict.fromkeys(classes, 0)

    for actual, predicted in zip(y_test, y_predicted):
        if actual not in true_positives or predicted not in true_positives:
            # Unknown labels used to be silently counted as FP3.
            raise ValueError(
                f"labels must be 0, 1 or 2, got actual={actual!r}, predicted={predicted!r}"
            )
        if actual == predicted:
            true_positives[actual] += 1
        else:
            # A false positive belongs to the class that was wrongly predicted,
            # not to the class the sample really has.
            false_positives[predicted] += 1

    return (
        true_positives[0], true_positives[1], true_positives[2],
        false_positives[0], false_positives[1], false_positives[2],
    )

In [25]:
# Unpack the six counters returned by get_numbers: the per-class counts of
# correct predictions (VP1..VP3) followed by the per-class error counts (FP1..FP3).
VP1, VP2, VP3, FP1, FP2, FP3 = get_numbers(y_test, y_predicted)

---

In [26]:
def sensibility(VP, FN):
    """Sensitivity (recall): share of the actual positives that were detected.

    Args:
        VP: number of true positives.
        FN: number of false negatives.

    Returns:
        ``VP / (VP + FN)``, or ``0.0`` when ``VP + FN == 0`` (no actual
        positives), where the ratio is undefined.
    """
    actual_positives = VP + FN
    if actual_positives == 0:
        # Undefined ratio: report 0.0 instead of raising ZeroDivisionError.
        return 0.0
    return VP / actual_positives

In [27]:
# Sensitivity (recall) of class 0 = TP / (TP + FN). Its false negatives are the
# samples whose actual class is 0 but were predicted as another class; errors
# made on classes 1 and 2 are not false negatives of class 0. They are counted
# directly from the labels.
fn_class0 = sum(1 for actual, predicted in zip(y_test, y_predicted) if actual == 0 and predicted != 0)
sensibility(VP1, fn_class0)

1.0

In [29]:
def especificity(VN, FP):
    """Specificity: share of the actual negatives that were correctly rejected.

    Args:
        VN: number of true negatives.
        FP: number of false positives.

    Returns:
        ``VN / (FP + VN)``, or ``0.0`` when ``FP + VN == 0`` (no actual
        negatives), where the ratio is undefined.
    """
    actual_negatives = FP + VN
    if actual_negatives == 0:
        # Undefined ratio: report 0.0 instead of raising ZeroDivisionError.
        return 0.0
    return VN / actual_negatives

In [30]:
# Specificity of class 0 (one-vs-rest) = TN / (TN + FP).
# TN: samples that are not class 0 and were not predicted as class 0 (this
#     includes class 1 <-> class 2 confusions, which VP2 + VP3 would miss).
# FP: samples that are not class 0 but were predicted as class 0.
# Both are counted directly from the labels.
tn_class0 = sum(1 for actual, predicted in zip(y_test, y_predicted) if actual != 0 and predicted != 0)
fp_class0 = sum(1 for actual, predicted in zip(y_test, y_predicted) if actual != 0 and predicted == 0)
especificity(tn_class0, fp_class0)

1.0

In [31]:
def accuracy(VP, VN, N):
    """Accuracy: share of all samples that were classified correctly.

    Args:
        VP: number of true positives.
        VN: number of true negatives.
        N: total number of samples.

    Returns:
        ``(VP + VN) / N``.
    """
    correct = VP + VN
    return correct / N

In [33]:
# Overall accuracy: correct predictions of every class over the total number of
# evaluated samples (each sample increments exactly one of the six counters,
# so their sum is the sample count).
accuracy(VP1, VP2 + VP3, FP1 + FP2 + FP3 + VP1 + VP2 + VP3)

1.0

In [34]:
def precision(VP, FP):
    """Precision: share of the positive predictions that were correct.

    Args:
        VP: number of true positives.
        FP: number of false positives.

    Returns:
        ``VP / (VP + FP)``, or ``0.0`` when ``VP + FP == 0`` (nothing was
        predicted as positive), where the ratio is undefined.
    """
    predicted_positives = VP + FP
    if predicted_positives == 0:
        # Undefined ratio: report 0.0 instead of raising ZeroDivisionError.
        return 0.0
    return VP / predicted_positives

In [35]:
# Precision of class 0 = TP / (TP + FP). Its false positives are the samples
# predicted as class 0 whose actual class is different; they are counted
# directly from the labels.
fp_class0 = sum(1 for actual, predicted in zip(y_test, y_predicted) if actual != 0 and predicted == 0)
precision(VP1, fp_class0)

1.0

In [36]:
def fscore(p, s):
    """F-score: harmonic mean of precision and sensitivity.

    Args:
        p: precision.
        s: sensitivity (recall).

    Returns:
        ``2 * (p * s) / (p + s)``, or ``0.0`` when ``p + s == 0``, where the
        harmonic mean is undefined.
    """
    total = p + s
    if total == 0:
        # Both metrics are zero: report 0.0 instead of raising ZeroDivisionError.
        return 0.0
    return 2 * (p * s) / total

In [37]:
# F-score of class 0 (one-vs-rest) from its precision and its sensitivity.
# FP: samples predicted as class 0 whose actual class is different.
# FN: samples whose actual class is 0 but were predicted as another class.
# Both are counted directly from the labels.
fp_class0 = sum(1 for actual, predicted in zip(y_test, y_predicted) if actual != 0 and predicted == 0)
fn_class0 = sum(1 for actual, predicted in zip(y_test, y_predicted) if actual == 0 and predicted != 0)
fscore(precision(VP1, fp_class0), sensibility(VP1, fn_class0))

1.0