In [1]:
from ucimlrepo import fetch_ucirepo
from typing import Tuple, List
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import random

In [2]:
# Single seed shared by every random number generator seeded in this notebook.
SEED = 42
np.random.seed(SEED)
# The stdlib `random` module is imported next to numpy; seed it too so that any
# code drawing from that generator is reproducible under Restart & Run All.
random.seed(SEED)

K_NUMBER = 9  # presumably the k of a KNN classifier -- not used in the visible cells
TARGET_COLUMN_NAME = "class"  # name of the label column in the Iris frame
TEST_SIZE = 0.2  # passed as `test_size` when the hold-out splits are created
HOLDOUT_INDEXES_NUMBER = 20  # passed as `n_indexes`: how many hold-out splits to draw
FIG_SIZE = (10, 8)  # presumably a matplotlib figure size -- not used in the visible cells
IMAGE_FONT_SIZE = 20  # presumably a plot font size -- not used in the visible cells
RESOLUTION_POINTS = 500  # presumably a plotting grid resolution -- TODO confirm

In [3]:
# Fetch dataset id 53 (the Iris data shown below) through ucimlrepo.
# NOTE(review): this presumably needs network access -- confirm before running offline.
iris = fetch_ucirepo(id=53)

# Feature columns and the label column, as exposed by the fetched dataset object.
X_iris, y_iris = iris.data.features, iris.data.targets

# One frame holding the features followed by the target column.
iris_df = pd.concat((X_iris, y_iris), axis=1)

In [4]:
# Preview the first five rows of the combined features + target frame.
iris_df.head(n=5)

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [5]:
# Distinct class labels, in order of first appearance, as a plain Python list.
iris_df[TARGET_COLUMN_NAME].unique().tolist()

['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

In [6]:
# Integer code assigned to each species label.
class_mapping = {"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}

# Encode the labels as integers.
# A plain `.map(class_mapping)` turns every value that is not a key of the
# mapping into NaN, so re-running this cell once the labels are already encoded
# would silently wipe the target column. Passing unknown values through
# unchanged keeps the cell idempotent.
iris_df[TARGET_COLUMN_NAME] = iris_df[TARGET_COLUMN_NAME].map(
    lambda label: class_mapping.get(label, label)
)

# Fail loudly if any label is neither a known species name nor an assigned code.
unexpected_labels = set(iris_df[TARGET_COLUMN_NAME].unique()) - set(
    class_mapping.values()
)
assert not unexpected_labels, f"unmapped class labels: {unexpected_labels}"

iris_df[TARGET_COLUMN_NAME]

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: class, Length: 150, dtype: int64

In [7]:
# Sanity check: the distinct values left in the target column after encoding.
iris_df.loc[:, TARGET_COLUMN_NAME].unique()

array([0, 1, 2], dtype=int64)

In [8]:
from classifiers.bayesian_classifier import BayesianGaussianClassifier
from utils.classifier_utils import ClassifierUtils

# NOTE(review): the `knn_` prefix on the names below looks like a leftover from a
# KNN notebook -- this cell evaluates BayesianGaussianClassifier. The names are
# kept unchanged in case later cells reference them.

# Column order of the results table; must match the order of each record tuple.
RESULT_COLUMNS = [
    "accuracy",
    "error_rate",
    "train_indexes",
    "test_indexes",
    "y_pred",
    "confusion_matrix",
]

# HOLDOUT_INDEXES_NUMBER (train, test) index pairs, each with a TEST_SIZE test share.
knn_holdout_indexes = ClassifierUtils.create_multiple_holdout_indexes(
    df=iris_df, n_indexes=HOLDOUT_INDEXES_NUMBER, test_size=TEST_SIZE
)

# One record per hold-out split, collected in iteration order.
knn_accuracy_list = []

for train_indexes, test_indexes in knn_holdout_indexes:
    # Slice the frame into features/labels for this split.
    X_train, y_train, X_test, y_test = ClassifierUtils.separate_train_test(
        df=iris_df,
        train_indexes=train_indexes,
        test_indexes=test_indexes,
        target_column_name=TARGET_COLUMN_NAME,
    )

    # A fresh classifier is fitted for every split.
    classifier = BayesianGaussianClassifier()
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test=X_test)

    # Score the predictions against the held-out labels.
    accuracy = ClassifierUtils.calculate_accuracy(y_true=y_test, y_pred=y_pred)
    error_rate = ClassifierUtils.calculate_error_rate(y_true=y_test, y_pred=y_pred)
    confusion_matrix_result = ClassifierUtils.confusion_matrix(
        y_true=y_test, y_pred=y_pred
    )

    split_record = (
        accuracy,
        error_rate,
        train_indexes,
        test_indexes,
        y_pred,
        confusion_matrix_result,
    )
    knn_accuracy_list.append(split_record)

# One row per hold-out split.
df_results_knn = pd.DataFrame(knn_accuracy_list, columns=RESULT_COLUMNS)

df_results_knn

Unnamed: 0,accuracy,error_rate,train_indexes,test_indexes,y_pred,confusion_matrix
0,1.0,0.0,"[73, 18, 118, 78, 76, 31, 64, 141, 68, 82, 110...","[1, 129, 130, 140, 14, 144, 20, 21, 149, 37, 4...","[0, 2, 2, 2, 0, 2, 0, 0, 2, 0, 0, 0, 1, 1, 1, ...","[[7, 0, 0], [0, 11, 0], [0, 0, 12]]"
1,0.9,0.1,"[84, 20, 45, 47, 149, 114, 144, 117, 9, 90, 14...","[128, 129, 2, 133, 138, 12, 140, 142, 143, 26,...","[2, 2, 0, 1, 2, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, ...","[[9, 0, 0], [0, 6, 2], [0, 1, 12]]"
2,0.966667,0.033333,"[54, 71, 117, 30, 58, 81, 10, 64, 51, 28, 131,...","[129, 133, 134, 7, 135, 141, 145, 18, 146, 20,...","[2, 1, 2, 0, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 0, ...","[[10, 0, 0], [0, 5, 0], [0, 1, 14]]"
3,1.0,0.0,"[51, 101, 131, 129, 106, 121, 141, 105, 37, 12...","[0, 1, 3, 135, 9, 140, 15, 143, 146, 149, 22, ...","[0, 0, 0, 2, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0, 0, ...","[[13, 0, 0], [0, 9, 0], [0, 0, 8]]"
4,0.966667,0.033333,"[120, 110, 123, 94, 6, 129, 76, 51, 82, 47, 13...","[0, 131, 5, 134, 136, 142, 144, 18, 24, 32, 35...","[0, 2, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[10, 0, 0], [0, 8, 1], [0, 0, 11]]"
5,1.0,0.0,"[85, 30, 108, 125, 10, 123, 95, 144, 82, 104, ...","[2, 130, 131, 132, 135, 11, 146, 29, 37, 46, 4...","[0, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 1, 1, 1, 1, ...","[[6, 0, 0], [0, 12, 0], [0, 0, 12]]"
6,0.966667,0.033333,"[15, 99, 57, 133, 87, 41, 75, 47, 81, 97, 113,...","[129, 130, 3, 5, 138, 139, 140, 20, 25, 30, 32...","[2, 2, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 1, ...","[[8, 0, 0], [0, 11, 1], [0, 0, 10]]"
7,0.966667,0.033333,"[143, 138, 95, 100, 58, 3, 104, 93, 107, 120, ...","[129, 4, 136, 9, 14, 142, 147, 148, 149, 25, 2...","[2, 0, 2, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, ...","[[10, 0, 0], [0, 7, 1], [0, 0, 12]]"
8,1.0,0.0,"[51, 12, 144, 118, 85, 74, 140, 48, 149, 78, 1...","[130, 134, 7, 135, 9, 138, 141, 147, 31, 33, 3...","[2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, ...","[[6, 0, 0], [0, 14, 0], [0, 0, 10]]"
9,0.966667,0.033333,"[93, 27, 124, 126, 52, 21, 37, 117, 119, 5, 13...","[128, 130, 133, 11, 16, 145, 18, 24, 34, 38, 3...","[2, 2, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[11, 0, 0], [0, 9, 0], [0, 1, 9]]"
