<h1>Blood Classification</h1>

In [5]:
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy
import numpy as np
import sklearn.metrics as metrics
from PRNN_final.LinearRegression import regression_classifier
from PRNN_final.knn import knn_naive

Let's import the required data and preprocess it.

In [6]:
def normalize_images(images: np.ndarray) -> np.ndarray:
    """Flatten every image into a feature vector and scale it by 1/255.

    Parameters
    ----------
    images : np.ndarray
        Array whose first axis indexes the samples. All remaining axes
        (for example height, width and channels) are collapsed into a
        single feature axis, so both colour and grayscale stacks work.

    Returns
    -------
    np.ndarray
        A new ``float32`` array of shape ``(n_samples, n_features)`` holding
        the input values divided by 255. The input array is not modified.
    """
    images = np.asarray(images)
    n_samples = images.shape[0]
    # The feature count is computed explicitly (rather than reshape(..., -1))
    # so that an empty batch still reshapes cleanly.
    n_features = int(np.prod(images.shape[1:]))
    # astype() returns a fresh copy, which makes the in-place division safe.
    flat_images = images.reshape(n_samples, n_features).astype(np.float32)
    flat_images /= 255.0
    return flat_images


def load_input(inputpath):
    """Load the train/val/test splits from an ``.npz`` archive.

    Parameters
    ----------
    inputpath : str or path-like
        Location of an ``.npz`` archive containing the members
        ``train_images``, ``val_images``, ``test_images``, ``train_labels``,
        ``val_labels`` and ``test_labels``.

    Returns
    -------
    tuple
        ``(train_images, val_images, test_images, train_labels, val_labels,
        test_labels)``. The image arrays have been passed through
        ``normalize_images``; the label arrays are returned as stored.
    """
    # np.load keeps the archive open for lazy member access; the context
    # manager closes it as soon as every array has been read into memory.
    with np.load(inputpath) as folder:
        un_train_images = folder['train_images.npy']
        un_val_images = folder['val_images.npy']
        un_test_images = folder['test_images.npy']
        train_labels = folder['train_labels.npy']
        val_labels = folder['val_labels.npy']
        test_labels = folder['test_labels.npy']
    # Then we normalize the images
    train_images = normalize_images(un_train_images)
    val_images = normalize_images(un_val_images)
    test_images = normalize_images(un_test_images)
    return train_images, val_images, test_images, train_labels, val_labels, test_labels

def one_hot_encoding(labels, num_classes=8):
    """Convert integer class labels into one-hot rows.

    Parameters
    ----------
    labels : array-like
        Integer class indices, one entry (or one row of entries) per sample.
        Shapes ``(n_samples,)`` and ``(n_samples, 1)`` both yield a single 1
        per row; a row holding several indices sets each of those columns.
    num_classes : int, default 8
        Width of the encoding, i.e. the number of columns in the result.

    Returns
    -------
    np.ndarray
        Float array of shape ``(n_samples, num_classes)`` containing 0s and 1s.

    Raises
    ------
    IndexError
        If a label is not a valid column index for ``num_classes`` columns.
    """
    labels = np.asarray(labels)
    n_samples = labels.shape[0]
    one_hot_labels = np.zeros((n_samples, num_classes))
    if n_samples == 0:
        # Nothing to mark; also avoids reshaping an empty array with -1.
        return one_hot_labels
    class_idx = labels.reshape(n_samples, -1)
    # The row index broadcasts against every class index found in that row.
    one_hot_labels[np.arange(n_samples)[:, None], class_idx] = 1
    return one_hot_labels

def plotter(x_graph, AUCscores, ACCscores, F1scores, title, xlabel,*,first=False):
    """Draw the AUC, ACC and F1 curves on the current matplotlib axes.

    Each series is drawn as a semi-transparent line with small scatter
    markers on top. A series passed as ``None`` is skipped, because
    matplotlib's ``plot`` refuses ``None`` data.

    Parameters
    ----------
    x_graph : array-like
        X coordinates shared by all series.
    AUCscores, ACCscores, F1scores : array-like or None
        Y values of the three metrics; ``None`` means "do not draw".
    title : str
        Axes title.
    xlabel : str
        Label of the x axis.
    first : bool, keyword-only, default False
        When true, a figure-level legend is added for the series drawn here.
    """
    series = (('AUC', AUCscores), ('ACC', ACCscores), ('F1', F1scores))
    legend_handles = []
    legend_labels = []
    for position, (name, scores) in enumerate(series):
        if scores is None:
            continue
        # A colour tied to the metric's position keeps AUC/ACC/F1 visually
        # consistent between panels even when one of them is skipped.
        colour = 'C' + str(position)
        lines = plt.plot(x_graph, scores, alpha=0.5, color=colour)
        plt.scatter(x_graph, scores, s=6, color=colour)
        legend_handles.append(lines[0])
        legend_labels.append(name)

    plt.xlabel(xlabel)

    plt.title(title)
    # Scores below 0.5 are deliberately left outside the visible range.
    plt.ylim(0.5,1)
    plt.grid(True, which='major')
    plt.grid(True, which='minor',linestyle='--',linewidth=0.2)
    plt.minorticks_on()

    if first and legend_handles:
        plt.figlegend(legend_handles, legend_labels)

In [7]:
# Archive holding the image/label splits, resolved relative to the working directory.
path = "bloodmnist.npz"
(train_images, val_images, test_images,
 train_labels, val_labels, test_labels) = load_input(path)

# Best score recorded for each method; filled in by the evaluation cells further down.
best_methods_AUC = {}
best_methods_ACC = {}

<h2> KNN Classifier </h2>

In [8]:
def knn_naive(X_train, Y_train, X_test, *, k=5, metric=2):
    """Classify test samples by majority vote among their k nearest neighbours.

    Parameters
    ----------
    X_train : np.ndarray, shape (n_train, n_features)
        Training feature vectors.
    Y_train : array-like, shape (n_train,) or (n_train, 1)
        Non-negative integer class labels (required by ``np.bincount``).
    X_test : np.ndarray, shape (n_test, n_features)
        Feature vectors to classify.
    k : int, keyword-only, default 5
        Number of neighbours that vote. ``k == 0`` short-circuits and returns
        an all-zero prediction array.
    metric : keyword-only, default 2
        Passed as ``ord`` to ``np.linalg.norm`` (e.g. 1, 2, ``inf``, ``-inf``).

    Returns
    -------
    np.ndarray
        Float array of shape ``(n_test, 1)`` with the predicted label of each
        test sample. A tie between classes goes to the smallest label, since
        ``np.argmax`` returns the first maximum.

    Raises
    ------
    ValueError
        If ``k`` is negative.
    """
    # First we define a small function to return the k smallest elements of an array
    def k_smallest(arr, k):
        # A stable sort keeps the lowest index first among equal distances,
        # so ties at the k-th place are resolved by training-set order.
        return np.argsort(arr, kind="stable")[:k]

    Y_test_predicted = np.zeros((X_test.shape[0], 1))
    if k == 0:
        return Y_test_predicted
    if k < 0:
        raise ValueError("k must be non-negative, got " + str(k))

    # Accept both column-vector and flat label arrays.
    labels = np.asarray(Y_train)
    if labels.ndim > 1:
        labels = labels[:, 0]

    for row in range(X_test.shape[0]):
        distances = np.linalg.norm(np.abs(X_train - X_test[row, :]), ord=metric, axis=1)
        nearest = k_smallest(distances, k)
        # bincount tallies the votes per class; argmax picks the winner.
        Y_test_predicted[row] = np.argmax(np.bincount(labels[nearest]))

    return Y_test_predicted

In [None]:
# Sweep grid: one row per norm order, one column per neighbourhood size k.
k_max = 6
no_of_tests = 4
x_graph = np.zeros((no_of_tests, k_max))
AUCscores = np.zeros_like(x_graph)
ACCscores = np.zeros_like(x_graph)
F1scores = np.zeros_like(x_graph)

for j in range(no_of_tests):
    # First row uses the -inf norm, last row the +inf norm, rows in between the j-norm.
    metric = -math.inf if j == 0 else (math.inf if j == no_of_tests - 1 else j)
    for k in range(k_max):
        test_pred = knn_naive(train_images, train_labels, test_images, k=k, metric=metric)
        x_graph[j, k] = k
        F1scores[j, k] = metrics.f1_score(test_labels, test_pred, average='weighted')
        ACCscores[j, k] = metrics.accuracy_score(test_labels, test_pred)
        # AUC is not evaluated in this sweep, so AUCscores keeps its initial zeros.


In [None]:
# One panel per norm order of the KNN sweep, arranged on a 2x2 grid.
mpl.rcParams['figure.dpi'] = 300
for j in range(no_of_tests):
    # Same row -> norm mapping as the sweep: row 0 is -inf, the last row is +inf.
    metric=j
    if j==0:
        metric =-math.inf
    elif j==no_of_tests-1:
        metric = math.inf
    plt.subplot(2,2,j+1)
    plotter(x_graph[j],AUCscores[j],ACCscores[j],F1scores[j],str(metric)+" norm","K value",first=(j==0))
    # K only takes the integer values 0..k_max-1, so tick exactly those.
    plt.xticks(np.arange(k_max))

# The labels are one-hot encoded into 8 classes elsewhere in this notebook,
# so this is a multi-class task rather than a binary one.
plt.suptitle("KNN for multi-class classification with different metrics")
plt.tight_layout()

plt.show()

In [None]:
count=0
print("best metrics according to AUC")
for i in np.argsort(AUCscores,axis=None,)[-3:]:
    best_metric,best_k =int( i//k_max), int(i%k_max    )
    best_score = AUCscores[int(best_metric)][int(best_k)]
    if best_metric==0:
        best_metric =-math.inf
    elif best_metric==no_of_tests-1:
        best_metric = math.inf
    print('\t'+str(3-count)+" best metric is ", best_metric,"(k=",best_k,")with a score of ",best_score)
    if 3-count==1:
        best_methods_AUC["KNN" + "(metric=" + str(best_metric) + ", k=" + str(best_k)]=best_score
    count+=1
print("best metrics according to ACC")
count=0
for i in np.argsort(ACCscores,axis=None,)[-3:]:
    best_metric,best_k =int( i//k_max), int(i%k_max    )
    best_score = ACCscores[int(best_metric)][int(best_k)]
    if best_metric==0:
        best_metric =-math.inf
    elif best_metric==no_of_tests-1:
        best_metric = math.inf
    print('\t'+str(3-count)+" best metric is ", best_metric,"(k=",best_k,")with a score of ",best_score)
    if 3-count==1:
        best_methods_AUC["KNN" + "(metric=" + str(best_metric) + ", k=" + str(best_k)]=best_score
    count+=1
%reset_selective -f AUCscores ACCscores F1scores x_graph k_max no_of_tests metric j k test_pred best_metric best_k best_score

<h2>Linear Regression </h2>


In [None]:
train_labels1 = one_hot_encoding(train_labels)
test_labels1 = one_hot_encoding(test_labels)

# One entry per regularisation step. The regression does not depend on a norm
# order, so the scores are 1-D; this is also the layout the reporting cell
# below indexes with flat argsort indices.
k_max=6
x_graph = np.zeros(k_max)
AUCscores = np.zeros(k_max)
ACCscores = np.zeros(k_max)
F1scores = np.zeros(k_max)

for k in range(0, k_max):
    # NOTE(review): the regulariser passed here is negative and grows as
    # exp(10*k); confirm this matches what regression_classifier expects.
    y_predonehot = regression_classifier(train_images, train_labels1, test_images, lambda_hyper=-0.01*math.exp(k*10))
    # The column with the largest regression output is the predicted class.
    test_pred = np.argmax(y_predonehot, axis=1)
    x_graph[k] = k
    F1scores[k] =metrics.f1_score(test_labels,test_pred,average='weighted')
    ACCscores[k] =metrics.accuracy_score(test_labels,test_pred)
    # AUC is not evaluated in this sweep, so AUCscores keeps its initial zeros.

In [None]:
plotter(x_graph, AUCscores, ACCscores, F1scores, "Linear CLassifier with L2 regulariser", "Regulariser ( $\lambda $) ")
plt.legend(['AUC', 'ACC', 'F1']);
count = 0
print("best metrics according to AUC")
for i in np.argsort(AUCscores, axis=None, )[-3:]:

    best_score = AUCscores[i]

    print('\t' + str(3 - count) + " best regulariser is  ", round(x_graph[i], 3), "With score", round(best_score, 3))
    if 3 - count == 1:
        best_methods_AUC["Least squares with regulariser ", x_graph[i]] = best_score
    count += 1
print("best metrics according to ACC")
count = 0

for i in np.argsort(ACCscores, axis=None, )[-3:]:

    best_score = ACCscores[i]

    print('\t' + str(3 - count) + " best regulariser is  ", round(x_graph[i], 3), "With score", round(best_score, 3))
    if 3 - count == 1:
        best_methods_ACC["Least squares with regulariser ", x_graph[i]] = best_score
    count += 1

% reset_selective -f AUCscores ACCscores F1scores x_graph k_max test_pred  best_score

In [None]:
def regressionstuff(train_images, train_labels0, test_images, test_labels0, km):
    """Sweep the regression classifier over ``km`` regulariser steps and plot accuracy.

    Parameters
    ----------
    train_images, test_images : np.ndarray
        Feature matrices handed to ``regression_classifier``.
    train_labels0, test_labels0 : np.ndarray
        Integer class labels. The training labels are one-hot encoded before
        fitting; the test labels are compared with the predicted class indices.
    km : int
        Number of sweep steps; step ``k`` uses ``lambda_hyper = -0.01 * exp(10 * k)``.

    Returns
    -------
    None
        The accuracy curve is drawn through ``plotter``.
    """
    train_labels1 = one_hot_encoding(train_labels0)
    x_graph = numpy.zeros(km)
    ACCscores = numpy.zeros(km)
    # AUC and F1 are not computed here. NaN placeholders draw nothing, whereas
    # passing None makes matplotlib's plot() raise a ValueError.
    AUCscores = numpy.full(km, numpy.nan)
    F1scores = numpy.full(km, numpy.nan)
    for k in range(km):
        # NOTE(review): the regulariser is negative and grows as exp(10*k);
        # confirm this matches what regression_classifier expects.
        y_predonehot = regression_classifier(train_images, train_labels1, test_images, lambda_hyper=-0.01*math.exp(k*10))

        # The column with the largest regression output is the predicted class.
        y_pred = np.argmax(y_predonehot, axis=1)
        x_graph[k] = k
        ACCscores[k] = basic_classification_accuraccy(test_labels0, y_pred)
    plotter(x_graph, AUCscores, ACCscores, F1scores, 'Regression', 'K')


def basic_classification_accuraccy(y_true, y_pred):
    """Return the fraction of positions at which ``y_pred`` equals ``y_true``.

    The comparison runs over the indices of ``y_true``; the result is the
    number of matching positions divided by ``len(y_true)``.
    """
    total = len(y_true)
    hits = sum(1 for idx in range(total) if y_true[idx] == y_pred[idx])
    return hits / total


def k_means_evaluator(train_images, test_images, train_labels, test_labels, *, kmax=5):
    """Plot the accuracy of ``knn_naive`` for k = 0 .. kmax - 1.

    Despite its name, this function evaluates the nearest-neighbour
    classifier ``knn_naive`` (with the 2-norm), once per value of k.

    Parameters
    ----------
    train_images, test_images : np.ndarray
        Feature matrices of the training and test samples.
    train_labels, test_labels : np.ndarray
        Class labels of the training and test samples.
    kmax : int, keyword-only, default 5
        Number of k values to evaluate, starting at 0.

    Returns
    -------
    None
        The accuracy curve is drawn through ``plotter``.
    """
    x_graph = numpy.zeros(kmax)
    ACCscores = numpy.zeros(kmax)
    # AUC and F1 are not computed here. NaN placeholders draw nothing, whereas
    # passing None makes matplotlib's plot() raise a ValueError.
    not_computed = numpy.full(kmax, numpy.nan)

    for k in range(0, kmax):
        Y_test = knn_naive(train_images, train_labels, test_images, k=k, metric=2)
        x_graph[k] = k
        ACCscores[k] = basic_classification_accuraccy(test_labels, Y_test)

    plotter(x_graph, not_computed, ACCscores, not_computed, 'K-means based multiclass classifier', 'k')


def main(inputpath):
    """Load the dataset stored at ``inputpath`` and run the regression sweep on it.

    Parameters
    ----------
    inputpath : str or path-like
        Location of the ``.npz`` archive understood by ``load_input``.
    """
    # Load from the given path instead of relying on whatever arrays happen to
    # exist as notebook globals; the validation split is not needed here.
    train_images, _, test_images, train_labels, _, test_labels = load_input(inputpath)

    # now we train the data using different methods
    regressionstuff(train_images, train_labels, test_images, test_labels, km=5)
