In [1]:
%config IPCompleter.greedy=True
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import sklearn.preprocessing, sklearn.datasets, sklearn.model_selection
import timeit
from progressbar import progressbar

In this notebook, we will be dealing with multiclass classification. We will finally have a model that can distinguish between all the digits in the MNIST dataset, so we will no longer need to restrict ourselves to 4 and 9 only. The proper way of handling this problem is to use the *softmax* function. I will show different approaches first, so that we can compare them.

Firstly, we need the data. The template is still the same, so I will not describe it anymore.

In [2]:
# Download MNIST as plain NumPy arrays. `as_frame=False` is needed on recent
# scikit-learn releases, where `fetch_openml` returns a pandas DataFrame by
# default and the `.reshape` call below would fail.
data, target = sklearn.datasets.fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
data = data.reshape(-1, 784)
# Binarize the pixels: intensities below 128 become 0, everything else becomes 1.
data[data < 128] = 0
data[data > 0] = 1
# Append a constant column of ones (presumably so the models can learn a bias
# through an ordinary weight; confirm against the model implementations).
data = np.hstack([data, np.ones((data.shape[0],1))])
# Hold out 30% of the samples for testing; labels are cast to int.
train_data, test_data, train_target, test_target = sklearn.model_selection.train_test_split(data, target.astype(int), test_size=0.3, random_state=47)

# Perceptron

If you remember, we dealt with this problem in one of the previous notebooks, when we were talking about the perceptron algorithm. Just as a reminder, let's do it once again here, so we can compare the results. I moved it into a separate file, so I don't need to copy-paste it here once again. If you are interested, it is in the [src/perceptron_05.py](src/perceptron_05.py) file.

In [3]:
from src.perceptron_05 import multiclass_perceptron

# Baseline: run the multiclass perceptron on the same split so that its
# accuracies can be compared with the approaches below.
perceptron_scores = multiclass_perceptron(
    train_data,
    train_target,
    test_data,
    test_target,
    iters=500,
    random_state=42,
)
train_acc, test_acc = perceptron_scores

print(f"Train accuracy: {train_acc}, Test accuracy: {test_acc}")

Train accuracy: 0.9741020408163266, Test accuracy: 0.911


The results don't tell us much yet. The test accuracy is maybe too low compared to the train accuracy, but we will see how the other models behave.

# One-vs-one

In [3]:
from src.neuron_05 import Neuron

In [4]:
class BCELoss:
    """Binary cross-entropy loss with clipping to keep logs and divisions finite."""

    # Lower bound applied to `predicted` and `1 - predicted` before taking a
    # logarithm or dividing, so that exact 0/1 predictions do not produce inf/nan.
    _EPS = 1e-15

    def __call__(self, target, predicted):
        """Return the cross-entropy between `target` and `predicted`, summed over axis 0."""
        log_pos = np.log(np.maximum(predicted, self._EPS))
        log_neg = np.log(np.maximum(1 - predicted, self._EPS))
        per_element = -target * log_pos - (1 - target) * log_neg
        return np.sum(per_element, axis=0)

    def gradient(self, target, predicted):
        """Return the element-wise derivative of the loss with respect to `predicted`."""
        pos = np.maximum(predicted, self._EPS)
        neg = np.maximum(1 - predicted, self._EPS)
        return -target / pos + (1 - target) / neg

In [13]:
# Train one binary neuron for every unordered pair of digits (one-vs-one).
# Only the entries with j < i of the 10x10 grid are filled in.
models = np.empty((10, 10), dtype=object)
for i in range(10):
    is_class_i = train_target == i
    for j in range(i):
        # Each pair gets its own deterministic seed.
        pair_model = Neuron(
            BCELoss(),
            epochs=200,
            learning_rate=0.001,
            batch_size=128,
            random_state=42 + 10 * i + j,
        )
        models[i, j] = pair_model
        # Keep only the samples of the two digits; label digit i as 1 and digit j as 0.
        pair_mask = is_class_i | (train_target == j)
        pair_X = train_data[pair_mask]
        pair_y = (train_target[pair_mask] == i).astype(float)
        pair_model.fit(pair_X, pair_y, progress=True)

100% (200 of 200) |######################| Elapsed Time: 0:00:51 Time:  0:00:51
100% (200 of 200) |######################| Elapsed Time: 0:00:46 Time:  0:00:46
100% (200 of 200) |######################| Elapsed Time: 0:00:51 Time:  0:00:51
100% (200 of 200) |######################| Elapsed Time: 0:00:30 Time:  0:00:30
100% (200 of 200) |######################| Elapsed Time: 0:00:21 Time:  0:00:21
100% (200 of 200) |######################| Elapsed Time: 0:00:18 Time:  0:00:18
100% (200 of 200) |######################| Elapsed Time: 0:00:17 Time:  0:00:17
100% (200 of 200) |######################| Elapsed Time: 0:00:18 Time:  0:00:18
100% (200 of 200) |######################| Elapsed Time: 0:00:17 Time:  0:00:17
100% (200 of 200) |######################| Elapsed Time: 0:00:20 Time:  0:00:20
100% (200 of 200) |######################| Elapsed Time: 0:00:20 Time:  0:00:20
100% (200 of 200) |######################| Elapsed Time: 0:00:21 Time:  0:00:21
100% (200 of 200) |#####################

In [18]:
# Hard voting: every pairwise model casts one vote per sample for whichever of
# its two classes its rounded output selects (0 -> class j, 1 -> class i).
train_votes = np.zeros((train_target.shape[0], 10), dtype=int)
test_votes = np.zeros((test_target.shape[0], 10), dtype=int)
for i in range(10):
    for j in range(i):
        pair_model = models[i, j]
        for features, votes in ((train_data, train_votes), (test_data, test_votes)):
            rounded = np.around(pair_model.predict(features))
            votes[rounded == 0, j] += 1
            votes[rounded == 1, i] += 1
# The predicted digit is the class that collected the most votes
# (argmax picks the lowest class index on ties).
train_predictions = train_votes.argmax(axis=1)
test_predictions = test_votes.argmax(axis=1)

In [19]:
# Share of samples whose most-voted class equals the true label.
train_acc = sklearn.metrics.accuracy_score(y_true=train_target, y_pred=train_predictions)
test_acc = sklearn.metrics.accuracy_score(y_true=test_target, y_pred=test_predictions)

In [20]:
# Report the one-vs-one accuracies obtained with hard (majority) voting.
print(f"Train accuracy: {train_acc}, Test accuracy: {test_acc}")

Train accuracy: 0.945, Test accuracy: 0.9122857142857143


In [22]:
# Soft voting: instead of rounding, every pairwise model adds its raw output to
# class i and the complement (1 - output) to class j.
# NOTE(review): assumes `predict` returns one value in [0, 1] per sample - confirm in Neuron.
train_scores = np.zeros((train_target.shape[0], 10), dtype=float)
test_scores = np.zeros((test_target.shape[0], 10), dtype=float)
for i in range(10):
    for j in range(i):
        pair_model = models[i, j]
        for features, scores in ((train_data, train_scores), (test_data, test_scores)):
            output = pair_model.predict(features)
            scores[:, j] += 1 - output
            scores[:, i] += output
# The predicted digit is the class with the largest accumulated score.
train_predictions = train_scores.argmax(axis=1)
test_predictions = test_scores.argmax(axis=1)

In [23]:
# Share of samples whose highest-scoring class equals the true label.
train_acc = sklearn.metrics.accuracy_score(y_true=train_target, y_pred=train_predictions)
test_acc = sklearn.metrics.accuracy_score(y_true=test_target, y_pred=test_predictions)

In [24]:
# Report the one-vs-one accuracies obtained by summing the raw pairwise outputs.
print(f"Train accuracy: {train_acc}, Test accuracy: {test_acc}")

Train accuracy: 0.9465714285714286, Test accuracy: 0.9174761904761904


# Tree based approach

# One-vs-rest

# Softmax