In [2]:
%config IPCompleter.greedy=True
%load_ext autoreload
%autoreload 2

import timeit

import matplotlib.pyplot as plt
import numpy as np
import sklearn.metrics
import sklearn.preprocessing, sklearn.datasets, sklearn.model_selection
from progressbar import progressbar

In this notebook, we will deal with multiclass classification. We will finally have a model that can distinguish between all the digits in the MNIST dataset, so we no longer need to restrict ourselves to 4 and 9 only. The proper way of handling this problem is to use the *softmax* function. I will show several other approaches first, so we can compare them.

Firstly, we need the data. The template is still the same, so I will not describe it anymore.

In [25]:
# Download MNIST. np.asarray makes the rest of the cell independent of whether
# fetch_openml hands back NumPy arrays or pandas objects.
data, target = sklearn.datasets.fetch_openml('mnist_784', version=1, return_X_y=True)
target = np.asarray(target).astype(int)
data = np.asarray(data, dtype=float).reshape(-1, 784)
# Binarise the pixels: intensities below 128 become 0, everything else becomes 1.
data = (data >= 128).astype(float)
# Append a constant column of ones so the bias is learned as an ordinary weight.
data = np.hstack([data, np.ones((data.shape[0], 1))])
train_data, test_data, train_target, test_target = sklearn.model_selection.train_test_split(data, target, test_size=0.3, random_state=47)

# Perceptron

If you remember, we dealt with this problem in one of the previous notebooks, when we were talking about the perceptron algorithm. Just as a reminder, let's do it once again here, so we can compare the results. I moved it into a separate module, so I don't need to copy-paste it here once again. If you are interested, it is in the [src/perceptron_05.py](src/perceptron_05.py) file.

In [3]:
from src.perceptron_05 import multiclass_perceptron

# Baseline: run the multiclass perceptron on the same train/test split and
# keep its accuracies for comparison with the models below.
train_acc, test_acc = multiclass_perceptron(train_data, train_target, test_data, test_target, iters=500, random_state=42)

print(f"Train accuracy: {train_acc}, Test accuracy: {test_acc}")

Train accuracy: 0.9741020408163266, Test accuracy: 0.911


The results don't tell us much yet. The test accuracy is perhaps too low compared to the train accuracy, but we will see how the other models behave.

# One-vs-one

In [11]:
from src.neuron_05 import Neuron

In [7]:
class BCELoss:
    """Binary cross-entropy loss with clipped probabilities.

    Parameters
    ----------
    eps : float, optional
        Lower bound applied to ``predicted`` and ``1 - predicted`` before
        taking the logarithm or dividing, so that saturated predictions
        (exactly 0 or 1) never produce ``inf``/``nan``.
    """

    def __init__(self, eps=1e-15):
        self.eps = eps

    def _prepare(self, target, predicted):
        """Return ``target``, clipped ``predicted`` and clipped ``1 - predicted`` as float arrays."""
        # Converting to float first matters: negating an unsigned integer array
        # wraps around, and lists / boolean arrays do not support unary minus.
        target = np.asarray(target, dtype=float)
        predicted = np.asarray(predicted, dtype=float)
        return target, np.maximum(predicted, self.eps), np.maximum(1 - predicted, self.eps)

    def __call__(self, target, predicted):
        """Return the cross-entropy summed over the first axis."""
        target, p_one, p_zero = self._prepare(target, predicted)
        return np.sum(-target * np.log(p_one) - (1 - target) * np.log(p_zero), axis=0)

    def gradient(self, target, predicted):
        """Return the element-wise derivative of the loss with respect to ``predicted``."""
        target, p_one, p_zero = self._prepare(target, predicted)
        return -target / p_one + (1 - target) / p_zero

In [13]:
# Train one binary neuron per unordered pair of digits.
# models[hi][lo] (with hi > lo) is fitted to output 1 for digit `hi` and 0 for digit `lo`.
models = np.empty((10, 10), dtype=object)
digit_pairs = [(hi, lo) for hi in range(10) for lo in range(hi)]
for hi, lo in digit_pairs:
    duel = Neuron(BCELoss(), epochs=200, learning_rate=0.001, batch_size=128, random_state=42 + hi * 10 + lo)
    models[hi][lo] = duel
    # Keep only the samples that belong to one of the two digits.
    in_pair = (train_target == hi) | (train_target == lo)
    pair_X = train_data[in_pair]
    # Rescale the two labels so that lo -> 0 and hi -> 1.
    pair_y = (train_target[in_pair] - lo) / (hi - lo)
    duel.fit(pair_X, pair_y, progress=True)

100% (200 of 200) |######################| Elapsed Time: 0:00:51 Time:  0:00:51
100% (200 of 200) |######################| Elapsed Time: 0:00:46 Time:  0:00:46
100% (200 of 200) |######################| Elapsed Time: 0:00:51 Time:  0:00:51
100% (200 of 200) |######################| Elapsed Time: 0:00:30 Time:  0:00:30
100% (200 of 200) |######################| Elapsed Time: 0:00:21 Time:  0:00:21
100% (200 of 200) |######################| Elapsed Time: 0:00:18 Time:  0:00:18
100% (200 of 200) |######################| Elapsed Time: 0:00:17 Time:  0:00:17
100% (200 of 200) |######################| Elapsed Time: 0:00:18 Time:  0:00:18
100% (200 of 200) |######################| Elapsed Time: 0:00:17 Time:  0:00:17
100% (200 of 200) |######################| Elapsed Time: 0:00:20 Time:  0:00:20
100% (200 of 200) |######################| Elapsed Time: 0:00:20 Time:  0:00:20
100% (200 of 200) |######################| Elapsed Time: 0:00:21 Time:  0:00:21
100% (200 of 200) |#####################

In [18]:
# Hard voting: every pairwise model casts one vote per sample for the digit
# its rounded output selects; the digit with the most votes wins.
train_votes = np.zeros((train_target.shape[0], 10), dtype=int)
test_votes = np.zeros((test_target.shape[0], 10), dtype=int)
for hi in range(10):
    for lo in range(hi):
        duel = models[hi][lo]
        for samples, votes in ((train_data, train_votes), (test_data, test_votes)):
            winner = np.around(duel.predict(samples))
            votes[winner == 0, lo] += 1
            votes[winner == 1, hi] += 1
train_predictions = train_votes.argmax(axis=1)
test_predictions = test_votes.argmax(axis=1)

In [19]:
# Score the current predictions against the true labels of each split.
train_acc, test_acc = (
    sklearn.metrics.accuracy_score(truth, guess)
    for truth, guess in ((train_target, train_predictions), (test_target, test_predictions))
)

In [20]:
# Show the train and test accuracy side by side.
print(f"Train accuracy: {train_acc}, Test accuracy: {test_acc}")

Train accuracy: 0.945, Test accuracy: 0.9122857142857143


In [22]:
# Soft voting: instead of a rounded vote, every pairwise model adds its raw
# output to digit `hi` and the complement to digit `lo`; the highest total wins.
train_scores = np.zeros((train_target.shape[0], 10), dtype=float)
test_scores = np.zeros((test_target.shape[0], 10), dtype=float)
for hi in range(10):
    for lo in range(hi):
        duel = models[hi][lo]
        for samples, scores in ((train_data, train_scores), (test_data, test_scores)):
            p_hi = duel.predict(samples)
            scores[:, lo] += 1 - p_hi
            scores[:, hi] += p_hi
train_predictions = train_scores.argmax(axis=1)
test_predictions = test_scores.argmax(axis=1)

In [23]:
# Score the current predictions against the true labels of each split.
train_acc, test_acc = (
    sklearn.metrics.accuracy_score(truth, guess)
    for truth, guess in ((train_target, train_predictions), (test_target, test_predictions))
)

In [24]:
# Show the train and test accuracy side by side.
print(f"Train accuracy: {train_acc}, Test accuracy: {test_acc}")

Train accuracy: 0.9465714285714286, Test accuracy: 0.9174761904761904


# Tree based approach

In [150]:
class TreeInOrder:
    """Ten-digit classifier built as a fixed binary tree of ``Neuron`` models.

    Every node of the tree is a binary classifier separating one contiguous
    group of digits from another (0-4 vs 5-9, then 0-2 vs 3-4, and so on down
    to single digits). The nine node models are stored on the instance under
    the attribute names listed in ``_SPLITS``.

    Parameters
    ----------
    loss : object
        Loss object passed unchanged to every ``Neuron``.
    metrices : list, optional
        Passed unchanged to every ``Neuron``. When omitted, each instance gets
        its own new empty list.
    epochs, learning_rate, batch_size
        Passed unchanged to every ``Neuron``.
    random_state : int or None
        Seed of the ``numpy.random.RandomState`` from which one integer seed
        per node model is drawn.
    """

    # One entry per tree node: (attribute name, digits labelled 0, digits labelled 1).
    # The order is significant: it fixes the order in which node seeds are drawn
    # in ``fit`` and the order in which node outputs are combined when predicting.
    _SPLITS = (
        ("zerotofiveVSfivetoten", (0, 1, 2, 3, 4), (5, 6, 7, 8, 9)),
        ("zerotothreeVSthreetofive", (0, 1, 2), (3, 4)),
        ("fivetoeightVSeighttoten", (5, 6, 7), (8, 9)),
        ("zerototwoVStwo", (0, 1), (2,)),
        ("fivetosevenVSseven", (5, 6), (7,)),
        ("zeroVSone", (0,), (1,)),
        ("threeVSfour", (3,), (4,)),
        ("fiveVSsix", (5,), (6,)),
        ("eightVSnine", (8,), (9,)),
    )

    def __init__(self, loss, metrices=None, epochs=100, random_state=None, learning_rate=0.001, batch_size=16):
        self.loss = loss
        # A literal ``[]`` default would be one list shared by every instance.
        self.metrices = [] if metrices is None else metrices
        self.epochs = epochs
        self.random_state = np.random.RandomState(random_state)
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        # Node models; filled in by ``fit``.
        for name, _, _ in self._SPLITS:
            setattr(self, name, None)

    def fit(self, X, y, progress=False):
        """Train every node on the samples whose label belongs to that node.

        Parameters
        ----------
        X : array-like
            Samples, one per row.
        y : array-like
            Digit label of every row of ``X``. Not modified.
        progress : bool
            Forwarded to ``Neuron.fit``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        for name, zero_digits, one_digits in self._SPLITS:
            is_one = np.isin(y, one_digits)
            in_node = is_one | np.isin(y, zero_digits)
            node = Neuron(self.loss, self.metrices, self.epochs, self.random_state.randint(0, 1000), self.learning_rate, self.batch_size)
            setattr(self, name, node)
            # Boolean indexing already copies, so the caller's arrays stay untouched.
            node.fit(X[in_node], is_one[in_node].astype(y.dtype), progress=progress)

    def _leaf_probabilities(self, X):
        """Return a (len(X), 10) array: product of node outputs along each digit's path."""
        probs = np.ones((len(X), 10))
        for name, zero_digits, one_digits in self._SPLITS:
            pred = getattr(self, name).predict(X)
            probs[:, list(zero_digits)] *= (1 - pred)[:, np.newaxis]
            probs[:, list(one_digits)] *= pred[:, np.newaxis]
        return probs

    def predict_probbased(self, X):
        """Return, per sample, the digit with the largest product of node outputs along its path."""
        return np.argmax(self._leaf_probabilities(X), axis=1)

    def predict_direct(self, X):
        """Return, per sample, the digit reached by thresholding every node at 0.5.

        The result is a float array; a sample for which no path is satisfied
        (e.g. a node returned NaN) keeps the value -1.
        """
        reachable = np.ones((len(X), 10), dtype=bool)
        for name, zero_digits, one_digits in self._SPLITS:
            pred = getattr(self, name).predict(X)
            # Two explicit comparisons, so a NaN output satisfies neither branch.
            reachable[:, list(zero_digits)] &= (pred < 0.5)[:, np.newaxis]
            reachable[:, list(one_digits)] &= (pred >= 0.5)[:, np.newaxis]
        result = np.full(len(X), -1.0)
        decided = reachable.any(axis=1)
        # Paths are mutually exclusive, so at most one digit is reachable per row.
        result[decided] = reachable[decided].argmax(axis=1)
        return result

In [155]:
# Build the digit tree with the BCE loss and train all of its node classifiers
# on the training split.
tree = TreeInOrder(BCELoss(), epochs=400, learning_rate=0.001, batch_size=128, random_state=42)
tree.fit(train_data, train_target, progress=True)

100% (400 of 400) |######################| Elapsed Time: 0:02:35 Time:  0:02:35
100% (400 of 400) |######################| Elapsed Time: 0:01:51 Time:  0:01:51
100% (400 of 400) |######################| Elapsed Time: 0:01:49 Time:  0:01:49
100% (400 of 400) |######################| Elapsed Time: 0:01:36 Time:  0:01:36
100% (400 of 400) |######################| Elapsed Time: 0:01:32 Time:  0:01:32
100% (400 of 400) |######################| Elapsed Time: 0:00:53 Time:  0:00:53
100% (400 of 400) |######################| Elapsed Time: 0:00:45 Time:  0:00:45
100% (400 of 400) |######################| Elapsed Time: 0:00:52 Time:  0:00:52
100% (400 of 400) |######################| Elapsed Time: 0:01:14 Time:  0:01:14


In [156]:
# Classify by multiplying the node outputs along every path of the tree and
# picking the digit with the largest product.
train_predictions = tree.predict_probbased(train_data)
test_predictions = tree.predict_probbased(test_data)

In [157]:
# Score the current predictions against the true labels of each split.
train_acc, test_acc = (
    sklearn.metrics.accuracy_score(truth, guess)
    for truth, guess in ((train_target, train_predictions), (test_target, test_predictions))
)

In [158]:
# Show the train and test accuracy side by side.
print(f"Train accuracy: {train_acc}, Test accuracy: {test_acc}")

Train accuracy: 0.8295102040816327, Test accuracy: 0.814952380952381


In [159]:
# Classify by following the thresholded (0.5) decision of each node from the
# root down to a single digit.
train_predictions = tree.predict_direct(train_data)
test_predictions = tree.predict_direct(test_data)

In [160]:
# Score the current predictions against the true labels of each split.
train_acc, test_acc = (
    sklearn.metrics.accuracy_score(truth, guess)
    for truth, guess in ((train_target, train_predictions), (test_target, test_predictions))
)

In [161]:
# Show the train and test accuracy side by side.
print(f"Train accuracy: {train_acc}, Test accuracy: {test_acc}")

Train accuracy: 0.818530612244898, Test accuracy: 0.8069047619047619


# One-to-rest

# Softmax