# Bayesian Classifiers

In [1]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.naive_bayes import GaussianNB

## Data Preprocessing

### SciKitLearn digits

In [2]:
from sklearn import datasets
digits = datasets.load_digits()
x_digits = digits.data
y_digits = digits.target

Spliting the data set into 70% training data and 30% test data.

In [3]:
from sklearn.model_selection import train_test_split
x_digits_train, x_digits_test, y_digits_train, y_digits_test = train_test_split(x_digits, y_digits, test_size=0.3, random_state=42)

### SciKitLearn digits summarised

Modify your data set to contain only three values for the attributes:

In [4]:
# 0 for 'dark', 1 for 'grey' and 2 for 'light',
X = np.array(x_digits)
for i in range(len(x_digits)):
    for j in range(64):
        if x_digits[i,j] < 5:
            X[i,j] = 0
        elif x_digits[i,j] > 10:
            X[i,j] = 2
        else:
            X[i,j] = 1            
# Splitting the new data
x_sum_train, x_sum_test, y_sum_train, y_sum_test = train_test_split(X, y_digits, test_size=0.3, random_state=42)

### MNIST_Light

In [5]:
import glob
from PIL import Image
filelist = sorted(glob.glob('MNIST_Light/*/*.png'))
#x = np.array([np.array(Image.open(fname).resize((8,8))) for fname in filelist])
x = np.array([np.array(Image.open(fname)) for fname in filelist])

samples_per_class = 500
number_of_classes = 10

y = np.zeros(number_of_classes * samples_per_class,dtype=int)
for cls in range(1, number_of_classes):
    y[(cls*500):(cls+1)*500] = cls

split the set into 70% training and 30% test data

In [6]:
train_features, test_features, train_labels, test_labels = train_test_split(x, y, test_size=0.3, random_state=42)

One flattened array per image && the normalization of pixels from [0 ... 255] to [0.0 ... 1.0]

In [7]:
train_normalised = train_features.reshape(3500, 400)/1.0
test_normalised = test_features.reshape(1500, 400)/1.0
x_light_train, x_light_test, y_light_train, y_light_test =  train_normalised, test_normalised, train_labels, test_labels
x_norm_train, x_norm_test = x_light_train/255.0, x_light_test/255.0

## Nearest Centroid Classifier (NCC)

In [346]:
import heapq
class NCC:
    def __init__(self):
        self.centroids = dict()
        self.nbLabels = dict()
    
    # The NCC fit method should simply compute 
    # the mean values over the attribute values of the examples for each class
    def fit(self, data, target):
        data = np.array(data)
        target = np.array(target)
        
        for i in range(data.shape[0]):
            if target[i] not in self.centroids:
                self.centroids[target[i]] = data[i]
                self.nbLabels[target[i]] = 1
            else:
                self.centroids[target[i]] += data[i]
                self.nbLabels[target[i]] += 1
        for label in self.nbLabels:
            self.centroids[label] /= self.nbLabels[label]
    
    # Prediction is then done by finding 
    # the argmin over the distances from the class centroids for each sample
    def predict(self, test):
        x = np.array(test)
        length = x.shape[0]
        y_hat = np.zeros((length, ))  
        for i in range(length):
            distance = dict()
            distances = []
            for label in self.nbLabels:
                norm = np.linalg.norm(x[i]-self.centroids[label])
                heapq.heappush(distances, norm)
                distance[norm] = label
            y_hat[i] = distance[distances[0]]
        return y_hat
            
    def __repr__(self):
        return "NCC()"
    def __str__(self):
        return "NCC()"

#### SciKitLearn digits

In [347]:
ncc = NCC()
ncc.fit(x_digits_train, y_digits_train)
y_hat = ncc.predict(x_digits_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_digits_test, y_hat), "\n")
print("classification report:\n", classification_report(y_digits_test, y_hat), "\n")

Confusion matrix:
 [[52  0  0  0  0  1  0  0  0  0]
 [ 0 35  7  0  0  0  0  0  4  4]
 [ 0  1 43  1  0  0  0  0  2  0]
 [ 0  0  0 48  0  0  0  1  4  1]
 [ 0  3  0  0 55  0  0  2  0  0]
 [ 0  0  0  0  1 52  1  0  0 12]
 [ 1  0  0  0  0  0 52  0  0  0]
 [ 0  0  0  0  0  1  0 54  0  0]
 [ 0  3  0  0  0  1  0  0 38  1]
 [ 0  1  0  1  1  1  0  3  1 51]] 

classification report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98        53
           1       0.81      0.70      0.75        50
           2       0.86      0.91      0.89        47
           3       0.96      0.89      0.92        54
           4       0.96      0.92      0.94        60
           5       0.93      0.79      0.85        66
           6       0.98      0.98      0.98        53
           7       0.90      0.98      0.94        55
           8       0.78      0.88      0.83        43
           9       0.74      0.86      0.80        59

    accuracy                

#### SciKitLearn digits summarised

In [348]:
ncc = NCC()
ncc.fit(x_sum_train, y_sum_train)
y_sum_hat = ncc.predict(x_sum_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_sum_test, y_sum_hat), "\n")
print("classification report:\n", classification_report(y_sum_test, y_sum_hat), "\n")

Confusion matrix:
 [[51  0  0  0  1  1  0  0  0  0]
 [ 0 33  6  0  0  1  0  0  6  4]
 [ 0  3 41  1  0  0  0  0  2  0]
 [ 0  0  0 46  0  0  0  2  4  2]
 [ 0  3  0  0 55  0  0  2  0  0]
 [ 0  0  0  0  1 53  2  0  0 10]
 [ 1  0  0  0  0  0 52  0  0  0]
 [ 0  0  0  0  0  0  0 55  0  0]
 [ 0  2  0  0  0  1  0  0 37  3]
 [ 0  1  0  2  1  1  0  3  1 50]] 

classification report:
               precision    recall  f1-score   support

           0       0.98      0.96      0.97        53
           1       0.79      0.66      0.72        50
           2       0.87      0.87      0.87        47
           3       0.94      0.85      0.89        54
           4       0.95      0.92      0.93        60
           5       0.93      0.80      0.86        66
           6       0.96      0.98      0.97        53
           7       0.89      1.00      0.94        55
           8       0.74      0.86      0.80        43
           9       0.72      0.85      0.78        59

    accuracy                

#### MNIST_Light

In [349]:
ncc = NCC()
ncc.fit(x_light_train, y_light_train)
y_light_hat = ncc.predict(x_light_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_light_test, y_light_hat), "\n")
print("classification report:\n", classification_report(y_light_test, y_light_hat), "\n")

Confusion matrix:
 [[150   0   2   0   0   6   3   1   2   0]
 [  0 148   0   0   0   2   0   0   2   0]
 [  0  15 113   8   2   3   3   1   8   2]
 [  1   5   8 117   1   7   1   2   8   4]
 [  1   4   2   0 108   0   3   0   1  24]
 [  3   9   0  24   4  97   2   0   1   1]
 [  3   6   2   0   4   5 123   0   0   0]
 [  1  14   2   0   6   1   0 127   1   6]
 [  3   6   4   8   0  12   1   0  95   3]
 [  3   0   1   1  19   2   1   3   2 126]] 

classification report:
               precision    recall  f1-score   support

           0       0.91      0.91      0.91       164
           1       0.71      0.97      0.82       152
           2       0.84      0.73      0.78       155
           3       0.74      0.76      0.75       154
           4       0.75      0.76      0.75       143
           5       0.72      0.69      0.70       141
           6       0.90      0.86      0.88       143
           7       0.95      0.80      0.87       158
           8       0.79      0.72    

#### MNIST_Light: Normalized

In [350]:
ncc = NCC()
ncc.fit(x_norm_train, y_light_train)
y_light_hat = ncc.predict(x_norm_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_light_test, y_light_hat), "\n")
print("classification report:\n", classification_report(y_light_test, y_light_hat), "\n")

Confusion matrix:
 [[150   0   2   0   0   6   3   1   2   0]
 [  0 148   0   0   0   2   0   0   2   0]
 [  0  15 113   8   2   3   3   1   8   2]
 [  1   5   8 117   1   7   1   2   8   4]
 [  1   4   2   0 108   0   3   0   1  24]
 [  3   9   0  24   4  97   2   0   1   1]
 [  3   6   2   0   4   5 123   0   0   0]
 [  1  14   2   0   6   1   0 127   1   6]
 [  3   6   4   8   0  12   1   0  95   3]
 [  3   0   1   1  19   2   1   3   2 126]] 

classification report:
               precision    recall  f1-score   support

           0       0.91      0.91      0.91       164
           1       0.71      0.97      0.82       152
           2       0.84      0.73      0.78       155
           3       0.74      0.76      0.75       154
           4       0.75      0.76      0.75       143
           5       0.72      0.69      0.70       141
           6       0.90      0.86      0.88       143
           7       0.95      0.80      0.87       158
           8       0.79      0.72    

## Naive Bayesian Classifier (NBC)

In [8]:
import heapq
class NBC:
    def __init__(self, alpha = 0):
        self.condition = dict()
        self.nbLabels = dict()
        self.prior = dict()
        self.alpha = alpha
    
    def fit(self, data, target):
        data = np.array(data)
        target = np.array(target)
        
        for i in range(data.shape[0]):
            if target[i] not in self.condition:
                self.condition[target[i]] = dict(zip([i for i in range(data.shape[1])], [{} for _ in range(data.shape[1])]))
                self.nbLabels[target[i]] = 1
            else:
                self.nbLabels[target[i]] += 1
            for j in range(data.shape[1]):
                if data[i][j] not in self.condition[target[i]][j]:
                    self.condition[target[i]][j][data[i][j]] = 1
                else:
                    self.condition[target[i]][j][data[i][j]] += 1
           
        for label in self.nbLabels:
            self.prior[label] = self.nbLabels[label] / data.shape[0]
            for j in range(data.shape[1]):
                for attr_value in self.condition[label][j]:
                    self.condition[label][j][attr_value] = (self.condition[label][j][attr_value] + self.alpha)/(self.nbLabels[label]+ self.alpha * data.shape[1])
    
    
    # Prediction is then done by finding 
    # the argmax over product of probabilities p(C_k).p(x1 | C_k)...p(xn | C_k)
    def predict(self, test):
        x = np.array(test)
        length = x.shape[0]
        y_hat = np.zeros((length, ))  
        for i in range(length):
            proba = dict()
            probas = []
            for label in self.nbLabels:
                product = -self.prior[label]
                for j in range(x.shape[1]):
                    product *= self.condition[label][j].get(x[i][j], self.alpha /(self.nbLabels[label]+self.alpha*x.shape[1]))
                heapq.heappush(probas, product)
                proba[product] = label
            y_hat[i] = proba[probas[0]]
        return y_hat
            
    def __repr__(self):
        return "NBC(alpha={})".format(self.alpha)
    def __str__(self):
        return "NBC(alpha={})".format(self.alpha)

#### SciKitLearn digits

In [11]:
nbc = NBC(0.1)
nbc.fit(x_digits_train, y_digits_train)
y_hat = nbc.predict(x_digits_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_digits_test, y_hat), "\n")
print("classification report:\n", classification_report(y_digits_test, y_hat), "\n")

Confusion matrix:
 [[51  0  0  0  1  0  1  0  0  0]
 [ 0 42  5  0  0  1  0  0  1  1]
 [ 0  0 45  0  0  0  0  0  2  0]
 [ 0  0  1 48  0  0  0  0  2  3]
 [ 0  0  0  0 59  0  0  1  0  0]
 [ 0  1  0  1  2 48  1  0  0 13]
 [ 0  0  0  0  1  0 52  0  0  0]
 [ 0  0  0  0  0  1  0 54  0  0]
 [ 0  2  0  1  0  1  0  0 39  0]
 [ 0  0  0  2  1  1  0  1  4 50]] 

classification report:
               precision    recall  f1-score   support

           0       1.00      0.96      0.98        53
           1       0.93      0.84      0.88        50
           2       0.88      0.96      0.92        47
           3       0.92      0.89      0.91        54
           4       0.92      0.98      0.95        60
           5       0.92      0.73      0.81        66
           6       0.96      0.98      0.97        53
           7       0.96      0.98      0.97        55
           8       0.81      0.91      0.86        43
           9       0.75      0.85      0.79        59

    accuracy                

#### SciKitLearn digits summarised

In [368]:
nbc = NBC(1)
nbc.fit(x_sum_train, y_sum_train)
y_sum_hat = nbc.predict(x_sum_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_sum_test, y_sum_hat), "\n")
print("classification report:\n", classification_report(y_sum_test, y_sum_hat), "\n")

Confusion matrix:
 [[51  0  0  0  1  1  0  0  0  0]
 [ 0 39  5  0  0  0  0  0  4  2]
 [ 0  2 42  0  0  0  0  0  3  0]
 [ 0  0  1 47  0  0  0  1  4  1]
 [ 0  2  0  0 55  0  0  3  0  0]
 [ 0  1  0  0  2 50  1  0  1 11]
 [ 1  0  0  0  0  0 52  0  0  0]
 [ 0  0  0  0  0  1  0 54  0  0]
 [ 0  2  0  0  0  1  0  0 40  0]
 [ 0  1  0  4  0  1  0  3  1 49]] 

classification report:
               precision    recall  f1-score   support

           0       0.98      0.96      0.97        53
           1       0.83      0.78      0.80        50
           2       0.88      0.89      0.88        47
           3       0.92      0.87      0.90        54
           4       0.95      0.92      0.93        60
           5       0.93      0.76      0.83        66
           6       0.98      0.98      0.98        53
           7       0.89      0.98      0.93        55
           8       0.75      0.93      0.83        43
           9       0.78      0.83      0.80        59

    accuracy                

#### MNIST_Light

In [370]:
# nbc = NBC(1)
nbc = NBC(2)
nbc.fit(x_light_train, y_light_train)
y_light_hat = nbc.predict(x_light_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_light_test, y_light_hat), "\n")
print("classification report:\n", classification_report(y_light_test, y_light_hat), "\n")

Confusion matrix:
 [[  0   0   0   0   0   0   0 164   0   0]
 [  0  40   0   0   0   0   0 112   0   0]
 [  0   0   0   0   0   0   0 155   0   0]
 [  0   0   0   0   0   0   0 154   0   0]
 [  0   0   0   0   0   0   0 143   0   0]
 [  0   0   0   0   0   0   0 141   0   0]
 [  0   0   0   0   0   0   0 143   0   0]
 [  0   0   0   0   0   0   0 158   0   0]
 [  0   0   0   0   0   0   0 132   0   0]
 [  0   0   0   0   0   0   0 158   0   0]] 

classification report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       164
           1       1.00      0.26      0.42       152
           2       0.00      0.00      0.00       155
           3       0.00      0.00      0.00       154
           4       0.00      0.00      0.00       143
           5       0.00      0.00      0.00       141
           6       0.00      0.00      0.00       143
           7       0.11      1.00      0.20       158
           8       0.00      0.00    

## Gaussian Naive Bayesian Classifier

In [392]:
import heapq
import math

class GNB:
    def __init__(self, epsilon = 0):
        self.epsilon = epsilon
        
        self.nbLabels = dict()
        self.prior = dict()

    def pdf(self, x, mean, std):
        exp = math.exp(-((x - mean) ** 2 / (2 * std ** 2)))
        return (1 / (math.sqrt(2 * math.pi) * std)) * exp

    def fit(self, data, target):
        data = np.array(data)
        target = np.array(target)
        self.means = dict()
        self.std = dict()
    
        for i in range(data.shape[0]):
            if target[i] not in self.nbLabels:
                self.nbLabels[target[i]] = 1 
                self.means[target[i]] = np.zeros(data.shape[1])
            else:
                self.nbLabels[target[i]] += 1
            self.means[target[i]] += self.epsilon + data[i]
                         
        for label in self.nbLabels:
            self.means[label] /= self.nbLabels[label]
            self.prior[label] = self.nbLabels[label] / data.shape[0]
    
        for i in range(data.shape[0]):
            diff = np.longdouble(data[i] - self.means[target[i]])
            if target[i] not in self.std:
                self.std[target[i]] = np.zeros(data.shape[1])
            self.std[target[i]] += (diff * diff)/(self.nbLabels[target[i]] - 1)  

    # the argmax over product of probabilities p(C_k).p(x1 | C_k)...p(xn | C_k)
    def predict(self, test):
        x = np.array(test)
        length = x.shape[0]
        y_hat = np.zeros((length, ))  
        for i in range(length):
            proba = dict()
            probas = []
            for label in self.nbLabels:
                product = np.longdouble(-self.prior[label])
                for j in range(x.shape[1]):
                      product *= self.pdf(x[i][j], self.means[label][j], math.sqrt(self.std[label][j]))
                heapq.heappush(probas, product)
                proba[product] = label
            y_hat[i] = proba[probas[0]]
        return y_hat
            
    def __repr__(self):
        return "GNB(epsilon={})".format(self.epsilon)
    def __str__(self):
        return "GNB(epsilon={})".format(self.epsilon)

#### SciKitLearn digits¶

In [393]:
gnb = GNB(0.1)
gnb.fit(x_digits_train, y_digits_train)
y_hat = gnb.predict(x_digits_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_digits_test, y_hat), "\n")
print("classification report:\n", classification_report(y_digits_test, y_hat), "\n")

Confusion matrix:
 [[50  0  0  0  1  0  0  1  0  1]
 [ 0 36  4  0  0  0  0  0  7  3]
 [ 0  1 42  0  0  0  0  0  4  0]
 [ 0  0  2 46  0  0  0  0  4  2]
 [ 0  0  0  0 59  0  0  0  1  0]
 [ 0  1  0  1  1 58  1  1  0  3]
 [ 0  0  0  0  1  1 51  0  0  0]
 [ 0  0  0  0  0  1  0 53  0  1]
 [ 0  2  0  0  0  0  0  0 41  0]
 [ 0  1  1  3  0  2  1  3  3 45]] 

classification report:
               precision    recall  f1-score   support

           0       1.00      0.94      0.97        53
           1       0.88      0.72      0.79        50
           2       0.86      0.89      0.88        47
           3       0.92      0.85      0.88        54
           4       0.95      0.98      0.97        60
           5       0.94      0.88      0.91        66
           6       0.96      0.96      0.96        53
           7       0.91      0.96      0.94        55
           8       0.68      0.95      0.80        43
           9       0.82      0.76      0.79        59

    accuracy                

#### SciKitLearn digits summarised

In [394]:
gnb = GNB(0.1)
gnb.fit(x_sum_train, y_sum_train)
y_sum_hat = gnb.predict(x_sum_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_sum_test, y_sum_hat), "\n")
print("classification report:\n", classification_report(y_sum_test, y_sum_hat), "\n")

Confusion matrix:
 [[51  0  0  0  1  0  0  1  0  0]
 [ 0 37  4  0  0  0  0  0  5  4]
 [ 0  1 45  0  0  0  0  0  1  0]
 [ 0  0  3 45  0  0  0  0  5  1]
 [ 0  0  0  0 59  0  0  0  1  0]
 [ 0  0  0  1  1 61  1  0  1  1]
 [ 0  0  0  0  1  1 51  0  0  0]
 [ 0  0  0  0  0  0  0 54  0  1]
 [ 0  0  0  0  0  1  0  0 42  0]
 [ 0  0  0  6  1  1  0  3  1 47]] 

classification report:
               precision    recall  f1-score   support

           0       1.00      0.96      0.98        53
           1       0.97      0.74      0.84        50
           2       0.87      0.96      0.91        47
           3       0.87      0.83      0.85        54
           4       0.94      0.98      0.96        60
           5       0.95      0.92      0.94        66
           6       0.98      0.96      0.97        53
           7       0.93      0.98      0.96        55
           8       0.75      0.98      0.85        43
           9       0.87      0.80      0.83        59

    accuracy                

#### MNIST_Light

In [397]:
gnb = GNB(20)
gnb.fit(x_light_train, y_light_train)
y_light_hat = gnb.predict(x_light_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_light_test, y_light_hat), "\n")
print("classification report:\n", classification_report(y_light_test, y_light_hat), "\n")

Confusion matrix:
 [[153   0   2   0   0   2   4   0   2   1]
 [  0 149   0   0   0   1   1   0   1   0]
 [  0   6 106  12   1   4  13   1  11   1]
 [  0   6  16 113   1   2   2   2   6   6]
 [  0   1   2   0 100   1   3   1   2  33]
 [  5   5   0  19   5  97   1   1   2   6]
 [  3   4   0   0   0   2 133   0   1   0]
 [  0  11   2   0   7   0   0 129   0   9]
 [  0  14   3   2   1   5   2   0  94  11]
 [  2   2   0   1  12   1   0   2   1 137]] 

classification report:
               precision    recall  f1-score   support

           0       0.94      0.93      0.94       164
           1       0.75      0.98      0.85       152
           2       0.81      0.68      0.74       155
           3       0.77      0.73      0.75       154
           4       0.79      0.70      0.74       143
           5       0.84      0.69      0.76       141
           6       0.84      0.93      0.88       143
           7       0.95      0.82      0.88       158
           8       0.78      0.71    

#### MNIST_Light: Normalized

In [359]:
gnb = GNB(0.1)
gnb.fit(x_norm_train, y_light_train)
y_light_hat = gnb.predict(x_norm_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_light_test, y_light_hat), "\n")
print("classification report:\n", classification_report(y_light_test, y_light_hat), "\n")

Confusion matrix:
 [[153   0   1   0   0   3   4   0   2   1]
 [  0 149   0   0   0   1   1   0   1   0]
 [  0   7 109  10   4   4   8   1  11   1]
 [  0   7  14 114   1   3   0   2   7   6]
 [  0   2   2   0 103   0   2   1   2  31]
 [  4   5   0  18   4 100   1   1   1   7]
 [  3   5   0   0   1   3 131   0   0   0]
 [  0  11   2   0   7   0   0 129   0   9]
 [  0  16   3   2   2   7   2   0  89  11]
 [  2   4   0   1  12   1   0   2   1 135]] 

classification report:
               precision    recall  f1-score   support

           0       0.94      0.93      0.94       164
           1       0.72      0.98      0.83       152
           2       0.83      0.70      0.76       155
           3       0.79      0.74      0.76       154
           4       0.77      0.72      0.74       143
           5       0.82      0.71      0.76       141
           6       0.88      0.92      0.90       143
           7       0.95      0.82      0.88       158
           8       0.78      0.67    

#### SciKitLearn digits

In [363]:
gnb = GaussianNB(var_smoothing=0.1)
gnb = GaussianNB()
gnb.fit(x_digits_train, y_digits_train)
y_hat = gnb.predict(x_digits_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_digits_test, y_hat), "\n")
print("classification report:\n", classification_report(y_digits_test, y_hat), "\n")

Confusion matrix:
 [[52  0  0  0  0  0  0  1  0  0]
 [ 0 37  2  0  0  0  0  2  6  3]
 [ 0  3 31  0  0  0  1  0 12  0]
 [ 0  0  2 41  0  0  1  0  8  2]
 [ 0  0  0  0 51  0  2  7  0  0]
 [ 0  0  0  1  0 62  1  2  0  0]
 [ 0  0  0  0  1  1 51  0  0  0]
 [ 0  0  0  0  0  1  0 54  0  0]
 [ 0  2  0  0  0  0  0  2 39  0]
 [ 0  1  1  1  0  2  1  7  4 42]] 

classification report:
               precision    recall  f1-score   support

           0       1.00      0.98      0.99        53
           1       0.86      0.74      0.80        50
           2       0.86      0.66      0.75        47
           3       0.95      0.76      0.85        54
           4       0.98      0.85      0.91        60
           5       0.94      0.94      0.94        66
           6       0.89      0.96      0.93        53
           7       0.72      0.98      0.83        55
           8       0.57      0.91      0.70        43
           9       0.89      0.71      0.79        59

    accuracy                

#### SciKitLearn digits summarised¶

In [376]:
gnb = GaussianNB(var_smoothing=0.1)
# gnb = GaussianNB()
gnb.fit(x_sum_train, y_sum_train)
y_sum_hat = gnb.predict(x_sum_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_sum_test, y_sum_hat), "\n")
print("classification report:\n", classification_report(y_sum_test, y_sum_hat), "\n")

Confusion matrix:
 [[51  0  0  0  1  1  0  0  0  0]
 [ 0 36  5  0  0  0  0  0  6  3]
 [ 0  1 44  0  0  0  0  0  2  0]
 [ 0  0  1 47  0  0  0  1  4  1]
 [ 0  0  0  0 59  0  0  1  0  0]
 [ 0  0  0  0  1 59  1  0  0  5]
 [ 0  0  0  0  1  0 52  0  0  0]
 [ 0  0  0  0  0  0  0 54  0  1]
 [ 0  2  0  0  0  1  0  1 39  0]
 [ 0  0  0  6  0  1  0  3  1 48]] 

classification report:
               precision    recall  f1-score   support

           0       1.00      0.96      0.98        53
           1       0.92      0.72      0.81        50
           2       0.88      0.94      0.91        47
           3       0.89      0.87      0.88        54
           4       0.95      0.98      0.97        60
           5       0.95      0.89      0.92        66
           6       0.98      0.98      0.98        53
           7       0.90      0.98      0.94        55
           8       0.75      0.91      0.82        43
           9       0.83      0.81      0.82        59

    accuracy                

#### MNIST_Light

In [377]:
gnb = GaussianNB(var_smoothing=0.1)
# gnb = GaussianNB()
gnb.fit(x_light_train, y_light_train)
y_light_hat = gnb.predict(x_light_test)
# Evaluation:
print("Confusion matrix:\n", confusion_matrix(y_light_test, y_light_hat), "\n")
print("classification report:\n", classification_report(y_light_test, y_light_hat), "\n")

Confusion matrix:
 [[156   0   1   0   0   1   2   0   3   1]
 [  0 149   0   0   0   1   1   0   1   0]
 [  0   6 110  12   1   1  12   1  12   0]
 [  0   7  14 115   1   0   1   2   8   6]
 [  0   2   2   0  95   0   4   0   3  37]
 [  7   6   0  21   3  94   1   1   4   4]
 [  3   5   0   0   0   1 133   0   1   0]
 [  0  12   2   0   5   0   0 128   1  10]
 [  1  13   2   3   0   2   2   0  99  10]
 [  2   4   0   1  11   1   0   2   1 136]] 

classification report:
               precision    recall  f1-score   support

           0       0.92      0.95      0.94       164
           1       0.73      0.98      0.84       152
           2       0.84      0.71      0.77       155
           3       0.76      0.75      0.75       154
           4       0.82      0.66      0.73       143
           5       0.93      0.67      0.78       141
           6       0.85      0.93      0.89       143
           7       0.96      0.81      0.88       158
           8       0.74      0.75    