In [1]:
from sklearn import datasets, metrics
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
import MNIST
import NCC
import NBC
import GNB

In [2]:
# Load scikit-learn's digits data set and hold out 30% of it for testing (fixed seed).
digits = datasets.load_digits()
digits_train, digits_test, y_digits_train, y_digits_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.3,
    random_state=163,
)

In [3]:
# Quantise every feature value into three levels:
#   value < 5 -> 0,   5 <= value <= 10 -> 1,   value > 10 -> 2.
# The levels are written into a COPY. Assigning `digits.data` directly would only
# create an alias, so the binning would overwrite the raw data set in place and
# re-running this cell (or the earlier split cell) would give different results.
raw_pixels = digits.data
is_low = raw_pixels < 5
is_high = raw_pixels > 10

binned_digits = raw_pixels.copy()
binned_digits[is_low] = 0
binned_digits[is_high] = 2
binned_digits[~(is_low | is_high)] = 1  # neither low nor high -> middle level

# Split with the same test_size and random_state as the raw digits split.
binned_train, binned_test, y_binned_train, y_binned_test = train_test_split(
    binned_digits, digits.target, test_size=0.3, random_state=163
)

In [4]:
# Build the MNIST_Light data object (the argument is presumably a glob over the
# PNG files - confirm in the MNIST module) and unpack its four-way split.
mnist = MNIST.MNISTData('MNIST_Light/*/*.png')
(train_features, test_features,
 train_labels, test_labels) = mnist.get_data()

In [5]:
### Digits data set ###

In [6]:
# Reference model: scikit-learn's GaussianNB, fitted and scored on the raw digits split.
gnb_sk = GaussianNB()
gnb_sk.fit(digits_train, y_digits_train)
y_pred = gnb_sk.predict(digits_test)

# Compute both summaries first, then print each under its heading.
report = metrics.classification_report(y_digits_test, y_pred)
confusion = metrics.confusion_matrix(y_digits_test, y_pred)
print("Classification report SKLearn GNB:\n%s\n" % report)
print("Confusion matrix SKLearn GNB:\n%s" % confusion)

Classification report SKLearn GNB:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99        55
           1       0.67      0.94      0.78        64
           2       0.92      0.51      0.66        47
           3       0.92      0.78      0.84        63
           4       0.98      0.83      0.90        53
           5       0.85      0.90      0.87        49
           6       1.00      0.98      0.99        53
           7       0.72      0.98      0.83        51
           8       0.51      0.68      0.58        47
           9       0.97      0.59      0.73        58

    accuracy                           0.82       540
   macro avg       0.85      0.82      0.82       540
weighted avg       0.86      0.82      0.82       540


Confusion matrix SKLearn GNB:
[[55  0  0  0  0  0  0  0  0  0]
 [ 0 60  0  0  0  0  0  1  3  0]
 [ 0  7 24  1  0  0  0  0 15  0]
 [ 0  1  1 49  0  3  0  1  8  0]
 [ 0  3  0  0 44  1  0  5  0  0]
 [ 0  0  0

In [7]:
# Project NCC classifier (from the local NCC module), fitted and scored on the raw digits split.
ncc = NCC.NCC()
ncc.fit(digits_train, y_digits_train)
y_pred = ncc.predict(digits_test)

# Compute both summaries first, then print each under its heading.
report = metrics.classification_report(y_digits_test, y_pred)
confusion = metrics.confusion_matrix(y_digits_test, y_pred)
print("Classification report My NCC:\n%s\n" % report)
print("Confusion matrix My NCC:\n%s" % confusion)

Classification report My NCC:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99        55
           1       0.56      0.97      0.71        64
           2       1.00      0.53      0.69        47
           3       0.75      0.94      0.83        63
           4       1.00      0.81      0.90        53
           5       1.00      0.65      0.79        49
           6       1.00      0.89      0.94        53
           7       0.88      0.98      0.93        51
           8       1.00      0.21      0.35        47
           9       0.64      0.88      0.74        58

    accuracy                           0.80       540
   macro avg       0.88      0.79      0.79       540
weighted avg       0.87      0.80      0.79       540


Confusion matrix My NCC:
[[55  0  0  0  0  0  0  0  0  0]
 [ 0 62  0  0  0  0  0  0  0  2]
 [ 0  7 25 15  0  0  0  0  0  0]
 [ 0  1  0 59  0  0  0  1  0  2]
 [ 1  8  0  0 43  0  0  1  0  0]
 [ 0  2  0

In [8]:
# Project NBC classifier (from the local NBC module), fitted and scored on the raw digits split.
nbc = NBC.NBC()
nbc.fit(digits_train, y_digits_train)
y_pred = nbc.predict(digits_test)

# Compute both summaries first, then print each under its heading.
report = metrics.classification_report(y_digits_test, y_pred)
confusion = metrics.confusion_matrix(y_digits_test, y_pred)
print("Classification report My NBC:\n%s\n" % report)
print("Confusion matrix My NBC:\n%s" % confusion)

Classification report My NBC:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99        55
           1       0.95      0.88      0.91        64
           2       0.89      0.85      0.87        47
           3       0.86      0.90      0.88        63
           4       0.94      0.94      0.94        53
           5       0.92      0.90      0.91        49
           6       0.98      1.00      0.99        53
           7       0.88      1.00      0.94        51
           8       0.81      0.81      0.81        47
           9       0.84      0.81      0.82        58

    accuracy                           0.91       540
   macro avg       0.91      0.91      0.91       540
weighted avg       0.91      0.91      0.91       540


Confusion matrix My NBC:
[[54  0  0  0  1  0  0  0  0  0]
 [ 0 56  4  0  0  1  1  0  1  1]
 [ 0  1 40  4  0  0  0  0  2  0]
 [ 0  0  0 57  0  0  0  1  3  2]
 [ 0  0  0  0 50  1  0  2  0  0]
 [ 0  0  0  1  0 44 

In [10]:
# Project GNB classifier (from the local GNB module), fitted and scored on the raw digits split.
gnb = GNB.GNB()
gnb.fit(digits_train, y_digits_train)
y_pred = gnb.predict(digits_test)

# Compute both summaries first, then print each under its heading.
report = metrics.classification_report(y_digits_test, y_pred)
confusion = metrics.confusion_matrix(y_digits_test, y_pred)
print("Classification report My GNB:\n%s\n" % report)
print("Confusion matrix My GNB:\n%s" % confusion)

100%|████████████████████████████████████████| 540/540 [00:32<00:00, 16.75it/s]


Classification report My GNB:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99        55
           1       0.84      0.88      0.85        64
           2       0.89      0.68      0.77        47
           3       0.92      0.87      0.89        63
           4       0.94      0.92      0.93        53
           5       0.86      0.90      0.88        49
           6       1.00      0.98      0.99        53
           7       0.81      0.98      0.88        51
           8       0.67      0.85      0.75        47
           9       0.89      0.71      0.79        58

    accuracy                           0.88       540
   macro avg       0.88      0.88      0.87       540
weighted avg       0.88      0.88      0.88       540


Confusion matrix My GNB:
[[54  0  0  0  1  0  0  0  0  0]
 [ 0 56  2  0  0  0  0  1  2  3]
 [ 0  4 32  1  0  0  0  0 10  0]
 [ 0  0  1 55  0  1  0  1  5  0]
 [ 0  0  0  0 49  1  0  2  0  1]
 [ 0  0  0  2  0 44 

In [None]:
### Digits summarised data set ###

In [11]:
# Reference model: scikit-learn's GaussianNB, fitted and scored on the binned digits split.
gnb_sk = GaussianNB()
gnb_sk.fit(binned_train, y_binned_train)
y_pred = gnb_sk.predict(binned_test)

# Compute both summaries first, then print each under its heading.
report = metrics.classification_report(y_binned_test, y_pred)
confusion = metrics.confusion_matrix(y_binned_test, y_pred)
print("Classification report SKLearn GNB:\n%s\n" % report)
print("Confusion matrix SKLearn GNB:\n%s" % confusion)

Classification report SKLearn GNB:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98        55
           1       0.84      0.59      0.70        64
           2       0.83      0.62      0.71        47
           3       0.87      0.52      0.65        63
           4       0.97      0.58      0.73        53
           5       0.89      0.69      0.78        49
           6       0.89      0.96      0.93        53
           7       0.56      0.94      0.71        51
           8       0.35      0.89      0.50        47
           9       0.94      0.52      0.67        58

    accuracy                           0.72       540
   macro avg       0.81      0.73      0.74       540
weighted avg       0.82      0.72      0.74       540


Confusion matrix SKLearn GNB:
[[55  0  0  0  0  0  0  0  0  0]
 [ 0 38  3  0  0  1  0  5 16  1]
 [ 0  0 29  0  0  0  0  0 18  0]
 [ 0  0  2 33  0  0  0  3 25  0]
 [ 0  3  0  0 31  1  5 12  1  0]
 [ 1  0  0

In [12]:
# Project NCC classifier (from the local NCC module), fitted and scored on the binned digits split.
ncc = NCC.NCC()
ncc.fit(binned_train, y_binned_train)
y_pred = ncc.predict(binned_test)

# Compute both summaries first, then print each under its heading.
report = metrics.classification_report(y_binned_test, y_pred)
confusion = metrics.confusion_matrix(y_binned_test, y_pred)
print("Classification report My NCC:\n%s\n" % report)
print("Confusion matrix My NCC:\n%s" % confusion)

Classification report My NCC:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99        55
           1       0.44      0.98      0.61        64
           2       1.00      0.53      0.69        47
           3       0.77      0.94      0.84        63
           4       0.98      0.77      0.86        53
           5       0.97      0.61      0.75        49
           6       1.00      0.81      0.90        53
           7       0.87      0.90      0.88        51
           8       1.00      0.09      0.16        47
           9       0.67      0.79      0.72        58

    accuracy                           0.76       540
   macro avg       0.87      0.74      0.74       540
weighted avg       0.85      0.76      0.75       540


Confusion matrix My NCC:
[[54  0  0  0  1  0  0  0  0  0]
 [ 0 63  0  0  0  0  0  0  0  1]
 [ 0 11 25 11  0  0  0  0  0  0]
 [ 0  2  0 59  0  0  0  1  0  1]
 [ 0 11  0  0 41  0  0  1  0  0]
 [ 0  3  0  3  0 30 

In [13]:
# Project NBC classifier (from the local NBC module), fitted and scored on the binned digits split.
nbc = NBC.NBC()
nbc.fit(binned_train, y_binned_train)
y_pred = nbc.predict(binned_test)

# Compute both summaries first, then print each under its heading.
report = metrics.classification_report(y_binned_test, y_pred)
confusion = metrics.confusion_matrix(y_binned_test, y_pred)
print("Classification report My NBC:\n%s\n" % report)
print("Confusion matrix My NBC:\n%s" % confusion)

Classification report My NBC:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99        55
           1       0.87      0.83      0.85        64
           2       0.93      0.89      0.91        47
           3       0.94      0.95      0.94        63
           4       0.96      0.91      0.93        53
           5       0.90      0.88      0.89        49
           6       1.00      0.98      0.99        53
           7       0.86      1.00      0.93        51
           8       0.81      0.83      0.82        47
           9       0.85      0.86      0.85        58

    accuracy                           0.91       540
   macro avg       0.91      0.91      0.91       540
weighted avg       0.91      0.91      0.91       540


Confusion matrix My NBC:
[[54  0  0  0  1  0  0  0  0  0]
 [ 0 53  3  0  0  1  0  0  5  2]
 [ 0  1 42  2  0  0  0  0  1  1]
 [ 0  0  0 60  0  0  0  1  2  0]
 [ 0  2  0  0 48  0  0  3  0  0]
 [ 0  1  0  1  0 43 

In [14]:
# Project GNB classifier (from the local GNB module), fitted and scored on the binned digits split.
gnb = GNB.GNB()
gnb.fit(binned_train, y_binned_train)
y_pred = gnb.predict(binned_test)

# Compute both summaries first, then print each under its heading.
report = metrics.classification_report(y_binned_test, y_pred)
confusion = metrics.confusion_matrix(y_binned_test, y_pred)
print("Classification report My GNB:\n%s\n" % report)
print("Confusion matrix My GNB:\n%s" % confusion)

100%|████████████████████████████████████████| 540/540 [00:33<00:00, 15.70it/s]


Classification report My GNB:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99        55
           1       0.90      0.72      0.80        64
           2       0.89      0.85      0.87        47
           3       0.93      0.84      0.88        63
           4       0.96      0.85      0.90        53
           5       0.87      0.92      0.89        49
           6       1.00      0.96      0.98        53
           7       0.76      0.94      0.84        51
           8       0.60      0.91      0.72        47
           9       0.92      0.76      0.83        58

    accuracy                           0.87       540
   macro avg       0.88      0.87      0.87       540
weighted avg       0.89      0.87      0.87       540


Confusion matrix My GNB:
[[54  0  0  0  1  0  0  0  0  0]
 [ 0 46  3  0  0  1  0  2 10  2]
 [ 0  1 40  0  0  0  0  0  6  0]
 [ 0  0  2 53  0  0  0  2  5  1]
 [ 0  2  0  0 45  1  0  3  2  0]
 [ 0  0  0  2  0 45 

In [None]:
### MNIST_Light data set ###

In [15]:
# Reference model: scikit-learn's GaussianNB, fitted and scored on the MNIST_Light split.
gnb_sk = GaussianNB()
gnb_sk.fit(train_features, train_labels)
y_pred = gnb_sk.predict(test_features)

# Compute both summaries first, then print each under its heading.
report = metrics.classification_report(test_labels, y_pred)
confusion = metrics.confusion_matrix(test_labels, y_pred)
print("Classification report SKLearn GNB:\n%s\n" % report)
print("Confusion matrix SKLearn GNB:\n%s" % confusion)

Classification report SKLearn GNB:
              precision    recall  f1-score   support

           0       0.54      0.94      0.69       164
           1       0.71      0.94      0.81       152
           2       0.83      0.50      0.62       155
           3       0.83      0.53      0.65       154
           4       0.75      0.31      0.44       143
           5       0.67      0.16      0.25       141
           6       0.81      0.85      0.83       143
           7       0.83      0.82      0.83       158
           8       0.41      0.64      0.50       132
           9       0.60      0.84      0.70       158

    accuracy                           0.66      1500
   macro avg       0.70      0.65      0.63      1500
weighted avg       0.70      0.66      0.64      1500


Confusion matrix SKLearn GNB:
[[154   0   6   0   1   1   0   0   1   1]
 [  1 143   1   0   0   1   0   1   3   2]
 [ 11   6  77  10   2   1  19   1  27   1]
 [ 32  11   5  82   0   0   2   3  12   7]
 [ 

In [16]:
# Project NCC classifier (from the local NCC module), fitted and scored on the MNIST_Light split.
ncc = NCC.NCC()
ncc.fit(train_features, train_labels)
y_pred = ncc.predict(test_features)

# Compute both summaries first, then print each under its heading.
report = metrics.classification_report(test_labels, y_pred)
confusion = metrics.confusion_matrix(test_labels, y_pred)
print("Classification report My NCC:\n%s\n" % report)
print("Confusion matrix My NCC:\n%s" % confusion)

Classification report My NCC:
              precision    recall  f1-score   support

           0       0.89      0.95      0.92       164
           1       0.77      0.97      0.86       152
           2       0.86      0.75      0.80       155
           3       0.71      0.77      0.74       154
           4       0.77      0.76      0.76       143
           5       0.83      0.63      0.72       141
           6       0.90      0.87      0.89       143
           7       0.95      0.82      0.88       158
           8       0.72      0.80      0.76       132
           9       0.76      0.78      0.77       158

    accuracy                           0.81      1500
   macro avg       0.82      0.81      0.81      1500
weighted avg       0.82      0.81      0.81      1500


Confusion matrix My NCC:
[[156   0   1   0   0   2   3   0   2   0]
 [  0 147   0   0   0   2   0   0   2   1]
 [  0  12 116  10   2   1   2   1  10   1]
 [  3   3   7 119   0   2   1   2  13   4]
 [  1   3   2

In [17]:
# Project NBC classifier (from the local NBC module), fitted and scored on the MNIST_Light split.
nbc = NBC.NBC()
nbc.fit(train_features, train_labels)
y_pred = nbc.predict(test_features)

# Compute both summaries first, then print each under its heading.
report = metrics.classification_report(test_labels, y_pred)
confusion = metrics.confusion_matrix(test_labels, y_pred)
print("Classification report My NBC:\n%s\n" % report)
print("Confusion matrix My NBC:\n%s" % confusion)

Classification report My NBC:
              precision    recall  f1-score   support

           0       1.00      0.12      0.22       164
           1       0.84      0.98      0.90       152
           2       0.74      0.17      0.27       155
           3       0.68      0.29      0.40       154
           4       0.77      0.55      0.64       143
           5       0.72      0.42      0.53       141
           6       0.83      0.71      0.76       143
           7       0.92      0.75      0.83       158
           8       0.57      0.38      0.46       132
           9       0.20      0.87      0.33       158

    accuracy                           0.52      1500
   macro avg       0.73      0.52      0.53      1500
weighted avg       0.73      0.52      0.53      1500


Confusion matrix My NBC:
[[ 20   0   0   2   1   3   2   1   2 133]
 [  0 149   0   0   0   1   1   0   1   0]
 [  0   3  26   4   3   2  12   0  10  95]
 [  0   2   5  44   0   5   2   1   8  87]
 [  0   2   0

In [18]:
# Project GNB classifier (from the local GNB module), fitted and scored on the MNIST_Light split.
gnb = GNB.GNB()
gnb.fit(train_features, train_labels)
y_pred = gnb.predict(test_features)

# Compute both summaries first, then print each under its heading.
report = metrics.classification_report(test_labels, y_pred)
confusion = metrics.confusion_matrix(test_labels, y_pred)
print("Classification report My GNB:\n%s\n" % report)
print("Confusion matrix My GNB:\n%s" % confusion)

  if prospect_prob >= max_prob:
  if prospect_prob >= max_prob:
100%|██████████████████████████████████████| 1500/1500 [10:04<00:00,  2.61it/s]


Classification report My GNB:
              precision    recall  f1-score   support

           0       0.88      0.93      0.90       164
           1       0.78      0.97      0.86       152
           2       0.79      0.60      0.68       155
           3       0.80      0.77      0.79       154
           4       0.83      0.60      0.70       143
           5       0.90      0.50      0.65       141
           6       0.81      0.94      0.87       143
           7       0.95      0.77      0.85       158
           8       0.59      0.73      0.65       132
           9       0.60      0.90      0.72       158

    accuracy                           0.77      1500
   macro avg       0.79      0.77      0.77      1500
weighted avg       0.80      0.77      0.77      1500


Confusion matrix My GNB:
[[152   0   5   0   0   1   1   0   3   2]
 [  0 147   0   0   0   0   1   0   4   0]
 [  1   5  93   9   1   1  17   1  26   1]
 [  1   5  13 119   0   1   2   2   5   6]
 [  2   1   2