In [None]:
import numpy as np
import math as ma
import os

In [None]:
def handle_with_mnist(x_file,y_file):
    """Decode one MNIST image/label stream pair into uint8 NumPy arrays.

    Parameters
    ----------
    x_file : binary file object
        Image stream: a 4-byte magic number, then the image count, row count
        and column count as big-endian 32-bit integers, then one byte
        (0~255) per pixel.
    y_file : binary file object
        Label stream: 8 header bytes (magic number and item count, both
        skipped), then one byte (0~9) per label.

    Returns
    -------
    x : numpy.ndarray, dtype uint8, shape (images, rows*cols)
        One flattened image per row.
    y : numpy.ndarray, dtype uint8, shape (images,)
        The label of each image.

    Raises
    ------
    ValueError
        If either stream ends before the number of images announced in the
        image header has been read.
    """
    x_file.read(4) # magic number
    image=int.from_bytes(x_file.read(4),byteorder='big') # number of images
    row=int.from_bytes(x_file.read(4),byteorder='big') # number of rows
    col=int.from_bytes(x_file.read(4),byteorder='big') # number of columns
    y_file.read(8) # magic number & number of images
    pixel=row*col
    # Read each payload in a single call instead of one byte at a time.
    x_raw=x_file.read(image*pixel)
    y_raw=y_file.read(image)
    if len(x_raw)!=image*pixel:
        raise ValueError('image file is truncated: expected %d pixel bytes, got %d'
                         %(image*pixel,len(x_raw)))
    if len(y_raw)!=image:
        raise ValueError('label file is truncated: expected %d labels, got %d'
                         %(image,len(y_raw)))
    # frombuffer returns a read-only view of the bytes; copy() gives the
    # caller ordinary writable arrays.
    x=np.frombuffer(x_raw,dtype=np.uint8).reshape(image,pixel).copy()
    y=np.frombuffer(y_raw,dtype=np.uint8).copy()
    return x,y

In [None]:
def Mnist():
    """Load the MNIST training and test sets.

    The four files 'train-images.idx3-ubyte', 'train-labels.idx1-ubyte',
    't10k-images.idx3-ubyte' and 't10k-labels.idx1-ubyte' are opened by
    relative path, i.e. they are looked up in the current working directory.

    Returns
    -------
    ((train_x, train_y), (test_x, test_y))
        The image and label arrays produced by handle_with_mnist for the
        training pair and the test pair respectively.
    """
    # The with-statement closes all four handles even if parsing fails.
    with open('train-images.idx3-ubyte','rb') as train_x_file, \
         open('train-labels.idx1-ubyte','rb') as train_y_file, \
         open('t10k-images.idx3-ubyte','rb') as test_x_file, \
         open('t10k-labels.idx1-ubyte','rb') as test_y_file:
        train_x,train_y=handle_with_mnist(train_x_file,train_y_file)
        test_x,test_y=handle_with_mnist(test_x_file,test_y_file)
    return (train_x,train_y),(test_x,test_y)

In [None]:
# Load both splits once; the later cells read these four module-level arrays
# (images in *_x, labels in *_y) directly.
(train_x,train_y),(test_x,test_y)=Mnist()

In [None]:
# Discrete version
# 256 levels into 32 bins of levels (8 consecutive grey levels per bin).
# fre_n[d][p][b]: number of training images of digit d whose pixel p falls in bin b.
# num_train[d]:   number of training images of digit d.
n_pixel=train_x.shape[1]
num_train=np.bincount(train_y,minlength=10).astype(np.float64)
fre_n=np.zeros((10,n_pixel,32))
# The pair (pixel, bin) is flattened to pixel*32+bin so that one bincount per
# digit replaces the per-image, per-pixel Python loop.
pixel_base=np.arange(n_pixel,dtype=np.intp)*32
for digit in range(10):
    level=train_x[train_y==digit]//8 # bin index 0~31 of every pixel of this digit
    fre_n[digit]=np.bincount((level+pixel_base).ravel(),
                             minlength=n_pixel*32).reshape(n_pixel,32)

In [None]:
# Continuous version
# mean[d][p] and var[d][p]: mean and population variance (divided by the
# class count, not count-1) of pixel p over the training images of digit d.
mean=np.zeros((10,train_x.shape[1]))
var=np.zeros((10,train_x.shape[1]))
for digit in range(10):
    # Convert to float64 first so the sums and differences are not computed
    # in the integer dtype of the raw pixels.
    sample=train_x[train_y==digit].astype(np.float64)
    mean[digit]=sample.sum(axis=0)/num_train[digit]
    var[digit]=((sample-mean[digit])**2).sum(axis=0)/num_train[digit]

In [None]:
def Gaussian(x,m,v):
    """Evaluate the normal density with mean m and variance v at x.

    Parameters
    ----------
    x : number
        Point at which the density is evaluated.
    m : number
        Mean of the distribution.
    v : number
        Variance of the distribution (not the standard deviation).

    Returns
    -------
    float
        exp(-(x-m)**2/(2*v))/sqrt(2*pi*v) when v is non-zero.  When v is
        zero every observed value was equal to m, so the distribution is
        treated as a point mass: 1.0 if x equals m, otherwise 0.0.
    """
    if v==0: # all value is equal
        # A zero-variance distribution has no density; returning the mean
        # itself would feed an arbitrary number into the likelihood.
        return 1.0 if x==m else 0.0
    return ma.exp(-(x-m)**2/(2*v))/ma.sqrt(2*ma.pi*v)

In [None]:
def print_post(posterior):
    """Write the normalised posterior of each digit to the global stream f.

    posterior is indexed 0~9 and must provide .sum(); every entry is divided
    by the sum of all entries and written on its own line as 'digit:value',
    after a one-line heading.
    """
    total=posterior.sum()
    print('Postirior (in log scale):',file=f)
    for digit in range(10):
        print(digit,posterior[digit]/total,sep=':',file=f)
In [None]:
def Ima_num(mode):
    """Write a 28x28 binary picture of every digit to the global stream f.

    Parameters
    ----------
    mode : int
        0 - discrete version: a pixel is drawn as 1 when its most frequent
            bin in fre_n is bin 16 or higher, i.e. grey level 128 or more
            (each of the 32 bins spans 8 levels).  Ties go to the lowest bin.
        1 - continuous version: a pixel is drawn as 1 when its value in
            mean is 128 or more.
        Any other value draws every pixel as 0.

    For each digit a blank line, the line 'digit:' and 28 rows of 28
    characters ('0' or '1') are written after a one-line heading.
    """
    print('Imagination of numbers in Bayesian classifier:',file=f)
    for i in range(10):
        print(file=f)
        print(i,end=':',file=f)
        print(file=f)
        if mode==0: # Discrete version
            # argmax yields the first (lowest) most frequent bin of each
            # pixel.  Bin 15 covers levels 120~127, so the cut-off that
            # matches the 128 threshold of the continuous version is bin 16.
            ima_n=fre_n[i].argmax(axis=1)>=16
        elif mode==1: # Continuous version
            ima_n=mean[i]>=128
        else:
            ima_n=np.zeros(784,dtype=bool)
        for line in np.asarray(ima_n).reshape(28,28):
            print(''.join('1' if on else '0' for on in line),file=f)

In [None]:
# Pixels are independent
def Model(mode):
    """Classify every test image with naive Bayes and write a report to f.

    Parameters
    ----------
    mode : int
        0 - discrete version: the per-pixel likelihood comes from the bin
            counts in fre_n.
        1 - continuous version: the per-pixel likelihood comes from
            Gaussian(pixel, mean, var).

    Reads the module-level names test_x, test_y, num_train, fre_n, mean, var
    and f.  For every image the posteriors (via print_post), the prediction
    and the true label are written to f, followed by the output of Ima_num
    and the error rate.

    The stream f is flushed but left open, so Model can be called again on
    the same stream (closing it here made a second call fail on a closed
    file); whoever opened f is responsible for closing it.
    """
    n_test=len(test_x)
    wrong=0
    for image in range(n_test):
        posterior=np.zeros(10)
        for i in range(10):
            log_n=ma.log(num_train[i]) # invariant over the pixel loop
            prior=log_n
            likelihood=0
            for j in range(784):
                # NOTE(review): -log(num_train) turns a bin count into a
                # relative frequency for the discrete version, but it is
                # also applied to the Gaussian density of the continuous
                # version - confirm that this is intended.
                p=-log_n
                if mode==0: # Discrete version
                    level=test_x[image][j]//8
                    # An empty bin adds nothing, i.e. it is treated like a count of 1.
                    if fre_n[i][j][level]!=0:
                        p+=ma.log(fre_n[i][j][level])
                if mode==1: # Continuous version
                    x=Gaussian(test_x[image][j],mean[i][j],var[i][j])
                    # log is undefined at 0; such a density adds nothing.
                    if x>0:
                        p+=ma.log(x)
                likelihood+=p
            posterior[i]=prior+likelihood
        print_post(posterior)
        # argmax returns the first index holding the largest posterior.
        guess=int(np.argmax(posterior))
        print('Prediction:',guess,'Ans:',test_y[image],file=f)
        if guess!=test_y[image]:
            wrong+=1
        print(file=f)
    Ima_num(mode)
    print('Error rate:',wrong/n_test if n_test else 0.0,file=f)
    f.flush()

In [None]:
# Report file, opened for writing (an existing file is truncated).  The
# handle is kept in the module-level name f, which print_post, Ima_num and
# Model all write to.
path='output.txt'
f=open(path,'w')

In [None]:
# Run the classifier in mode 0 (discrete version, 32 bins per pixel).
Model(0)

In [None]:
# Run the classifier in mode 1 (continuous version, Gaussian per pixel).
Model(1)