# Machine Learning Homework 2

## Naive Bayes Classifier

In [30]:
import numpy as np

# Selects which Naive Bayes variant the classifier code runs.
# mode 0: discrete mode (pixel values are tallied into 32 bins per pixel)
# mode 1: continuous mode (each pixel is modelled by a per-class Gaussian)
mode = 0

In [277]:
# parse the input files
#
# Both file kinds start with a big-endian int32 header followed by raw
# unsigned bytes: label files carry (magic, count), image files carry
# (magic, count, height, width).

import struct


def _read_idx_labels(path):
    """Read a label file and return the labels as a 1-D integer array."""
    with open(path, 'rb') as f:
        raw = f.read()
    header = '>2i'
    _magic, count = struct.unpack_from(header, raw, 0)
    labels = np.frombuffer(raw, dtype=np.uint8, count=count,
                           offset=struct.calcsize(header))
    # Widen from uint8 so the labels behave like ordinary integer indices.
    return labels.astype(int)


def _read_idx_images(path):
    """Read an image file.

    Returns a tuple (images, height, width) where images is a float array
    of shape (count, height*width), one flattened image per row. The image
    dimensions always come from this file's own header.
    """
    with open(path, 'rb') as f:
        raw = f.read()
    header = '>4i'
    _magic, count, rows, cols = struct.unpack_from(header, raw, 0)
    pixels = np.frombuffer(raw, dtype=np.uint8, count=count*rows*cols,
                           offset=struct.calcsize(header))
    # astype copies, so the result is writable and independent of `raw`.
    return pixels.reshape(count, rows*cols).astype(float), rows, cols


train_labels = _read_idx_labels('./train-labels-idx1-ubyte')
train_images, height, width = _read_idx_images('./train-images-idx3-ubyte')

test_labels = _read_idx_labels('./t10k-labels-idx1-ubyte')
test_images, height, width = _read_idx_images('./t10k-images-idx3-ubyte')

# Same bookkeeping names the original cell left behind (test-set values).
num_labels = len(test_labels)
num_images = len(test_images)
image_size = height*width

In [None]:
# Naive Bayes digit classifier: fit a per-class, per-pixel model on the
# training set, classify every test image, report the error rate, and
# finally print a thresholded "imagination" of each digit.
num_images = len(train_images)
height, width, size = 28, 28, 784
mode = 1


def _fit_discrete(images, labels, class_counts, num_bins=32):
    """Fit the binned model and return a per-class log-likelihood function.

    Pixel values (0-255) are bucketed into num_bins equal-width bins. Every
    bin tally starts at 1 so that no bin ends up with probability zero.
    """
    bin_width = 256 // num_bins
    num_pixels = images.shape[1]
    pixel_idx = np.arange(num_pixels)
    tallies = np.ones((10, num_pixels, num_bins), dtype='int32')
    for digit in range(10):
        levels = (images[labels == digit]/bin_width).astype(int)
        for level in range(num_bins):
            tallies[digit, :, level] += (levels == level).sum(axis=0)
    # The tallies must survive until they are normalised; the frequencies
    # go into a new array rather than replacing the tallies beforehand.
    log_freq = np.log(tallies/(class_counts[:, None, None] + num_bins))

    def score(img):
        return log_freq[:, pixel_idx, (img/bin_width).astype(int)].sum(axis=1)

    return score


def _fit_gaussian(images, labels, class_mean):
    """Fit per-pixel Gaussians and return a per-class log-likelihood function.

    Pixels whose class variance is zero cannot be scored by a Gaussian; they
    contribute a fixed penalty (normalising constant minus 3) instead.
    """
    variance = np.empty(class_mean.shape)
    for digit in range(10):
        variance[digit] = np.var(images[labels == digit], axis=0)
    # Natural-log normalising constant, matching the np.log terms below.
    log_norm = -0.5*np.log(2*np.pi)
    has_spread = variance != 0
    # Placeholder variance keeps the arithmetic finite where it is masked out.
    safe_var = np.where(has_spread, variance, 1.0)

    def score(img):
        gauss = (log_norm - 0.5*np.log(safe_var)
                 - (img - class_mean)**2/(2*safe_var))
        return np.where(has_spread, gauss, log_norm - 3).sum(axis=1)

    return score


# training
train_labels = np.asarray(train_labels)
class_counts = np.bincount(train_labels, minlength=10)
log_prior = np.log(class_counts/num_images)

# Per-class mean image: used by the Gaussian model and by the imagination
# printout at the end.
mean = np.empty((10, size))
for i in range(10):
    mean[i] = np.mean(train_images[train_labels==i], axis=0)

if mode == 0:
    score = _fit_discrete(train_images, train_labels, class_counts)
else:
    score = _fit_gaussian(train_images, train_labels, mean)

# testing
d = {True: 0, False: 0}
for img, answer in zip(test_images, test_labels):
    p = score(img) + log_prior
    # Pick the winner on the raw log posterior, before rescaling.
    result = np.argmax(p)
    # Rescale so the printed values sum to 1. Every term is negative, so
    # after the division the SMALLEST printed value marks the most likely
    # class.
    p = p/np.sum(p)
    is_match = bool(result == answer)
    d[is_match] = d[is_match] + 1

    print('Posterior (in log scale):')
    for k, l in enumerate(p):
        print('%d: %f' % (k, l))
    print('Prediction: %d, Ans: %d' % (result, answer))
    print()
print('Error rate: %f' % (d[False]/(d[True]+d[False])))

# A pixel is drawn as 1 when its class mean is at least 128, else 0.
imagination = np.where(mean >= 128, 1, 0)
print('Imagination of numbers in Baysian classifier:')
print()
for i in range(10):
    print('%d:' % i)
    for j in range(height):
        print(str(imagination[i, j*width:(j+1)*width]))

## Online Learning

In [201]:
# Initial Beta prior parameters: a is combined with the count of '1's and
# b with the count of '0's as each line is processed.
a, b = 10, 1

# One sequence of 0/1 characters per line. Blank lines (such as the one a
# trailing newline would produce) hold no observations and are dropped so
# they do not become empty cases.
with open('./test.txt', 'r') as f:
    data = [line for line in f.read().splitlines() if line.strip()]

In [202]:
def factorial(end, start=0):
    """Return the product of the integers from max(start, 1) through end.

    With the default start this is the ordinary factorial end!. With an
    explicit start it is the partial product start * (start+1) * ... * end.
    An empty range (end == 0, or start > end) yields 1, the empty product.

    Raises:
        ValueError: if end is negative.
    """
    if end < 0:
        raise ValueError('factorial() is not defined for negative values')
    # Iterative so that large arguments do not hit the recursion limit.
    product = 1
    for factor in range(max(start, 1), end + 1):
        product *= factor
    return product
    
def combination(n, m):
    """Return the binomial coefficient C(n, m) as an exact integer.

    Returns 0 when m is negative or larger than n (no such selections).
    """
    if m < 0 or m > n:
        return 0
    # C(n, m) == C(n, n-m); use the smaller side to shorten the loop.
    m = min(m, n - m)
    result = 1
    for k in range(1, m + 1):
        # After this step result == C(n-m+k, k), so the floor division is
        # exact and no floating-point rounding is involved.
        result = result * (n - m + k) // k
    return result

def likelihood(a, b):
    """Return the binomial likelihood of observing a ones and b zeros.

    The success probability used is the observed frequency p = a/(a+b).
    With no observations at all there is nothing to explain, so the
    likelihood is 1.0 (and the frequency, which would be 0/0, is not needed).
    """
    total = a + b
    if total == 0:
        return 1.0
    p = a/total
    return combination(total, a) * (p**a) * ((1-p)**b)

def parse_input(s):
    """Return (number of '1' characters, number of '0' characters) in s."""
    ones = s.count('1')
    zeros = s.count('0')
    return ones, zeros

In [203]:
# Sequential Beta update: each line's counts of ones and zeros are added to
# the running parameters, and the resulting posterior is the next prior.
posterior = (a, b)
for case_no, line in enumerate(data):
    prior = posterior
    ones, zeros = parse_input(line)
    posterior = (prior[0] + ones, prior[1] + zeros)
    case_likelihood = likelihood(ones, zeros)

    print('case %d: %s' % (case_no, line))
    print('Likelihood: %f' % case_likelihood)
    print('Beta prior:     a = %d   b = %d' % prior)
    print('Beta posterior: a = %d   b = %d' % posterior)
    print()

case 0: 0101010101001011010101
Likelihood: 0.168188
Beta prior:     a = 10   b = 1
Beta posterior: a = 21   b = 12

case 1: 0110101
Likelihood: 0.293755
Beta prior:     a = 21   b = 12
Beta posterior: a = 25   b = 15

case 2: 010110101101
Likelihood: 0.228605
Beta prior:     a = 25   b = 15
Beta posterior: a = 32   b = 20

case 3: 0101101011101011010
Likelihood: 0.182869
Beta prior:     a = 32   b = 20
Beta posterior: a = 43   b = 28

case 4: 111101100011110
Likelihood: 0.214307
Beta prior:     a = 43   b = 28
Beta posterior: a = 53   b = 33

case 5: 101110111000110
Likelihood: 0.206598
Beta prior:     a = 53   b = 33
Beta posterior: a = 62   b = 39

case 6: 1010010111
Likelihood: 0.250823
Beta prior:     a = 62   b = 39
Beta posterior: a = 68   b = 43

case 7: 11101110110
Likelihood: 0.261968
Beta prior:     a = 68   b = 43
Beta posterior: a = 76   b = 46

case 8: 01000111101
Likelihood: 0.236091
Beta prior:     a = 76   b = 46
Beta posterior: a = 82   b = 51

case 9: 110100111
Likeli