In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random as rd
import numba

In [2]:
# Read 'mnist_train.csv' into a DataFrame; the following cell selects rows by
# its 'label' column and treats every other column as a feature.
data = pd.read_csv('mnist_train.csv')

In [3]:
# Split the training frame into one feature table per digit class (0-9).
x = []
for i in range(10):
    x_temp = data[data['label'] == i]
    # Use the keyword form: passing the axis positionally (`drop('label', 1)`)
    # was deprecated in pandas 1.3 and removed in pandas 2.0.
    x_temp = x_temp.drop(columns='label')
    x.append(x_temp)

# Per-class statistics consumed by the discriminant functions.
m_samples = []  # number of rows in each class
mu = []         # per-class mean of every feature column
pi = []         # class prior: share of all rows belonging to the class
log_pi = []     # natural log of the prior
for i in range(10):
    # Replace the DataFrame slice with a plain ndarray for the numeric work.
    x[i] = np.array(x[i])
    m_samples.append(len(x[i]))
    mu.append(np.mean(x[i], axis=0))
    pi.append(m_samples[i]/len(data))
    log_pi.append(np.log(pi[i]))

In [4]:
# Per-class covariance of the features (np.cov expects variables in rows,
# hence the transpose) and the matching Moore-Penrose pseudo-inverse.
sigma = [np.cov(x[k].T) for k in range(10)]
sigma_inv = [np.linalg.pinv(class_cov) for class_cov in sigma]

# Pooled covariance: each class covariance weighted by its row count, then
# divided by the total number of rows in `data`.
mean_sigma = np.zeros(sigma[0].shape)
for class_cov, class_count in zip(sigma, m_samples):
    mean_sigma += class_cov*class_count
mean_sigma = mean_sigma/len(data)
mean_sigma_inv = np.linalg.pinv(mean_sigma)

In [5]:
def qda(x, mu, sigma, sigma_inv, logpi):
    """Score a sample against one class.

    Returns ``-0.5 * d^T @ sigma_inv @ d + logpi`` where ``d = x - mu`` is
    stacked into a column vector.

    Parameters:
        x: sample vector.
        mu: class mean, same length as ``x``.
        sigma: accepted for signature compatibility; never read here.
        sigma_inv: matrix applied in the quadratic form.
        logpi: value added to the quadratic term.

    NOTE(review): the textbook quadratic discriminant also subtracts
    ``0.5 * log|sigma|``; that term is absent here -- confirm it is intended.
    """
    diff = np.vstack(x - mu)
    quad_form = diff.T @ sigma_inv @ diff  # 1x1 result
    return -0.5*quad_form[0, 0] + logpi

def lda(x, mu, sigma, sigma_inv, logpi):
    """Score a sample against one class using a supplied inverse covariance.

    Computes ``-0.5 * d^T @ sigma_inv @ d + logpi`` with ``d = x - mu``
    stacked into a column vector.

    Parameters:
        x: sample vector.
        mu: class mean, same length as ``x``.
        sigma: accepted for signature compatibility; never read here.
        sigma_inv: matrix applied in the quadratic form.
        logpi: value added to the quadratic term.
    """
    centered = np.vstack(x - mu)
    row_times_inv = np.matmul(centered.T, sigma_inv)
    distance = np.matmul(row_times_inv, centered)  # 1x1 result
    return logpi + -0.5*distance[0, 0]
    

def show_image(x):
    """Draw `x` as a 28x28 image on the current matplotlib axes.

    `x` must provide ``reshape`` and be reshapeable to (28, 28).
    """
    pixels = x.reshape((28, 28))
    plt.imshow(pixels)

def decide_class_QDA(x):
    """Return the class index (0-9) whose `qda` score for `x` is largest.

    Uses the module-level `mu`, `sigma`, `sigma_inv` and `log_pi` lists.
    Scores are compared as (score, index) pairs, so an exact tie goes to the
    larger class index.
    """
    scored = [
        (qda(x, mu[k], sigma[k], sigma_inv[k], log_pi[k]), k)
        for k in range(10)
    ]
    _, best_class = max(scored)
    return best_class

def decide_class_LDA(x):
    """Return the class index (0-9) whose `lda` score for `x` is largest.

    Every class is scored with the shared `mean_sigma` / `mean_sigma_inv`
    and its own `mu[k]` and `log_pi[k]` (all module-level). Scores are
    compared as (score, index) pairs, so an exact tie goes to the larger
    class index.
    """
    scored = [
        (lda(x, mu[k], mean_sigma, mean_sigma_inv, log_pi[k]), k)
        for k in range(10)
    ]
    _, best_class = max(scored)
    return best_class

In [6]:
# Unbind the DataFrame read from 'mnist_train.csv'; `data` is rebound to the
# test CSV in the next cell.
del data

In [7]:
# Load the test CSV and split it per digit class. This rebinds `data`, `x`
# and `m_samples`; the statistics fitted earlier (`mu`, `sigma*`, `log_pi`)
# are left untouched.
data = pd.read_csv('mnist_test.csv')
x = []
for i in range(10):
    x_temp = data[data['label'] == i]
    # Use the keyword form: passing the axis positionally (`drop('label', 1)`)
    # was deprecated in pandas 1.3 and removed in pandas 2.0.
    x_temp = x_temp.drop(columns='label')
    x.append(x_temp)

m_samples = []  # number of test rows in each class
for i in range(10):
    # Replace the DataFrame slice with a plain ndarray for per-row indexing.
    x[i] = np.array(x[i])
    m_samples.append(len(x[i]))

In [10]:
# Evaluate the QDA decision rule on every sample of every class.
#
# The previous draft of this cell did not parse (an orphaned, indented `if`
# was left behind after the inner loop was removed), treated `correct` as a
# list when computing the percentage, and never updated `correct_all`. Its
# numba-decorated helper was dropped as well: nopython compilation cannot
# call the plain-Python `decide_class_QDA`.
accuracies = []   # per-class accuracy in percent, indexed by digit
correct_all = 0   # correctly classified samples across all classes

for digit in range(10):
    print('\rTesting class', digit, 'having', m_samples[digit], 'samples')
    correct = 0
    for m_i in range(m_samples[digit]):
        if decide_class_QDA(x[digit][m_i]) == digit:
            correct += 1
            correct_all += 1
        # Lightweight progress indicator, overwritten in place via '\r'.
        if m_i % 10 == 0:
            print('Sample #:', m_i, end='\r')
    accuracies.append((100*correct)/m_samples[digit])

In [None]:
print('QDA Results')
print('Class Accuracies:')
# Iterate over enumerate() directly rather than rebinding `accuracies` to the
# one-shot enumerate iterator, so the list survives and the cell can be
# re-run with the same output.
for class_idx, class_acc in enumerate(accuracies):
    print('Class:', class_idx, class_acc)
# Overall accuracy in percent across every row of the loaded test frame.
total_acc = 100*(correct_all/len(data))
print('Total Accuracy:', total_acc)

In [None]:
# Evaluate the LDA decision rule on every sample of every class.
accuracies = []   # per-class accuracy in percent, indexed by digit
correct_all = 0   # correctly classified samples across all classes
for digit in range(10):
    class_size = m_samples[digit]
    print('\rTesting class', digit, 'having', class_size, 'samples')
    correct = 0
    for m_i in range(class_size):
        predicted = decide_class_LDA(x[digit][m_i])
        if predicted == digit:
            correct_all += 1
            correct += 1
        # Progress indicator every tenth sample, overwritten in place.
        if not m_i % 10:
            print('Sample #:', m_i, end='\r')
    accuracies.append((100*correct)/class_size)

In [None]:
print('LDA Results')
print('Class Accuracies:')
# Iterate over enumerate() directly rather than rebinding `accuracies` to the
# one-shot enumerate iterator, so the list survives and the cell can be
# re-run with the same output.
for class_idx, class_acc in enumerate(accuracies):
    print('Class:', class_idx, class_acc)
# Overall accuracy in percent across every row of the loaded test frame.
total_acc = 100*(correct_all/len(data))
print('Total Accuracy:', total_acc)