In [None]:
import random
import math
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score


In [None]:
 files = ['train3.txt',
train5file = 'train5.txt'
test3file = 'test3.txt'
test5file = 'test5.txt'

In [None]:
def parseFile(filename):
    """Read a whitespace-delimited text file of integers into a list of rows.

    Every non-blank line becomes one row: the line is split on whitespace and
    each token is converted with ``int``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one inner list of ints per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token is not a valid integer literal.
    """
    contents = []
    with open(filename) as f:
        for line in f:
            tokens = line.split()
            # Blank or whitespace-only lines (e.g. a trailing empty line) would
            # otherwise produce an empty row and make the data ragged.
            if tokens:
                contents.append([int(token) for token in tokens])
    return contents

In [None]:
# Input file names, one train/test pair per class (classes named "3" and "5").
train3file, train5file = 'train3.txt', 'train5.txt'
test3file, test5file = 'test3.txt', 'test5.txt'

In [None]:
# Load each data file into a list of integer rows (one inner list per line),
# reading them in the order train3, train5, test3, test5.
train3, train5, test3, test5 = [
    parseFile(path)
    for path in (train3file, train5file, test3file, test5file)
]

In [None]:
# Labels: 0 for every row loaded from the "3" files, 1 for every row loaded
# from the "5" files.
ytrain3, ytrain5 = [0] * len(train3), [1] * len(train5)
ytest3, ytest5 = [0] * len(test3), [1] * len(test5)

# Append the class-1 rows and labels onto the class-0 lists IN PLACE.
# NOTE: train3/test3/ytrain3/ytest3 hold both classes after this point, so
# re-running this cell without reloading the data appends train5/test5 again.
train3 += train5
test3 += test5
ytrain3 += ytrain5
ytest3 += ytest5

# The combined lists are exposed under the generic names used from here on
# (these are aliases of the same list objects, not copies).
x_train, y_train = train3, ytrain3
x_test, y_test = test3, ytest3

In [None]:
def listToNparray(listOfList):
    """Convert a Python sequence into a NumPy array, element by element.

    Each element of ``listOfList`` is first turned into its own ``np.array``
    and the resulting list is then wrapped in an outer ``np.array``.

    Args:
        listOfList: Sequence of rows (lists of numbers) or of plain numbers.

    Returns:
        np.ndarray built from the converted elements.
    """
    converted = []
    for entry in listOfList:
        converted.append(np.array(entry))
    return np.array(converted)

In [None]:
# Replace the Python lists of features and labels with NumPy arrays so the
# later cells can use array operations such as ``.dot``.
x_train, x_test, y_train, y_test = [
    listToNparray(values)
    for values in (x_train, x_test, y_train, y_test)
]

In [None]:
# Shuffle features and labels together so each row keeps its own label.
# random_state=0 fixes the permutation, making the order reproducible.
x_train, y_train = shuffle(
    x_train,
    y_train,
    random_state=0,
)
x_test, y_test = shuffle(
    x_test,
    y_test,
    random_state=0,
)

In [None]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**(-x))`` of a scalar.

    The value is computed with a branch on the sign of ``x`` so that the
    argument passed to ``math.exp`` is never positive. That avoids the
    ``OverflowError`` ``math.exp`` raises for large positive arguments
    (roughly above 709 for doubles), which the single-formula version hit for
    very negative ``x``.

    Args:
        x: A real scalar.

    Returns:
        float in the closed interval [0, 1].
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # Algebraically identical form for x < 0; exp(x) can only underflow to 0.
    z = math.exp(x)
    return z / (1 + z)


# Element-wise version of ``sigmoid`` for arrays. ``otypes`` pins the output
# dtype so that empty inputs are accepted as well.
sigmoid_vec = np.vectorize(sigmoid, otypes=[float])

In [None]:
def gradient(y_data, x_data, w):
    """Gradient of the logistic log-likelihood with respect to the weights.

    Computes ``sum_i (y_i - sigmoid(x_i . w)) * x_i`` over the rows of
    ``x_data``.

    Args:
        y_data: 1-D array of labels, one per row of ``x_data``.
        x_data: 2-D array of features (rows are samples, columns are features).
        w: 1-D weight array with one entry per feature column.

    Returns:
        1-D np.ndarray with one gradient component per feature column.
    """
    # The residual must be computed from the x_data argument, not from the
    # notebook-level training matrix, so the function works for any data set.
    arr = y_data - sigmoid_vec(x_data.dot(w))
    # Scale every row of x_data by its residual, then add the rows up.
    return np.sum(np.expand_dims(arr, axis=1) * x_data, axis=0)

In [None]:
def logLikelihood(y_data, x_data, w):
    """Log-likelihood of the labels under a logistic model with weights ``w``.

    Evaluates ``sum_i y_i * log(s_i) + (1 - y_i) * log(1 - s_i)`` where
    ``s_i = sigmoid(x_i . w)``. The two logarithms are computed directly from
    the linear score ``z = x . w`` using

        log(sigmoid(z))     = -log(1 + exp(-z)) = -logaddexp(0, -z)
        log(1 - sigmoid(z)) = -log(1 + exp(z))  = -logaddexp(0,  z)

    so the result stays finite even when the sigmoid would round to exactly
    0 or 1 (where taking ``log`` of it would fail).

    Args:
        y_data: 1-D array of 0/1 labels, one per row of ``x_data``.
        x_data: 2-D array of features (rows are samples, columns are features).
        w: 1-D weight array with one entry per feature column.

    Returns:
        The summed log-likelihood as a NumPy float.
    """
    y_data = np.asarray(y_data)
    z = x_data.dot(w)
    log_sigma = -np.logaddexp(0, -z)
    log_one_minus_sigma = -np.logaddexp(0, z)
    return np.sum(y_data * log_sigma + ((1 - y_data) * log_one_minus_sigma), axis=0)

In [None]:
def percentErrorRate(y_data, x_data, w):
    """Percentage of rows that the logistic model with weights ``w`` misclassifies.

    Probabilities ``sigmoid(x . w)`` of at least 0.5 are predicted as class 1
    and those below 0.5 as class 0; the result is
    ``(1 - accuracy) * 100`` against ``y_data``.

    Args:
        y_data: Array of true 0/1 labels, one per row of ``x_data``.
        x_data: 2-D array of features.
        w: 1-D weight array with one entry per feature column.

    Returns:
        Error rate in percent (0 to 100).
    """
    predictions = sigmoid_vec(x_data.dot(w))
    # Both masks are taken before overwriting; they cannot overlap.
    is_positive = predictions >= 0.5
    is_negative = predictions < 0.5
    predictions[is_positive] = 1
    predictions[is_negative] = 0
    accuracy = accuracy_score(y_data, predictions)
    return (1 - accuracy) * 100

In [None]:
# Step size: 0.2 divided by the number of training rows. Taken from x_train
# itself so it does not depend on another list having been merged in place.
lr = 0.2 / len(x_train)

# Seed the generator so the random starting weights (and therefore the whole
# training run) are the same on every Restart & Run All.
random.seed(0)

# One starting weight per feature column, drawn uniformly from [0, 1).
w = np.array([random.random() for _ in range(x_train.shape[1])])

In [None]:
# Histories of the training metrics, one entry appended per loop iteration.
likelihoods, error_rates = [], []

# Repeatedly move w by lr * gradient(...) until a step barely changes it.
while True:
    # Record the metrics of the current weights before stepping.
    error_rates.append(percentErrorRate(y_train, x_train, w))
    likelihoods.append(logLikelihood(y_train, x_train, w))

    new_w = w + lr * gradient(y_train, x_train, w)

    # Keep going while the summed absolute change is not below 1e-6. When the
    # loop stops, w still holds the weights from before the final step.
    if not (np.abs(new_w - w).sum() < 1e-6):
        w = new_w
        continue
    break

In [None]:
# Training log-likelihood history (one point per loop iteration), drawn on
# the current axes.
plt.gca().plot(likelihoods)
plt.gca().set_ylabel('log likelihood')
plt.show()

In [None]:
# Training error-rate history (one point per loop iteration), drawn on the
# current axes.
plt.gca().plot(error_rates)
plt.gca().set_ylabel('Percent Error Rate')
plt.show()

In [None]:

# Final training-set metrics: the last values recorded by the training loop.
print('Training Data:')
print('Percent Error Rate: ', error_rates[-1])
# The label carries no '{}' placeholder: print() does not format its
# arguments, so a placeholder would be shown literally in the output.
print('Log Likelihood: ', likelihoods[-1])

In [None]:
# Show the learned weights arranged as an 8x8 grid (requires exactly 64
# weights); as the cell's last expression, the notebook displays the result.
np.reshape(w, (8, 8))

In [None]:
# Evaluate the trained weights on the held-out test arrays and print each
# metric next to its label.
print('Testing Data:')
for label, metric in (
    ('Percent Error Rate: ', percentErrorRate),
    ('Log Likelihood: ', logLikelihood),
):
    print(label, metric(y_test, x_test, w))