In [1]:
import sys, numpy as np, math, random, pickle

def p_net(A, x, w_list, b_list):
    """Run a forward pass and return the raw activations of the last layer.

    A      -- scalar activation function; it is wrapped with np.vectorize so
              it is applied element-wise to every layer's pre-activation.
    x      -- network input; must be matrix-multipliable with w_list[1].
    w_list -- per-layer weight matrices. Index 0 is a placeholder and is
              never read; layer k uses w_list[k].
    b_list -- per-layer biases, indexed the same way as w_list.

    When w_list holds no layers beyond the placeholder, x is returned as is.
    """
    new_A = np.vectorize(A)
    a = x
    # Start at 1: slot 0 of both lists is the unused placeholder.
    for layer in range(1, len(w_list)):
        a = new_A(a @ w_list[layer] + b_list[layer])
    return a

def p_net2(A, x, w_list, b_list):
    """Run a forward pass and convert the output into a 0/1 winner mask.

    A      -- scalar activation function; applied element-wise through
              np.vectorize.
    x      -- network input; must be matrix-multipliable with w_list[1].
    w_list -- per-layer weight matrices. Index 0 is a placeholder and is
              never read; layer k uses w_list[k].
    b_list -- per-layer biases, indexed the same way as w_list.

    Returns an integer array with the same shape as the final activations:
    1 at every position that holds the maximum activation, 0 elsewhere.
    The maximum is taken over the whole output array, and tied maxima all
    receive a 1.
    """
    new_A = np.vectorize(A)
    a = x
    # Start at 1: slot 0 of both lists is the unused placeholder.
    for layer in range(1, len(w_list)):
        a = new_A(a @ w_list[layer] + b_list[layer])
    a = np.asarray(a)
    # ndarray.max() works for any rank; the previous max(max(a)) only worked
    # when the output was a 2-D array with exactly one row.
    return np.where(a < a.max(), 0, 1)
    
    
def zero(num, maxi):
    """Threshold helper: 0 when num is strictly below maxi, otherwise 1."""
    if num < maxi:
        return 0
    return 1

def sigmoid(num):
    """Logistic function 1 / (1 + e**-num).

    Built on np.exp, so it accepts scalars as well as NumPy arrays
    (element-wise).
    """
    decay = np.exp(-num)
    return 1 / (1 + decay)

def dsigmoid(num):
    """Derivative of the logistic function at num: s * (1 - s), s = sigmoid(num)."""
    s = sigmoid(num)
    return s * (1 - s)

def error(result, output):
    """Half the squared norm of (output - result).

    np.linalg.norm with default arguments is the Euclidean norm for 1-D
    input and the Frobenius norm for 2-D input.
    """
    residual = output - result
    magnitude = np.linalg.norm(residual)
    return 0.5 * magnitude**2

def _count_misclassified(samples, w_list, b_list):
    """Count the (input, target) pairs whose p_net2 prediction differs from the target."""
    return sum(
        1
        for sample in samples
        if not np.array_equal(sample[1], p_net2(sigmoid, sample[0], w_list, b_list))
    )


def train2(trainSet, epoch, w_list, b_list, A, rate):
    """Train the network with per-sample back-propagation on squared error.

    trainSet -- sized, re-iterable collection of (x, y) pairs. x must support
                x @ w_list[1] and x.transpose(); y must be compatible with
                the last layer's output.
    epoch    -- number of full passes over trainSet.
    w_list   -- per-layer weight matrices; index 0 is an unused placeholder.
    b_list   -- per-layer biases, indexed like w_list.
    A        -- accepted for interface compatibility but not used: training
                always uses sigmoid / dsigmoid.
    rate     -- learning rate applied to every weight and bias update.

    Side effects:
      * w_list and b_list are updated IN PLACE after every sample.
      * Prints the mis-classification count on trainSet before training and
        after each epoch.
      * Pickles the current w_list / b_list to "w_list.txt" / "b_list.txt"
        after each epoch.

    Returns the (w_list, b_list) pair that was passed in, now trained.
    """
    N = len(w_list)
    total = len(trainSet)

    wrong = _count_misclassified(trainSet, w_list, b_list)
    # Guard the ratio so an empty training set does not divide by zero.
    print("EPOCH: none, mis-classified: %s, percent: %s" % (wrong, wrong / total if total else 0.0))
    for i in range(epoch):
        for x, y in trainSet:
            # Forward pass. Slot 0 holds the input itself so that list
            # indices line up with layer numbers.
            a_list = [x]
            dot_list = [0]
            for layer in range(1, N):
                dot = a_list[layer - 1] @ w_list[layer] + b_list[layer]
                dot_list.append(dot)
                # sigmoid is built on np.exp and is already element-wise,
                # so no np.vectorize wrapper is needed.
                a_list.append(sigmoid(dot))

            # Backward pass: output-layer delta first, then propagate it
            # towards the input through the transposed weights.
            delta_list = [None] * N
            delta_list[N - 1] = dsigmoid(dot_list[N - 1]) * (y - a_list[N - 1])
            for layer in range(N - 2, 0, -1):
                delta_list[layer] = dsigmoid(dot_list[layer]) * (
                    delta_list[layer + 1] @ w_list[layer + 1].transpose()
                )

            # Parameter update (delta already carries the (y - a) sign, hence "+").
            for layer in range(1, N):
                b_list[layer] = b_list[layer] + rate * delta_list[layer]
                w_list[layer] = w_list[layer] + rate * (
                    a_list[layer - 1].transpose() @ delta_list[layer]
                )

        # after epoch finishes, find number of mis-classified points
        wrong = _count_misclassified(trainSet, w_list, b_list)
        print("EPOCH: %s, mis-classified: %s, percent: %s" % (i, wrong, wrong / total if total else 0.0))
        # Checkpoint after every epoch; "with" guarantees the files are
        # flushed and closed before the next epoch starts.
        with open("w_list.txt", "wb") as w_file:
            pickle.dump(w_list, w_file)
        with open("b_list.txt", "wb") as b_file:
            pickle.dump(b_list, b_file)
    return w_list, b_list

def generate(network):
    """Create randomly initialised weights and biases for a layered network.

    network -- sequence of layer widths, input layer first.

    Returns (w_list, b_list). Both lists start with a None placeholder at
    index 0. w_list[k] has shape (network[k-1], network[k]) and b_list[k]
    has shape (1, network[k]). Every entry is drawn uniformly from [-1, 1).
    """
    widths_in = network[:-1]
    widths_out = network[1:]
    # All weight matrices are drawn before any bias, so the sequence of
    # random draws is: weights layer by layer, then biases layer by layer.
    weights = [2 * np.random.rand(rows, cols) - 1 for rows, cols in zip(widths_in, widths_out)]
    biases = [2 * np.random.rand(1, cols) - 1 for cols in widths_out]
    return [None] + weights, [None] + biases


# NOTE: pickle.load executes arbitrary code embedded in the file; only load
# pickles that were produced locally by this script.
with open("trainSet.txt", "rb") as fh:
    train = pickle.load(fh)
print("Network: [784, 300, 100, 10]")

# Copy the parameters left in w_list.txt / b_list.txt to the best_* files
# before training: train2 rewrites w_list.txt / b_list.txt after each epoch.
with open("w_list.txt", "rb") as fh:
    w_list = pickle.load(fh)
with open("b_list.txt", "rb") as fh:
    b_list = pickle.load(fh)
with open("best_w_list.txt", "wb") as fh:
    pickle.dump(w_list, fh)
with open("best_b_list.txt", "wb") as fh:
    pickle.dump(b_list, fh)


# Start from fresh random parameters (the ones loaded above are discarded)
# and train for 5 epochs with learning rate 0.15.
w_list, b_list = generate([784, 300, 100, 10])
w_list, b_list = train2(train, 5, w_list, b_list, sigmoid, 0.15)


### TESTING PART
# Evaluate the parameters most recently written to w_list.txt / b_list.txt
# against the held-out test set. Files are opened with "with" so the handles
# are closed as soon as each pickle has been read.
with open("testSet.txt", "rb") as fh:
    test = pickle.load(fh)
with open("w_list.txt", "rb") as fh:
    w_list = pickle.load(fh)
with open("b_list.txt", "rb") as fh:
    b_list = pickle.load(fh)

wrong = 0
# Total squared error of the 0/1 predictions; accumulated but not printed here.
errorSum = 0
for i in test:
    # i is an (input, target) pair; a prediction counts as correct only when
    # the 0/1 mask from p_net2 matches the target exactly.
    result = p_net2(sigmoid, i[0], w_list, b_list)
    if not np.array_equal(i[1], result):
        wrong += 1
    errorSum += error(result, i[1])
print("misclassified percent testSet: %s" % (wrong / len(test),))






Network: [784, 300, 100, 10]
EPOCH: none, mis-classified: 53545, percent: 0.8924166666666666
EPOCH: 0, mis-classified: 4150, percent: 0.06916666666666667
EPOCH: 1, mis-classified: 2670, percent: 0.0445
EPOCH: 2, mis-classified: 2087, percent: 0.03478333333333333
EPOCH: 3, mis-classified: 1614, percent: 0.0269
EPOCH: 4, mis-classified: 1265, percent: 0.021083333333333332
misclassified percent testSet: 0.0373
