In [1]:
!gdown --id '1YMux1D-GDX1J0VM_3EK_zEhUbWP9OFFF' --output hw4_nnet_train.dat
!gdown --id '1Y9qOjFTESkP-fKOkUdYkaeKHYteuDYFA' --output hw4_nnet_test.dat

Downloading...
From: https://drive.google.com/uc?id=1YMux1D-GDX1J0VM_3EK_zEhUbWP9OFFF
To: /content/hw4_nnet_train.dat
100% 535/535 [00:00<00:00, 769kB/s]
Downloading...
From: https://drive.google.com/uc?id=1Y9qOjFTESkP-fKOkUdYkaeKHYteuDYFA
To: /content/hw4_nnet_test.dat
100% 5.35k/5.35k [00:00<00:00, 8.31MB/s]


In [4]:
import numpy as np
import torch


def data(path: str):
    data = np.genfromtxt(path)
    y, X = data[:, -1], data[:, :-1]
    return torch.from_numpy(X), torch.from_numpy(y)


def tanhp(x: float) -> float:
    return 1-torch.tanh(x)**2


class NeuralNetwork():
    '''d-M-1 network with all tanh neurons connected'''

    def __init__(self, x_train, y_train, M: list = [3], r: float = 0.1, eta: float = 0.1):
        self.x_train = x_train
        self.y_train = y_train
        self.layer = len(M)  # number of hidden layer
        self.din = len(x_train[0])
        try:
            self.dout = len(y_train[1])
        except TypeError:
            self.dout = 1
        self.M = M
        self.r = r
        self.eta = eta

        self.S = [0]*(self.layer+1)  # W-S-tanh
        self.X = [0]*(self.layer+1)  # tanh-X-W
        self.Delta = [0]*(self.layer+1)
        tmp = M + [self.dout]
        for l in range(self.layer + 1):
            self.S[l] = torch.zeros(tmp[l])
            self.X[l] = torch.zeros(tmp[l])
            self.Delta[l] = torch.zeros(tmp[l])

        self.W = [0]*(self.layer+1)
        tmp = [self.din]+M+[self.dout]
        for l in range(self.layer+1):
            self.W[l] = torch.FloatTensor(tmp[l]+1, tmp[l+1]).uniform_(-r, r)

    def forward(self, x0):
        for l in range(self.layer+1):
            tmp = [x0]+self.X
            tmp2 = torch.cat((torch.ones(1), tmp[l]), dim=0)
            self.S[l] = self.W[l].T@tmp2.float()
            self.X[l] = torch.tanh(self.S[l])

    def backward(self, y0):
        for l in range(self.layer, -1, -1):
            if l == self.layer:
                self.Delta[l] = -2*(y0-np.tanh(self.S[l]))@tanhp(self.S[l])
                self.Delta[l] = self.Delta[l].unsqueeze(0)
            else:
                self.Delta[l] = self.W[l+1][1:, :]@self.Delta[l+1] \
                    * tanhp(self.S[l])

    def grad_des(self, x0):
        tmp = [torch.mean(x0, axis=0)]+self.X
        for l in range(len(self.W)):
            tmp_x = torch.cat(
                (torch.ones(1), tmp[l]), dim=0).float().unsqueeze(0)
            self.W[l] -= self.eta*tmp_x.T@self.Delta[l].unsqueeze(0)

    def backprop(self, T: int, n: int):
        '''T: iteration; n: mini batch size'''
        for _ in range(T):
            # stochastic
            idx = torch.randint(0, len(self.x_train), size=(n,))
            x_tmp, y_tmp = self.x_train[idx], self.y_train[idx]
            for i in range(n):
                self.forward(x_tmp[i])
                self.backward(y_tmp[i])
            self.grad_des(x_tmp)


def error(x, y):
    yhat = torch.zeros_like(y)
    for i in range(len(x)):
        nn.forward(x[i])
        yhat[i] = nn.S[-1]
    return float(torch.count_nonzero(torch.sign(yhat) != y)/len(y))


if __name__ == "__main__":
    x_train, y_train = data("MOOC/techniques/HW4/hw4_nnet_train.dat")
    x_test, y_test = data("MOOC/techniques/HW4/hw4_nnet_test.dat")
    N = 10
    T, n = 50000, 1
    Ms = [[1], [6], [11], [16], [21]]  # 6
    rs = [0, 0.001, 0.1, 10, 1000]  # 0.001
    etas = [0.001, 0.01, 0.1, 1, 10]  # 0.01

    err_Ms = [0, 0, 0, 0, 0]
    err_rs = [0, 0, 0, 0, 0]
    err_etas = [0, 0, 0, 0, 0]
    err_83 = 0

    for _ in range(N):
        for i in range(len(Ms)):
            nn = NeuralNetwork(x_train, y_train, M=Ms[i])
            nn.backprop(T, n)
            err_Ms[i] += error(x_test, y_test)
        print("Ms finished")

        for i in range(len(rs)):
            nn = NeuralNetwork(x_train, y_train, r=rs[i])
            nn.backprop(T, n)
            err_rs[i] += error(x_test, y_test)
        print("rs finished")

        for i in range(len(etas)):
            nn = NeuralNetwork(x_train, y_train, eta=etas[i])
            nn.backprop(T, n)
            err_etas[i] += error(x_test, y_test)
        print("etas finished")

        nn = NeuralNetwork(x_train, y_train)
        nn = NeuralNetwork(x_train, y_train, M=[8, 3], eta=0.01)
        nn.backprop(T, n)
        err_83 += error(x_test, y_test)

        print(err_Ms, Ms[np.argmin(np.array(err_Ms))])
        print(err_rs, rs[np.argmin(np.array(err_rs))])
        print(err_etas, etas[np.argmin(np.array(err_etas))])
        print(err_83/N)  # 0.036

# [2.9239999651908875, 0.37199998646974564, 0.7799999788403511, 0.7879999801516533, 0.5839999839663506] [6]
# [5.055999994277954, 0.37599998712539673, 0.36799998581409454, 3.476000055670738, 5.251999944448471] 0.1
# [1.084000013768673, 0.35999998450279236, 0.36799998581409454, 4.932000011205673, 5.055999994277954] 0.01
# 0.03719999864697456

Ms finished
rs finished
etas finished
[0.527999997138977, 0.035999998450279236, 0.03999999910593033, 0.23999999463558197, 0.035999998450279236] [6]
[0.47200000286102295, 0.03999999910593033, 0.035999998450279236, 0.3240000009536743, 0.47200000286102295] 0.1
[0.07199999690055847, 0.035999998450279236, 0.035999998450279236, 0.527999997138977, 0.47200000286102295] 0.01
0.0035999998450279235
Ms finished
rs finished
etas finished
[0.767999991774559, 0.07199999690055847, 0.07599999755620956, 0.2799999937415123, 0.07599999755620956] [6]
[1.0, 0.07599999755620956, 0.07199999690055847, 0.8519999980926514, 0.972000002861023] 0.1
[0.18799999356269836, 0.07199999690055847, 0.07199999690055847, 1.0, 1.0] 0.01
0.007599999755620956
Ms finished
rs finished
etas finished
[1.007999986410141, 0.10799999535083771, 0.1119999960064888, 0.31599999219179153, 0.31599999219179153] [6]
[1.527999997138977, 0.1119999960064888, 0.1119999960064888, 1.2479999959468842, 1.487999975681305] 0.001
[0.25599999725818634, 0