In [4]:
import torch
import third_party_cpp as test_cpp

x = torch.randn(4)
y = torch.randn(4)
print(test_cpp.sigmoid_add(x, y))

shape = test_cpp.Shape(5, 3)
cArray = test_cpp.NdArray(shape)
ret = cArray.ones()
cArray.print()

tensor([0.4556, 1.0479, 0.8981, 0.9442])


In [16]:
import numpy as np
x = np.array([[1, 2], [2, 3]])
y = np.array([[2, 3], [1, 2]])

print(x)
print(y)
print(x @ y)
print(y @ x)
print(np.dot(x, y))
print(np.dot(y, x))

In [19]:
import numpy as np
import third_party_cpp as ncpp

x = np.array([[1, 2], [2, 3]])
y = np.array([[2, 3], [1, 2]])

x = ncpp.toNumCpp(x)
y = ncpp.toNumCpp(y)

x, y

AttributeError: module 'third_party_cpp' has no attribute 'toNumCpp'

In [3]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np

seq_length = 28
input_size = 28
hidden_size = 128
num_layers = 1
num_classes = 10
batch_size = 1
num_epochs = 2
learning_rate = 0.01

data_path = "/media/lee/ESD-ISO/script_test/Data/mnist/"
train_dataset = torchvision.datasets.MNIST(root=data_path, train=True, transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root=data_path, train=False, transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [4]:
def xavier_init(c1, c2, w=1, h=1, fc=False):
    fan_1 = c2 * w * h
    fan_2 = c1 * w * h
    ratio = np.sqrt(6.0 / (fan_1 + fan_2))
    params = ratio * (2 * np.random.random((c1, c2, w, h)) - 1)
    if fc:
        params = params.reshape(c1, c2)
    return params

class My_RNN(object):
    def __init__(self, input_size, hidden_size, num_classes):
        self.lr = learning_rate
        self.seq_length = seq_length
        self.hidden_size = hidden_size
        self.U = xavier_init(hidden_size, input_size, fc=True) # rnn input parameters
        self.W = xavier_init(hidden_size, hidden_size, fc=True) # rnn hidden parameters
        self.V = xavier_init(hidden_size, hidden_size, fc=True) # rnn output parameters
        self.b = np.zeros((hidden_size, 1)) # rnn input parameters
        self.c = np.zeros((hidden_size, 1)) # rnn output parameters
        
        self.FC_W = xavier_init(num_classes, hidden_size, fc=True) # fc parameters
        self.fc_b = np.zeros((num_classes, 1)) # fc parameters
        
        self.mU = np.zeros_like(self.U)
        self.mW = np.zeros_like(self.W)
        self.mV = np.zeros_like(self.V)
        self.mb = np.zeros_like(self.b)
        self.mc = np.zeros_like(self.c)
        
        self.mFC_W = np.zeros_like(self.FC_W)
        self.mfc_b = np.zeros_like(self.fc_b)
        
        self.X = {}
        self.A = {}
        self.S = {}
        self.O = {}
        self.FC_O = {}
        
    def forward(self, x, hprev):
        self.S[-1] = np.copy(hprev)
        
        for t in range(self.seq_length):
            self.X[t] = x[t].T
            self.A[t] = self.U @ self.X[t] + self.W @ self.S[t - 1] + self.b
            self.S[t] = np.tanh(self.A[t])
            self.O[t] = self.V @ self.S[t] + self.c # (hidden, hidden) @ (hidden, 1) + (hidden, 1)
        
        self.FC_O = self.FC_W @ self.O[self.seq_length - 1] + self.fc_b # (classes, hidden) @ (hidden, 1) + (classes, 1)
        
        return self.FC_O # (classes, 1)
    
    def backward(self, dY): # (classes, 1)
        # zero grad
        dFC_W = np.zeros_like(self.FC_W)
        dfc_b = np.zeros_like(self.fc_b)
        
        dU, dW, dV = np.zeros_like(self.U), np.zeros_like(self.W), np.zeros_like(self.V)
        db, dc = np.zeros_like(self.b), np.zeros_like(self.c)
        dS_next = np.zeros_like(self.S[0])
        
        dFC_W = dY @ self.O[self.seq_length - 1].T # (classes, 1) @ (1, hidden)
        dfc_b = dY # (classes, 1)
        dO = self.FC_W.T @ dY
        
        dV = dO @ self.S[self.seq_length - 1].T
        dc = dO
        
        for t in reversed(range(self.seq_length)):
            dS = self.V.T @ dO + dS_next
            dA = (1 - self.S[t] ** 2) * dS
            dU += dA @ self.X[t].T
            dW += dA @ self.S[t - 1].T
            db += dA
            dS_next = self.W.T @ dA
            
        return [dU, dW, dV, db, dc, dFC_W, dfc_b]
        
    def optimizer_step(self, gradients):
        for dparam in gradients:
            np.clip(dparam, -5, 5, out=dparam)
            
        for param, dparam, mem in zip([self.U, self.W, self.V, self.b, self.c, self.FC_W, self.fc_b], 
                                      gradients,
                                      [self.mU, self.mW, self.mV, self.mb, self.mc, self.mFC_W, self.mfc_b]):
            mem += dparam * dparam
            param += -self.lr * dparam / np.sqrt(mem + 1e-8)
        
    def cross_entropy_loss(self, outputs, labels):
        Y = self.softmax(outputs)
        loss = -np.log(Y) * self.one_hot_vector(Y, labels)
        return Y, loss
    
    def softmax(self, x):
        e = np.exp(x)
        return e / np.sum(e)
    
    def deriv_softmax(self, Y, labels):
        dY = np.copy(Y)
        for i in range(len(labels)):
            dY[labels[i]][i] -= 1
        return dY
    
    def one_hot_vector(self, Y, labels):
        out = np.zeros_like(Y)
        for i in range(len(labels)):
            out[labels[i]][i] = 1
        return out
    
    def predict(self, outputs):
        return np.argmax(self.softmax(outputs), 0)

In [6]:
model = My_RNN(input_size, hidden_size, num_classes)

total_step = len(train_loader)
iter_loss = 0
interval = 10000
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(seq_length, batch_size, input_size).detach().numpy()
        labels = labels.detach().numpy()
        
        hprev = np.zeros((hidden_size, 1))
        outputs = model.forward(images, hprev)
        Y, loss = model.cross_entropy_loss(outputs, labels)
        gradients = model.backward(model.deriv_softmax(Y, labels))
        model.optimizer_step(gradients)
        iter_loss += np.sum(loss)
        if (i + 1) % interval == 0:
            print("epoch {}/{} iter {}/{} loss {:.4f}".format(epoch + 1, num_epochs, i + 1, total_step, iter_loss / interval))
            iter_loss = 0

epoch 1/2 iter 10000/60000 loss 1.4499
epoch 1/2 iter 20000/60000 loss 0.8409
epoch 1/2 iter 30000/60000 loss 0.6453
epoch 1/2 iter 40000/60000 loss 0.5867
epoch 1/2 iter 50000/60000 loss 0.5330
epoch 1/2 iter 60000/60000 loss 0.5169
epoch 2/2 iter 10000/60000 loss 0.4754
epoch 2/2 iter 20000/60000 loss 0.4537
epoch 2/2 iter 30000/60000 loss 0.4213
epoch 2/2 iter 40000/60000 loss 0.4268
epoch 2/2 iter 50000/60000 loss 0.4251
epoch 2/2 iter 60000/60000 loss 0.4247


In [7]:
correct = 0
total = 0
for images, labels in test_loader:
    images = images.reshape(seq_length, batch_size, input_size).detach().numpy()
    labels = labels.detach().numpy()
    
    hprev = np.zeros((hidden_size, 1))
    outputs = model.forward(images, hprev)
    pred = model.predict(outputs)
    total += labels.shape[0]
    correct += (pred == labels).sum().item()

print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

Test Accuracy of the model on the 10000 test images: 88.68 %


In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np

seq_length = 28
input_size = 28
hidden_size = 128
num_layers = 1
num_classes = 10
batch_size = 1
num_epochs = 2
learning_rate = 0.01

data_path = "/media/english/ESD-ISO/script_test/Data/mnist/"
train_dataset = torchvision.datasets.MNIST(root=data_path, train=True, transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root=data_path, train=False, transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
def xavier_init(c1, c2, w=1,h=1,fc=False):
    fan_1 = c2 * w * h
    fan_2 = c1 * w * h
    ratio = np.sqrt(6.0 / (fan_1 + fan_2))
    params = ratio * (2 * np.random.random((c1, c2, w, h)) - 1)
    if fc:
        params = params.reshape(c1, c2)
    return params

class My_LSTM(object):
    def __init__(self, x_size, hidden_size, num_classes):
        self.lr = learning_rate
        self.seq_length = seq_length
        self.input_size = x_size + hidden_size
        
        self.W_f = xavier_init(hidden_size, self.input_size, fc=True)
        self.b_f = np.zeros((hidden_size, 1))
        
        self.W_i = xavier_init(hidden_size, self.input_size, fc=True)
        self.b_i = np.zeros((hidden_size, 1))
        
        self.W_g = xavier_init(hidden_size, self.input_size, fc=True)
        self.b_g = np.zeros((hidden_size, 1))
        
        self.W_o = xavier_init(hidden_size, self.input_size, fc=True)
        self.b_o = np.zeros((hidden_size, 1))
        
        self.W_fc = xavier_init(num_classes, hidden_size, fc=True)
        self.b_fc = np.zeros((num_classes, 1))
        
        self.mW_f = np.zeros_like(self.W_f)
        self.mb_f = np.zeros_like(self.b_f)
        
        self.mW_i = np.zeros_like(self.W_i)
        self.mb_i = np.zeros_like(self.b_i)
        
        self.mW_g = np.zeros_like(self.W_g)
        self.mb_g = np.zeros_like(self.b_g)
        
        self.mW_o = np.zeros_like(self.W_o)
        self.mb_o = np.zeros_like(self.b_o)
        
        self.mW_fc = np.zeros_like(self.W_fc)
        self.mb_fc = np.zeros_like(self.b_fc)
        
        self.X = {}
        self.F = {}
        self.F_A = {}
        
        self.I = {}
        self.I_A = {}
        
        self.G = {}
        self.G_A = {}
        
        self.O = {}
        self.O_A = {}
        
        self.C = {}
        self.C_A = {}
        self.H = {}
        
    def forward(self, x, hprev, cprev):
        self.X = {}
        self.F = {}
        self.F_A = {}
        
        self.I = {}
        self.I_A = {}
        
        self.G = {}
        self.G_A = {}
        
        self.O = {}
        self.O_A = {}
        
        self.C = {}
        self.C_A = {}
        self.H = {}
        
        self.H[-1] = np.copy(hprev)
        self.C[-1] = np.copy(cprev)
        
        for t in range(self.seq_length):
            self.X[t] = np.concatenate((self.H[t-1], x[t].T), axis = 0)
            
            self.F[t] = self.W_f @ self.X[t] + self.b_f
            self.F_A[t] = self.sigmoid(self.F[t])
            
            self.I[t] = self.W_i @ self.X[t] + self.b_i
            self.I_A[t] = self.sigmoid(self.I[t])
            
            self.G[t] = self.W_g @ self.X[t] + self.b_g
            self.G_A[t] = np.tanh(self.G[t])
            
            self.C[t] = self.F_A[t] * self.C[t - 1] + self.I_A[t] * self.G_A[t]
            self.C_A[t] = np.tanh(self.C[t])
            
            self.O[t] = self.W_o @ self.X[t] + self.b_o
            self.O_A[t] = self.sigmoid(self.O[t])
            
            self.H[t] = self.O_A[t] * self.C_A[t]
            
        output = self.W_fc @ self.H[self.seq_length - 1] + self.b_fc
        
        return output
    
    def backward(self, dY):
        dW_f, db_f = np.zeros_like(self.W_f), np.zeros_like(self.b_f)
        dW_i, db_i = np.zeros_like(self.W_i), np.zeros_like(self.b_i)
        dW_g, db_g = np.zeros_like(self.W_g), np.zeros_like(self.b_g)
        dW_o, db_o = np.zeros_like(self.W_o), np.zeros_like(self.b_o)
        dW_fc, db_fc = np.zeros_like(self.W_fc), np.zeros_like(self.b_fc)
        
        dH_next = np.zeros_like(self.H[0])
        dC_next = np.zeros_like(self.C[0])
        
        dW_fc = dY @ self.H[self.seq_length - 1].T
        db_fc = dY
        
        for t in reversed(range(self.seq_length)):
            dh = self.W_fc.T @ dY + dH_next
            
            dO_A = dh * self.C_A[t]
            dO = dO_A * (self.O_A[t] * (1 - self.O_A[t]))
            dW_o += dO @ self.X[t].T
            db_o += dO
            
            dC_A = self.O_A[t] * dh
            dC = dC_A * (1 - self.C_A[t] ** 2) + dC_next
            
            dF_A = dC * self.C[t - 1]
            dI_A = dC * self.G_A[t]
            dG_A = self.I_A[t] * dC
            dC_next = self.F_A[t] * dC
            
            dF = dF_A * (self.F_A[t] * (1 - self.F_A[t]))
            dW_f += dF @ self.X[t].T
            db_f += dF
            
            dI = dI_A * (self.I_A[t] * (1 - self.I_A[t]))
            dW_i += dI @ self.X[t].T
            db_i += dI
            
            dG = dG_A * (1 - self.G_A[t] ** 2)
            dW_g += dG @ self.X[t].T
            db_g += dG
            
            dX = self.W_f.T @ dF + self.W_i.T @ dI + self.W_g.T @ dG + self.W_o.T @ dO
            dH_next = dX[:hidden_size, :]
        
        gradients = [dW_f, db_f, dW_i, db_i, dW_g, db_g, dW_o, db_o, dW_fc, db_fc]
        
        return gradients
    
    def optimizer_step(self, gradients):
        for dparam in gradients:
            np.clip(dparam, -5, 5, out=dparam)
        
        for param, dparam, mem in zip(
            [self.W_f, self.b_f, self.W_i, self.b_i, self.W_g, self.b_g, self.W_o, self.b_o, self.W_fc, self.b_fc],
            gradients,
            [self.mW_f, self.mb_f, self.mW_i, self.mb_i, self.mW_g, self.mb_g, self.mW_o, self.mb_o, self.mW_fc, self.mb_fc]):
            mem += dparam * dparam
            param += -self.lr * dparam / np.sqrt(mem + 1e-8)
            
    def cross_entropy_loss(self, outputs, labels):
        Y = self.softmax(outputs)
        loss = -np.log(Y) * self.one_hot_vector(Y, labels)
        return Y, loss
    
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def softmax(self, x):
        e = np.exp(x)
        return e / np.sum(e)
    
    def deriv_softmax(self, Y, labels):
        dY = np.copy(Y)
        for i in range(len(labels)):
            dY[labels[i]][i] -= 1
        return dY
    
    def one_hot_vector(self, Y, labels):
        out = np.zeros_like(Y)
        for i in range(len(labels)):
            out[labels[i]][i] = 1
        return out
    
    def predict(self, outputs):
        return np.argmax(self.softmax(outputs), 0)

In [None]:
model = My_LSTM(input_size, hidden_size, num_classes)

total_step = len(train_loader)
iter_loss = 0
interval = 1000
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(seq_length, batch_size, input_size).detach().numpy()
        labels = labels.detach().numpy()
        
        hprev = np.zeros((hidden_size, 1))
        cprev = np.zeros((hidden_size, 1))
        outputs = model.forward(images, hprev, cprev)
        Y, loss = model.cross_entropy_loss(outputs, labels)
        gradients = model.backward(model.deriv_softmax(Y, labels))
        model.optimizer_step(gradients)
        iter_loss += np.sum(loss)
        if(i + 1) % interval == 0:
            print("epoch {}/{} iter {}/{} loss {:.4f}".format(epoch + 1, num_epochs, i + 1, total_step, iter_loss / interval))
            iter_loss = 0

In [None]:
correct = 0
total = 0
for images, labels in test_loader:
    images = images.reshape(seq_length, batch_size, input_size).detach().numpy()
    labels = labels.detach().numpy()
    
    hprev = np.zeros((hidden_size, 1))
    cprev = np.zeros((hidden_size, 1))
    outputs = model.forward(images, hprev, cprev)
    pred = model.predict(outputs)
    total += labels.shape[0]
    correct += (pred == labels).sum().item()

print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

Test Accuracy of the model on the 10000 test images: 95.73 %


In [None]:
import numpy as np
import torch

In [2]:
class RNN:
    def __init__(self, hidden_size, vocab_size, seq_length, learning_rate):
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.seq_length = seq_length
        self.learning_rate = learning_rate
        
        self.U = np.random.uniform(-np.sqrt(1./vocab_size), np.sqrt(1./vocab_size), (hidden_size, vocab_size))
        self.V = np.random.uniform(-np.sqrt(1./hidden_size), np.sqrt(1./hidden_size), (vocab_size, hidden_size))
        self.W = np.random.uniform(-np.sqrt(1./hidden_size), np.sqrt(1./hidden_size), (hidden_size, hidden_size))
        self.b = np.zeros((hidden_size,1))
        self.c = np.zeros((hidden_size, 1))
        
        self.mU = np.zeros_like(self.U)
        self.mW = np.zeros_like(self.W)
        self.mV = np.zeros_like(self.V)
        self.mb = np.zeros_like(self.b)
        self.mc = np.zeros_like(self.c)
        
    def softmax(self, x):
        p = np.exp(x - np.max(x))
        return p / np.sum(p)
    
    def forward(self, inputs, hprev):
        xs, hs, os, ycap = {}, {}, {}, {}
        hs[-1] = np.copy(hprev)
        for t in range(len(inputs)):
            xs[t] = np.zeros(self.vocab_size, 1)
            xs[t][inputs[t]] = 1
            hs[t] = np.tanh(np.dot(self.U, xs[t]) + np.dot(self.W, hs[t - 1]) + self.b)
            os[t] = np.dot(self.V, hs[t]) + self.c
            ycap[t] = self.softmax(os[t])
        return xs, hs, ycap
    
    def backward(self, xs, hs, ps, targets):
        dU, dW, dV = np.zeros_like(self.U), np.zeros_like(self.W), np.zeros_like(self.V)
        db, dc = np.zeros_like(self.b), np.zeros_like(self.c)
        dhnext = np.zeros_like(hs[0])
        for t in reversed(range(self.seq_length)):
            dy = np.copy(ps[t])
            dy[targets[t]] -= 1
            dV += np.dot(dy, hs[t].T)
            dc += dy
            dh = np.dot(self.V.T, dy) + dhnext
            dhprec = (1 - hs[t] * hs[t]) * dh
            dU += np.dot(dhprec, xs[t].T)
            dW += np.dot(dhprec, hs[t-1].T)
            db += dhprec
            dhnext = np.dot(self.W.T, dhprec)
            
        for dparam in [dU, dW, dV, db, dc]:
            np.clip(dparam, -5, 5, out=dparam)
        return dU, dW, dV, db, dc
    
    def loss(self, ps, targets):
        return sum(-np.log(ps[t][targets[t],0]) for t in range(self.seq_length))
    
    def update_model(self, dU, dW, dV, db, dc):
        for param, dparam, mem in zip([self.U, self.W, self.V, self.b, self.c],
                                     [dU, dW, dV, db, dc],
                                     [self.mU, self.mW, self.mV, self.mb, self.mc]):
            mem += dparam * dparam
            param += -self.learning_rate * dparam / np.sqrt(mem + 1e-8)
    
    def train(self, data_reader):
        iter_num= 0
        threshold = 0.1
        smooth_loss = -np.log(1.0 / data_reader.vocab_size) * self.seq_length
        while (smooth_loss > threshold):
            if data_reader.just_started():
                hprev = np.zeros((self.hidden_size, 1))
            inputs, targets = data_reader.next_batch()
            xs, hs, ps = self.forward(inputs,hprev)
            dU, dW, dV, db, dc = self.backward(xs, hs, ps, targets)
            loss = self.loss(ps, targets)
            

In [5]:
def xavier_init(c1, c2, w=1, h=1, fc=False):
    fan_1 = c2 * w * h
    fan_2 = c1 * w * h
    ratio = np.sqrt(6.0 / (fan_1 + fan_2))
    params = ratio * (2 * np.random.random((c1, c2, w, h)) - 1)
    if fc:
        params = params.reshape(c1, c2)
    return params

def generate_dataset(data_size, length, split_ratio):
    X = np.random.uniform(0, 1, (data_size, length, 1))
    Y = np.zeros((data_size, length, 1))
    threshold = length / 2.
    for i in range(data_size):
        prefix_sum = 0
        for j in range(length):
            prefix_sum += X[i][j][0]
            Y[i][j][0] = int(prefix_sum > threshold)
    split_point = int(data_size * split_ratio)
    train_x, test_x = X[:split_point], X[split_point:]
    train_y, test_y = Y[:split_point], Y[split_point:]
    return np.swapaxes(train_x, 0, 1), np.swapaxes(test_x, 0, 1), \
           np.swapaxes(train_y, 0, 1), np.swapaxes(test_y, 0, 1)

class RNN(object):
    def __init__(self, input_dim, hidden_dim, output_dim, seq_length, lr=0.002):
        self.lr = lr
        self.seq_length = seq_length
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.U = xavier_init(hidden_dim, input_dim, fc=True)
        self.W = xavier_init(hidden_dim, hidden_dim, fc=True)
        self.V = xavier_init(output_dim, hidden_dim, fc=True)
        self.b = np.zeros((hidden_dim, 1))
        self.c = np.zeros((output_dim, 1))
        
        self.mU = np.zeros_like(self.U)
        self.mW = np.zeros_like(self.W)
        self.mV = np.zeros_like(self.V)
        self.mb = np.zeros_like(self.b)
        self.mc = np.zeros_like(self.c)
        
        self.X = {}
        self.H = {}
        self.O = {}
        self.Y = {}

    def forward(self, x, hprev):
        self.X = {}
        self.H = {}
        self.O = {}
        self.Y = []
        self.H[-1] = np.copy(hprev)
        
        for t in range(self.seq_length):
            self.X[t] = x[t:t+1]
            self.H[t] = np.tanh(self.U @ self.X[t] + self.W @ self.H[t - 1] + self.b) 
            self.O[t] = self.V @ self.H[t] + self.c
            self.Y.append(self.sigmoid(self.O[t]))
        self.Y = np.array(self.Y)
        return self.Y
    
    def backward(self, label):
        dU, dW, dV = np.zeros_like(self.U), np.zeros_like(self.W), np.zeros_like(self.V)
        db, dc = np.zeros_like(self.b), np.zeros_like(self.c)
        dH_next = np.zeros_like(self.H[0])
        
        for t in reversed(range(self.seq_length)):
            gt = label[t:t+1]
            dY = self.Y[t] - gt 
            dO = dY * (self.Y[t] * (1 - self.Y[t])) 
            dV += dO @ self.H[t].T 
            dc += dO 
            dH = self.V.T @ dO + dH_next 
            dTan = (1 - self.H[t] ** 2) * dH 
            dU += dTan @ self.X[t].T
            dW += dTan @ self.H[t - 1].T
            db += dTan
            dH_next = self.W.T @ dTan
        
        self.update_model(dU, dW, dV, db, dc)
        
    def update_model(self, dU, dW, dV, db, dc):
        for dparam in [dU, dW, dV, db, dc]:
            np.clip(dparam, -5, 5, out=dparam)
            
        for param, dparam, mem in zip([self.U, self.W, self.V, self.b, self.c],
                                     [dU, dW, dV, db, dc],
                                     [self.mU, self.mW, self.mV, self.mb, self.mc]):
            mem += dparam * dparam
            param += -self.lr * dparam / np.sqrt(mem + 1e-8)
            
    def sigmoid(self, x):
        return np.where(x >= 0, 1 / (1 + np.exp(-x)), np.exp(x) / (1 + np.exp(x)))
    
def main():
    length = 12
    data_size = 1000
    split_ratio = 0.8
    hidden_size = 8
    epochs = 500
    train_x, test_x, train_y, test_y = generate_dataset(data_size, length, split_ratio)
    rnn = RNN(1, hidden_size, 1, length)
    for epoch in range(epochs):
        loss = 0
        for iters in range(train_x.shape[1]):
            label = train_y[:, iters, :]
            hprev = np.zeros((hidden_size, 1))
            output = rnn.forward(train_x[:, iters, :], hprev)
            output = output[:,:,0]
            rnn.backward(label)
            loss += (output - label) ** 2
        print("train epoch : {}/{} loss : {:6f}".format(epoch, epochs, np.sum(loss) / train_x.shape[1]))
        
        if epoch == 0 or epoch % 100 != 0:
            continue
        TP_FP = 0
        TP_FN = 0
        TP = 0
        for iters in range(test_x.shape[1]):
            label = test_y[:, iters, :]
            hprev = np.zeros((hidden_size, 1))
            output = rnn.forward(test_x[:, iters, :], hprev)
            output = output[:,:,0]
            predict = output > 0.5
            TP_FP += np.sum(predict)
            TP_FN += np.sum(label)
            TP += np.sum(np.logical_and(predict, label))
            
        precision = 0 if TP_FP == 0 else TP / TP_FP
        recall = 0 if TP_FN == 0 else TP / TP_FN
        print("eval epoch : {}/{} precision : {:6f}, recall : {:6f}".format(epoch, epochs, precision, recall))


In [None]:
main()