In [40]:
import cupy as cp
import numpy as np
import matplotlib.pyplot as plt
import os
import time
from sklearn.utils.extmath import randomized_svd
from layers.layers import MatMul, softmax, WordEmbed, RNN, TimeAffine, TimeEmbedding, TimeRNN, TimeSoftmaxWithLoss

In [41]:
def remove_duplicate(params, grads):
    """Collapse shared weights into one entry and accumulate their gradients.

    Shallow copies of both lists are edited, so the caller's lists keep their
    length; the gradient arrays themselves are accumulated in place.

    Two entries count as the same weight when they are the very same object,
    or when both are 2-D and one equals the transpose of the other (then the
    transposed gradient is added instead).

    Returns:
        (params, grads): the de-duplicated copies of the two lists.
    """
    params, grads = params[:], grads[:]

    merged = True
    while merged:
        # Rescan from scratch after every merge because indices have shifted.
        merged = False
        count = len(params)

        for keep in range(count - 1):
            for drop in range(keep + 1, count):
                kept, other = params[keep], params[drop]
                if kept is other:
                    # Same array object registered twice.
                    grads[keep] += grads[drop]
                    merged = True
                elif (kept.ndim == 2 and other.ndim == 2
                      and kept.T.shape == other.shape
                      and np.all(kept.T == other)):
                    # Weight stored as the transpose of an earlier one.
                    grads[keep] += grads[drop].T
                    merged = True

                if merged:
                    del params[drop]
                    del grads[drop]
                    break
            if merged:
                break

    return params, grads


class Trainer:
    """Mini-batch training loop that records a running average of the loss.

    Attributes:
        model: object exposing ``forward(x, t)``, ``backward()``, ``params``
            and ``grads``.
        optimizer: object exposing ``update(params, grads)``.
        loss_list: averaged loss values, one per evaluation point.
        eval_interval: interval (in iterations) passed to the last ``fit``.
        current_epoch: number of epochs completed over all ``fit`` calls.
    """

    def __init__(self, model, optimizer):
        self.model = model
        self.optimizer = optimizer
        self.loss_list = []
        self.eval_interval = None
        self.current_epoch = 0

    def fit(self,
            x,
            t,
            max_epoch=10,
            batch_size=32,
            max_grad=None,
            eval_interval=20):
        """Train ``self.model`` on ``(x, t)`` for ``max_epoch`` epochs.

        Args:
            x: inputs; must support ``len`` and indexing by an index array.
            t: targets, indexed the same way as ``x``.
            max_epoch: number of passes over the data.
            batch_size: samples per update. Samples that do not fill a whole
                batch are dropped each epoch (``len(x) // batch_size``
                iterations are run).
            max_grad: if not None, gradients are rescaled so that their
                global L2 norm does not exceed this value.
            eval_interval: every this many iterations the average loss since
                the previous report is printed and appended to
                ``loss_list``; ``None`` disables reporting.
        """
        data_size = len(x)
        max_iters = data_size // batch_size
        self.eval_interval = eval_interval
        model, optimizer = self.model, self.optimizer
        total_loss = 0
        loss_count = 0

        start_time = time.time()
        for _ in range(max_epoch):
            # Shuffle the samples at the start of every epoch.
            idx = np.random.permutation(np.arange(data_size))
            x = x[idx]
            t = t[idx]
            for iters in range(max_iters):
                batch_x = x[iters * batch_size:(iters + 1) * batch_size]
                batch_t = t[iters * batch_size:(iters + 1) * batch_size]

                loss = model.forward(batch_x, batch_t)
                model.backward()
                # Merge shared weights into a single entry.
                params, grads = remove_duplicate(model.params, model.grads)
                if max_grad is not None:
                    # Previously called an undefined global `clip_grads`.
                    self._clip_grads(grads, max_grad)
                optimizer.update(params, grads)
                total_loss += loss
                loss_count += 1

                # Evaluation / progress report.
                if (eval_interval
                        is not None) and (iters % eval_interval) == 0:
                    avg_loss = total_loss / loss_count
                    elapsed_time = time.time() - start_time
                    print(
                        '\r| epoch %-5d |  iter %-5d / %-5d | time %10d[s] | loss %.2f'
                        % (self.current_epoch + 1, iters + 1, max_iters,
                           elapsed_time, avg_loss),
                        end='',
                        flush=True)
                    self.loss_list.append(float(avg_loss))
                    total_loss, loss_count = 0, 0

            self.current_epoch += 1

    def plot(self, ylim=None):
        """Plot the recorded average losses.

        Args:
            ylim: optional ``(low, high)`` pair forwarded to ``plt.ylim``.
        """
        # np.arange returns a NumPy array, which has no CuPy-style `.get()`;
        # it can be handed to matplotlib directly.
        x = np.arange(len(self.loss_list))
        if ylim is not None:
            plt.ylim(*ylim)
        plt.plot(x, self.loss_list, label='train')
        plt.xlabel('iterations (x' + str(self.eval_interval) + ')')
        plt.ylabel('loss')
        plt.show()

    @staticmethod
    def _clip_grads(grads, max_norm):
        """Scale ``grads`` in place so their global L2 norm is <= ``max_norm``.

        Only array methods are used, so NumPy and CuPy arrays both work.
        """
        squared_sum = 0.0
        for grad in grads:
            squared_sum += float((grad ** 2).sum())
        total_norm = squared_sum ** 0.5

        # The small constant avoids dividing by zero for all-zero gradients.
        rate = max_norm / (total_norm + 1e-6)
        if rate < 1:
            for grad in grads:
                grad *= rate


class Adam:
    """Adam optimizer (http://arxiv.org/abs/1412.6980v8).

    Keeps one first-moment (``m``) and one second-moment (``v``) buffer per
    parameter and updates the parameters in place.
    """

    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr, self.beta1, self.beta2 = lr, beta1, beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        """Apply one Adam step to ``params`` using ``grads``.

        The moment buffers are allocated lazily on the first call, shaped
        like the parameters passed in.
        """
        if self.m is None:
            self.m = [np.zeros_like(p) for p in params]
            self.v = [np.zeros_like(p) for p in params]

        self.iter += 1
        # Bias-corrected step size for this iteration.
        first_correction = 1.0 - self.beta1**self.iter
        second_correction = 1.0 - self.beta2**self.iter
        lr_t = self.lr * np.sqrt(second_correction) / first_correction

        for i, _ in enumerate(params):
            grad = grads[i]
            self.m[i] += (1 - self.beta1) * (grad - self.m[i])
            self.v[i] += (1 - self.beta2) * (grad**2 - self.v[i])

            denom = np.sqrt(self.v[i]) + 1e-7
            params[i] -= lr_t * self.m[i] / denom

class Preprocess:
    """Tokenize a text and build corpus, co-occurrence and PPMI structures.

    The text is lower-cased, every string passed in ``*args`` (typically a
    punctuation mark) gets a leading space so that it becomes its own token,
    and the result is split on single spaces.

    Attributes:
        text: list of tokens.
        repeated: positions in ``text`` whose token had already been seen;
            filled by ``get_word_id``.
    """

    def __init__(self, text: str, *args):
        text = text.lower()
        # dict.fromkeys drops repeated symbols while keeping their order, so
        # a symbol passed twice is still only spaced out once.
        for symbol in dict.fromkeys(args):
            text = text.replace(symbol, f' {symbol}')
        # NOTE(review): split(' ') leaves newlines inside tokens and yields ''
        # tokens for consecutive spaces; kept as-is so word ids do not change.
        self.text = text.split(' ')
        self.repeated = []

    def get_word_id(self):
        """Assign ids to tokens in order of first appearance.

        Returns:
            (word -> id dict, id -> word dict, corpus as a list of ids).
        """
        word_to_id, id_to_word, corpus = {}, {}, []
        # Reset so that calling this method again does not duplicate entries.
        self.repeated = []
        for position, word in enumerate(self.text):
            if word in word_to_id:
                self.repeated.append(position)
            else:
                new_id = len(word_to_id)
                word_to_id[word] = new_id
                id_to_word[new_id] = word
            corpus.append(word_to_id[word])
        return word_to_id, id_to_word, corpus

    def get_single_context(self, id_word: dict, word_id: dict, corpus: list, word: str, window: int):
        """Count the words occurring within ``window`` tokens of ``word``.

        Args:
            id_word: id -> word mapping (unused; kept for compatibility).
            word_id: word -> id mapping covering every token of the text.
            corpus: the id corpus; only its length is used.
            word: the word whose neighbours are counted.
            window: number of tokens inspected on each side.

        Returns:
            A uint8 vector of length ``len(corpus)`` indexed by word id (only
            the first ``len(word_id)`` entries can be non-zero), or ``None``
            when ``word`` does not occur in the text.
        """
        text = self.text
        word = word.lower()
        if word not in text:
            return
        target_id = word_id[word]
        last = len(text) - 1
        counts = [0] * len(corpus)
        for index, token in enumerate(text):
            if word_id[token] != target_id:
                continue
            for offset in range(1, window + 1):
                left, right = index - offset, index + offset
                # Bounds checks replace the special cases for the first and
                # last token, which indexed by position instead of word id.
                if left >= 0:
                    counts[word_id[text[left]]] += 1
                if right <= last:
                    counts[word_id[text[right]]] += 1

        # NOTE(review): uint8 cannot hold counts above 255; dtype kept so the
        # returned array type is unchanged.
        return np.array(counts, dtype='uint8')

    def get_coocurrenceMatrix(self, corpus: list, id_word: dict, word_id: dict, window: int):
        """Build the co-occurrence matrix, one row per word of ``word_id``.

        Returns:
            (matrix as a uint8 array, list of the individual row vectors).
        """
        rows = []
        total = len(word_id)
        begin = time.time()
        for done, word in enumerate(word_id, start=1):
            rows.append(self.get_single_context(id_word, word_id, corpus, word, window))
            self._print_progress(done, total, begin, time.time())
        return np.array(rows, dtype='uint8'), rows

    def create_context_target(self, corpus, windowsize = 1):
        """Create (context, target) pairs from a corpus of word ids.

        For every position that has ``windowsize`` neighbours on both sides,
        the target is the id at that position and the context is the ids of
        the ``windowsize`` words to its left followed by those to its right.

        Raises:
            ValueError: if ``windowsize`` is smaller than 1.
        """
        if windowsize < 1:
            raise ValueError('windowsize must be >= 1')
        target = corpus[windowsize:-windowsize]
        context = []
        for idx in range(windowsize, len(corpus) - windowsize):
            context.append([corpus[idx + offset]
                            for offset in range(-windowsize, windowsize + 1)
                            if offset != 0])
        return np.array(context), np.array(target)

    def convert_onehot(self, context, target, length):
        """One-hot encode ``context`` and ``target`` id arrays.

        Args:
            context: 2-D array of ids (one row per sample).
            target: 1-D array of ids.
            length: size of the one-hot axis.

        Returns:
            (context one-hot, target one-hot) as uint8 arrays with a trailing
            axis of size ``length``.
        """
        zero_context = np.zeros(shape=(*context.shape, length), dtype='uint8')
        zero_target = np.zeros(shape=(*target.shape, length), dtype='uint8')
        for row, ids in enumerate(context):
            for col, word in enumerate(ids):
                zero_context[row, col, word] = 1
        for row, word in enumerate(target):
            zero_target[row, word] = 1
        return zero_context, zero_target

    def PPMI(self, co_matrix, corpus, verbose=True):
        """Compute the positive pointwise mutual information matrix.

        Args:
            co_matrix: 2-D co-occurrence matrix.
            corpus: unused; kept for compatibility.
            verbose: print progress while computing.

        Returns:
            float32 array with the shape of ``co_matrix``.
        """
        ppmi_matrix = np.zeros_like(co_matrix, dtype=np.float32)
        N = np.sum(co_matrix)
        word_totals = np.sum(co_matrix, axis=0)
        rows, cols = co_matrix.shape[0], co_matrix.shape[1]
        total = rows * cols
        # At least 1, so matrices with fewer than 200 cells do not trigger a
        # modulo-by-zero.
        report_every = max(1, total // 200)
        cnt = 0
        begin = time.time()
        for i in range(rows):
            for j in range(cols):
                ppmi = np.log2(co_matrix[i, j] * N / (word_totals[i] * word_totals[j]) + 1e-8)
                ppmi_matrix[i, j] = max(0, ppmi)
                if verbose:
                    cnt += 1
                    if cnt % report_every == 0:
                        self._print_progress(cnt, total, begin, time.time())
        return ppmi_matrix

    def most_similar(self, matrix: list, word: str, word_id: dict, top: int):
        """Return the ``top`` words most similar to ``word``.

        Args:
            matrix: word vectors, row ``i`` belonging to word id ``i``.
            word: query word.
            word_id: word -> id mapping.
            top: number of results to return.

        Returns:
            List of ``(word, similarity)`` pairs sorted by descending
            similarity, or ``None`` when ``word`` is not in ``word_id``.
        """
        word = word.lower()
        if word not in word_id:
            return
        query_id = word_id[word]
        query_vector = matrix[query_id]
        # Invert the mapping locally instead of relying on a global id_word.
        id_word = {idx: token for token, idx in word_id.items()}
        scores = {
            id_word[index]: self.similarity(query_vector, row)
            for index, row in enumerate(matrix)
            if index != query_id
        }
        return sorted(scores.items(), key=lambda item: item[1], reverse=True)[:top]

    def similarity(self, vect1, vect2):
        """Cosine similarity; 1e-8 is added to each norm to avoid 0-division."""
        x = vect1 / (np.sqrt(np.sum(vect1**2)) + 1e-8)
        y = vect2 / (np.sqrt(np.sum(vect2**2)) + 1e-8)
        return np.dot(x, y)

    @staticmethod
    def _print_progress(done, total, begin, now):
        """Print a single-line progress report that overwrites itself."""
        print('\r%d / %d | time %.1f[s]' % (done, total, now - begin),
              end='', flush=True)


class MatMul:
    """Matrix-multiplication layer holding a single weight matrix.

    Note: this definition rebinds the name ``MatMul`` that is also imported
    from ``layers.layers`` at the top of the notebook.

    Attributes:
        weights: one-element list containing the weight matrix ``W``.
        gradients: one-element list containing the gradient buffer for ``W``.
        X: input saved by the most recent ``forward`` call.
    """

    def __init__(self, W):
        self.weights = [W]
        self.X = None
        self.gradients = [cp.zeros_like(W)]

    def forward(self, forward_input):
        """Return ``forward_input @ W`` and remember the input for backward."""
        (weight,) = self.weights
        product = cp.dot(forward_input, weight)
        self.X = forward_input
        return product

    def backward(self, d_backward_input):
        """Back-propagate ``d_backward_input``.

        Writes the weight gradient into ``self.gradients[0]`` in place and
        returns the gradient with respect to the layer input.
        """
        weight = self.weights[0]
        # Gradient flowing back to the input.
        dX = cp.dot(d_backward_input, weight.T)

        # Gradient of the weights, written into the existing buffer so that
        # outside references to it stay valid.
        self.gradients[0][...] = cp.dot(self.X.T, d_backward_input)

        return dX
class timeSoftmaxWithLoss:
    """Softmax + cross-entropy loss over a batch of sequences.

    Only the forward pass is defined here. Positions whose label equals
    ``ignore_label`` contribute nothing to the loss.
    """

    def __init__(self):
        self.params, self.grads = [], []
        self.cache = None
        self.ignore_label = -1

    def forward(self, xs, ts):
        """Return the mean loss over all non-ignored positions.

        Args:
            xs: scores with three axes, unpacked as ``(N, T, V)``.
            ts: labels, either ids with shape ``(N, T)`` or one-hot with a
                third axis (converted with ``argmax`` over that axis).

        Stores ``(flattened labels, softmax output, mask, (N, T, V))`` in
        ``self.cache``.
        """
        batch, steps, vocab = xs.shape

        if ts.ndim == 3:  # labels given as one-hot vectors
            ts = ts.argmax(axis=2)

        flat_count = batch * steps

        # Merge the batch and time axes into one.
        mask = (ts != self.ignore_label).reshape(flat_count)
        flat_xs = xs.reshape(flat_count, vocab)
        flat_ts = ts.reshape(flat_count)

        probs = softmax(flat_xs)
        log_likelihood = np.log(probs[np.arange(flat_count), flat_ts])
        log_likelihood *= mask  # zero the loss where the label is ignore_label
        loss = -np.sum(log_likelihood)
        loss /= mask.sum()

        self.cache = (flat_ts, probs, mask, (batch, steps, vocab))
        return loss

In [42]:
# Load the raw text, then build the vocabulary, the id corpus and the one-hot
# context/target arrays.
with open('untitled.txt', mode="r") as fp:
         string = fp.read()
# ',' and '.' are split off as separate tokens.
preprocessed = Preprocess(string, ',', '.')
word_id, id_word, corpus = preprocessed.get_word_id()
context, target = preprocessed.create_context_target(corpus)
context_onehot, target_onehot = preprocessed.convert_onehot(context, target, len(word_id))
# Randomly initialised parameters for the embedding, RNN and affine layers.
# NOTE(review): no random seed is set, so these values differ on every run.
rn = cp.random.randn
# NOTE(review): V is hard-coded rather than derived from len(word_id); D and H
# are presumably the embedding and hidden sizes — confirm.
V, D, H = 1000, 5, 8
embed_w = rn(V, D)
rnn_wx = rn(D, H)
rnn_wh = rn(H, H)
rnn_b = cp.zeros(H)
affine_w = rn(H, V)
affine_b = cp.zeros(V)
# Two input sequences of two word ids each (corpus[0:2] and corpus[3:5]),
# stacked into a single (2, 2) batch.
x = cp.array(corpus[:2]).reshape(1, 2)
y = cp.array(corpus[3:5]).reshape(1, 2)
array = cp.array([x, y]).reshape(2, 2)


In [43]:
#forward
xs = TimeEmbedding(embed_w, 2).forward(array)
xs = TimeRNN(rnn_wx, rnn_wh, rnn_b).forward(xs)
xs = TimeAffine(affine_w, affine_b).forward(xs)

target = cp.array(corpus[1:2+1]).reshape(1, 2)
target2 = cp.array(corpus[3+1:5+1]).reshape(1, 2)
target = cp.array([target, target2]).reshape(2, 2)
loss = TimeSoftmaxWithLoss(2).forward(xs, target)
loss

array(9.29889034)

In [44]:
# Sample 3-D array used below to check np.dot by hand: two stacked 2x3 matrices.
x = np.array([
    [[1, 2, 3], [3, 4, 5]],
    [[5, 6, 7], [7, 8, 9]],
])
print(x)
x.shape

[[[1 2 3]
  [3 4 5]]

 [[5 6 7]
  [7 8 9]]]


(2, 2, 3)

In [45]:
# Sample 3x6 matrix used as the right-hand operand of np.dot below.
y = np.array([
    [1, 2, 3, 1, 1, 1],
    [3, 4, 5, 1, 1, 1],
    [5, 6, 7, 1, 1, 1],
])
print(y)
y.shape

[[1 2 3 1 1 1]
 [3 4 5 1 1 1]
 [5 6 7 1 1 1]]


(3, 6)

In [46]:
# Dot product of the 3-D x with the 2-D y: contracts x's last axis with y's first.
x.dot(y)

array([[[ 22,  28,  34,   6,   6,   6],
        [ 40,  52,  64,  12,  12,  12]],

       [[ 58,  76,  94,  18,  18,  18],
        [ 76, 100, 124,  24,  24,  24]]])

In [47]:
# First column of y.
y.T[0]

array([1, 3, 5])

In [48]:
# First row of the first matrix in x.
x[0, 0]

array([1, 2, 3])

In [49]:
# Hand-computed entry [0, 0, 0] of the dot product: row of x times column 0 of y.
(x[0, 0] * y[:, 0]).sum()

22

In [50]:
# Hand-computed entry [0, 0, 1] of the dot product: row of x times column 1 of y.
(x[0, 0] * y[:, 1]).sum()

28