In [22]:
import numpy as np
import matplotlib.pyplot as plt
from common.layers import *

In [15]:
# Toy computation of the context vector (single sequence, no batch axis)

T, H = 5, 4
hs = np.random.randn(T, H)  # one hidden vector per time step
a = np.array([0.8, 0.1, 0.03, 0.05, 0.02])  # weight assigned to each word

# Expand the weights explicitly so they line up element-wise with hs
ar = np.repeat(a.reshape(T, 1), H, axis=1)
print(ar.shape)

# Element-wise product
t = ar * hs
print(t.shape)

# Sum over the time axis
c = t.sum(axis=0)
print(c.shape)

(5, 4)
(5, 4)
(4,)


In [6]:
a.dot(hs)  # with a batch axis this product is no longer this straightforward

array([-1.79017492,  0.30723916, -0.87767826,  0.71320593])

In [8]:
(ar * hs).sum(axis=0)

array([-1.79017492,  0.30723916, -0.87767826,  0.71320593])

In [13]:
# A (5, 1) column is enough: NumPy broadcasting stretches it across the columns of hs
test = a.reshape(5, -1)
(test * hs).sum(axis=0)

array([-1.79017492,  0.30723916, -0.87767826,  0.71320593])

In [19]:
# Batched version of the weighted sum
N, T, H = 10, 5, 4
hs = np.random.randn(N, T, H)
a = np.random.randn(N, T)

# Copy each weight along a new trailing axis so it matches hs element-wise
ar = np.repeat(a[:, :, np.newaxis], H, axis=2)

t = ar * hs
print(t.shape)

# Collapse the time axis
c = t.sum(axis=1)
print(c.shape)

(10, 5, 4)
(10, 4)


In [21]:
class WeightSum:
    """Weighted sum of hidden vectors along the time axis.

    ``forward`` combines ``hs`` (unpacked as ``(N, T, H)``) with weights ``a``
    (reshaped to ``(N, T, 1)``) into one vector per batch item, shape
    ``(N, H)``. ``backward`` returns the gradients with respect to both inputs.
    The layer has no trainable parameters, so ``params`` and ``grads`` are
    empty lists.
    """

    def __init__(self):
        # Tuple assignment. The previous form `self.params = [], self.grads = []`
        # is a chained assignment that tries to unpack `[]` into two targets and
        # raises ValueError on instantiation.
        self.params, self.grads = [], []
        self.cache = None  # (hs, ar) saved by forward() for use in backward()

    def forward(self, hs, a):
        """Return the weighted sum of ``hs`` over the time axis.

        Args:
            hs: array that unpacks as ``(N, T, H)``.
            a: weights holding ``N * T`` values; reshaped to ``(N, T, 1)``.

        Returns:
            Array of shape ``(N, H)``.
        """
        N, T, H = hs.shape

        # Repeat every weight H times so it lines up element-wise with hs.
        ar = a.reshape(N, T, 1).repeat(H, axis=2)
        t = hs * ar
        c = np.sum(t, axis=1)
        self.cache = (hs, ar)
        return c

    def backward(self, dc):
        """Back-propagate ``dc`` (shape ``(N, H)``) through the weighted sum.

        Must be called after ``forward``, which fills ``self.cache``.

        Returns:
            Tuple ``(dhs, da)`` with shapes ``(N, T, H)`` and ``(N, T)``.
        """
        hs, ar = self.cache
        N, T, H = hs.shape
        # Backward of the sum over T: copy the gradient to every time step.
        dt = dc.reshape(N, 1, H).repeat(T, axis=1)
        # Backward of the element-wise product.
        dar = dt * hs
        dhs = dt * ar
        # Backward of the repeat over H: sum the copies back together.
        da = np.sum(dar, axis=2)
        return dhs, da

In [26]:
# Computing the attention weights
# Batched version
N, T, H = 10, 5, 4
hs = np.random.randn(N, T, H)
h = np.random.randn(N, H)

# Copy h along a new time axis so it lines up element-wise with hs
hr = np.repeat(h[:, np.newaxis, :], T, axis=1)

# Similarity score via dot product: multiply element-wise, then sum over H
t = hr * hs
print(t.shape)

s = t.sum(axis=2)  # shape: (N, T)
print(s.shape)

# Pass the scores through the Softmax layer to turn them into weights
softmax = Softmax()
a = softmax.forward(s)
print(a.shape)



(10, 5, 4)
(10, 5)
(10, 5)


In [27]:
class AttentionWeight:
    """Attention weights from dot-product scores between ``hs`` and ``h``.

    ``forward`` scores every time step of ``hs`` (unpacked as ``(N, T, H)``)
    against ``h`` (reshaped to ``(N, 1, H)``) with a dot product over ``H`` and
    passes the ``(N, T)`` scores through a ``Softmax`` layer. ``backward``
    returns the gradients with respect to both inputs. The layer has no
    trainable parameters, so ``params`` and ``grads`` are empty lists.

    NOTE(review): ``Softmax`` is not defined in this notebook; it presumably
    comes from ``from common.layers import *`` -- confirm.
    """

    def __init__(self):
        self.params, self.grads = [], []
        self.softmax = Softmax()
        self.cache = None  # (hs, hr) saved by forward() for use in backward()

    def forward(self, hs, h):
        """Return the output of the softmax layer applied to the scores.

        Args:
            hs: array that unpacks as ``(N, T, H)``.
            h: array holding ``N * H`` values; reshaped to ``(N, 1, H)``.

        Returns:
            Whatever ``self.softmax.forward`` returns for the ``(N, T)`` scores.
        """
        N, T, H = hs.shape

        # Repeat h T times so it lines up element-wise with hs.
        hr = h.reshape(N, 1, H).repeat(T, axis=1)
        t = hs * hr
        s = np.sum(t, axis=2)  # dot-product scores, shape (N, T)
        a = self.softmax.forward(s)
        self.cache = (hs, hr)

        return a

    def backward(self, da):
        """Back-propagate ``da`` through the softmax and the dot product.

        Must be called after ``forward``, which fills ``self.cache``.

        Returns:
            Tuple ``(dhs, dh)`` with shapes ``(N, T, H)`` and ``(N, H)``.
        """
        hs, hr = self.cache
        N, T, H = hs.shape

        # Use this layer's own Softmax instance (the one forward() ran through),
        # not a notebook-level global named `softmax`.
        ds = self.softmax.backward(da)
        # Backward of the sum over H: copy the gradient to every component.
        dt = ds.reshape(N, T, 1).repeat(H, axis=2)
        # Backward of the element-wise product.
        dhs = dt * hr
        dhr = dt * hs
        # Backward of the repeat over T: sum the copies back together.
        dh = np.sum(dhr, axis=1)

        return dhs, dh