# 4章コードの補足

In [53]:
import sys
sys.path.append('..')
import numpy as np
import collections
from common.layers import Embedding, SigmoidWithLoss
from common.util import create_contexts_target

# negative_sampling_layer

## EmbeddingDot
- CBOWモデルの多値分類を二値分類で近似するときに，中間層->出力層の処理を担うレイヤ．
- 正解単語のEmbedding，その単語ベクトルと中間層の値の内積を実行する．

### 初期化 init
- 引数として重みWを受け取る

In [64]:
W_out = np.random.rand(10, 3)  # 出力層側の重み. 語彙数10, word_vec_size=3の想定
embed = Embedding(W_out)  # Embeddingレイヤを生成
grads = embed.grads  # Embeddingレイヤの勾配を保持
cache = None  # backwardで使う値をfoward時に保持する変数

### forward
- 引数の h は中間層のニューロン，idx は正解単語IDの配列

In [65]:
h = np.random.rand(5, 3)  # 中間層のニューロン. batch_size=5, word_vec_size=3 の想定．
idx = np.array([0, 1, 2, 0, 5])  # 正解の単語ID
print(f'中間層 h: \n {h}')
print(f'正解単語ID idx: \n {idx}')

中間層 h: 
 [[0.30350962 0.10335685 0.99940323]
 [0.97747561 0.26188951 0.99611099]
 [0.05279184 0.93906718 0.25935491]
 [0.52469793 0.98596332 0.39542631]
 [0.81237178 0.27725553 0.14723554]]
正解単語ID idx: 
 [0 1 2 0 5]


In [66]:
target_W_out = embed.forward(idx)  # 正解単語の重みのみを抜き出す
print(f'W_out: \n {W_out}')
print(f'target_W_out: \n {target_W_out}')

W_out: 
 [[0.37232478 0.92332719 0.27347781]
 [0.98571088 0.7818164  0.30063887]
 [0.66985102 0.93755263 0.06360142]
 [0.47206289 0.41889419 0.65294163]
 [0.6066007  0.72044264 0.51669946]
 [0.66973063 0.85178689 0.80990261]
 [0.19030978 0.19615922 0.86839846]
 [0.45885484 0.8345593  0.53515815]
 [0.40357728 0.0709034  0.45710072]
 [0.76395917 0.62873386 0.07791518]]
target_W_out: 
 [[0.37232478 0.92332719 0.27347781]
 [0.98571088 0.7818164  0.30063887]
 [0.66985102 0.93755263 0.06360142]
 [0.37232478 0.92332719 0.27347781]
 [0.66973063 0.85178689 0.80990261]]


In [67]:
out = np.sum(target_W_out * h, axis=1)  # 正解単語の重みと中間層の内積計算
print(f'out: \n {out}')

out: 
 [0.48175094 1.46772754 0.93228291 1.21386511 0.89947934]


In [68]:
cache = (h, target_W_out)  # backward用

### backward
- 勾配 dout を受け取る

In [69]:
dout = np.random.rand(*out.shape)
print(f'dout: \n {dout}')

dout: 
 [0.53119116 0.55708182 0.55250584 0.28722379 0.19789887]


In [70]:
h, target_W_out = cache

In [71]:
dout = dout.reshape(dout.shape[0], 1)  # 二次元に変換
print(f'reshaped dout: \n {dout}')

reshaped dout: 
 [[0.53119116]
 [0.55708182]
 [0.55250584]
 [0.28722379]
 [0.19789887]]


In [72]:
dtarget_W_out = dout * h  # 内積の逆伝播
print(f'dtarget_W_out: \n {dtarget_W_out}')

dtarget_W_out: 
 [[0.16122163 0.05490224 0.53087416]
 [0.54453389 0.14589388 0.55491532]
 [0.0291678  0.5188401  0.1432951 ]
 [0.15070573 0.28319213 0.11357585]
 [0.16076746 0.05486856 0.02913775]]


In [73]:
print(f'grads: \n {grads}', end='\n\n')
embed.backward(dtarget_W_out)  # Embeddingレイヤの逆伝播．勾配を更新．
print(f'updated grads: \n {grads}')

grads: 
 [array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])]

updated grads: 
 [array([[0.31192735, 0.33809437, 0.64445   ],
       [0.54453389, 0.14589388, 0.55491532],
       [0.0291678 , 0.5188401 , 0.1432951 ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.16076746, 0.05486856, 0.02913775],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ]])]


## UnigramSampler
- CBOWモデルの高速化の後半部分．
- 負例をランダムに抽出して学習させる際の選び方．
- コーパス中の単語の出現確率に従ってサンプリングする．

### 初期化 init

In [21]:
corpus = np.array([1, 1, 3, 2, 1, 1, 0, 3, 4, 5, 0, 0])  # コーパスは単語IDのリスト
power = 0.75  # 確率に1未満で累乗し，低頻度の単語に下駄をはかせる
sample_size = 3  # サンプリングする数

# Counterでコーパス中の単語の出現回数をカウントできる
counts = collections.Counter()
for word_id in corpus:
    counts[word_id] += 1
print(counts)
print(counts[0])

Counter({1: 4, 0: 3, 3: 2, 2: 1, 4: 1, 5: 1})
3


In [23]:
vocab_size = len(counts)  # 語彙数 = countsの長さ

p = np.zeros(vocab_size)  # 語彙数と同じ要素数の配列で確率を保持する

# 各単語IDの出現回数を格納
for i in range(vocab_size):
    p[i] = counts[i]

# 出現回数を0.75乗して稀な単語の確率に少し下駄をはかせる
word_p = np.power(p, power)
print(f'original p: {p}')
print(f'powerd word_p: {word_p}', end='\n\n')

# np.sum(p) = 単語数 で割って確率にする
p /= np.sum(p)
word_p /= np.sum(word_p)
print(f'p_out: {p}')
print(f'word_p_out: {word_p}')

original p: [3. 4. 1. 2. 1. 1.]
powerd word_p: [2.27950706 2.82842712 1.         1.68179283 1.         1.        ]

p_out: [0.25       0.33333333 0.08333333 0.16666667 0.08333333 0.08333333]
word_p_out: [0.23284685 0.28891787 0.10214789 0.1717916  0.10214789 0.10214789]


### get_negative_sample

In [25]:
# コンテキストとターゲットを作る
window_size = 1
contexts, target = create_contexts_target(corpus, window_size)

In [18]:
batch_size = target.shape[0]

In [19]:
negative_sample = np.zeros((batch_size, sample_size), dtype=np.int32)
print(negative_sample)

[[0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]]


In [26]:
for i in range(batch_size):
    p = word_p.copy()  # 確率を取得
    target_idx = target[i]  # ターゲットを保持
    p[target_idx] = 0  # ターゲットの確率は0
    p /= p.sum()  # ターゲットを除いて確率を再計算
    negative_sample[i, :] = np.random.choice(vocab_size, size=sample_size, replace=True, p=word_p)

In [27]:
print(negative_sample)

[[1 4 2]
 [4 0 5]
 [5 0 1]
 [1 3 2]
 [1 0 0]
 [1 4 1]
 [1 1 0]
 [3 4 2]
 [3 0 0]
 [3 1 0]]


## NegativeSamplingLoss

### 初期化 init