基于计数方法的问题
- 矩阵庞大，SVD缓慢  
- 添加新词需要重新计算
基于推理的方法
- 给定上下文，预测单词
- 可以根据上一次的权重增量学习  
- 就单词相似性的定量评价而言，基于推理的方法和基于计数的方法难分上下
- 带负采样的 skip-gram 已被证明与对（经过一定修改的）整个语料库的共现矩阵进行特殊的矩阵分解具有相同的作用  
word2vec
- CBOW $L=-\frac{1}{T}\sum^T_{t=1}{\log P(w_t|w_{t-1},w_{t+1})}$
- skip-gram 使用 W_in 作为词向量 $L=-\frac{1}{T}\sum^T_{t=1}\left(\log P(w_{t-1}|w_{t})+\log P(w_{t+1}|w_{t})\right)$  任务更难，效果更好
glove
- 使用W_in和W_out 的和  




In [None]:
import sys
sys.path.append('..')
import numpy as np
from common.layers import MatMul, SoftmaxWithLoss
from common.trainer import Trainer
from common.optimizer import Adam
from common.util import preprocess,create_contexts_target,convert_one_hot

class SimpleCBOW:
    """CBOW network that predicts a target from exactly two context inputs.

    The model is assembled from two input ``MatMul`` layers (both constructed
    from the same ``W_in`` array), one output ``MatMul`` layer and a
    ``SoftmaxWithLoss`` layer.

    Attributes set by ``__init__``:
        params, grads: flat lists gathered from the three ``MatMul`` layers,
            in the order in_layer0, in_layer1, out_layer.
        word_vecs: the ``W_in`` array handed to both input layers.

    NOTE(review): both weight matrices are drawn from an unscaled standard
    normal; a small scale factor (e.g. 0.01) is a common choice for this kind
    of model -- confirm whether the missing scale is intended.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create the weights and wire up the layers.

        Args:
            vocab_size: first dimension of ``W_in`` and second of ``W_out``.
            hidden_size: second dimension of ``W_in`` and first of ``W_out``.
        """
        # W_in is drawn before W_out so the random stream is consumed in a
        # fixed order; both are cast to float32 via the 'f' type code.
        W_in = np.random.randn(vocab_size, hidden_size).astype('f')
        W_out = np.random.randn(hidden_size, vocab_size).astype('f')

        # The two input layers are given the very same array object.
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Flatten the per-layer parameter and gradient lists.
        weighted = (self.in_layer0, self.in_layer1, self.out_layer)
        self.params = [p for layer in weighted for p in layer.params]
        self.grads = [g for layer in weighted for g in layer.grads]

        self.word_vecs = W_in

    def forward(self, contexts, t):
        """Return the loss for a batch.

        Args:
            contexts: array indexed as ``contexts[:, 0]`` and
                ``contexts[:, 1]``; each slice feeds one input layer.
            t: targets, passed unchanged to the loss layer.

        Returns:
            Whatever ``loss_layer.forward`` returns for the scores and ``t``.
        """
        first = self.in_layer0.forward(contexts[:, 0])
        second = self.in_layer1.forward(contexts[:, 1])
        # Hidden activation is the mean of the two input-layer outputs.
        hidden = (first + second) * 0.5
        scores = self.out_layer.forward(hidden)
        return self.loss_layer.forward(scores, t)

    def backward(self, dout=1):
        """Propagate ``dout`` back through every layer; always returns None."""
        dscores = self.loss_layer.backward(dout)
        dhidden = self.out_layer.backward(dscores)
        # Backward of the averaging step: each branch receives half the
        # gradient (scaled in place, then the same array goes to both).
        dhidden *= 0.5
        for in_layer in (self.in_layer1, self.in_layer0):
            in_layer.backward(dhidden)


In [None]:
# Settings for the toy training run.
window_size = 1
hidden_size = 5
batch_size = 3
max_epoch = 1000

# Turn the sample sentence into a corpus plus the two vocabulary mappings.
text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)
vocab_size = len(word_to_id)

# Build the (contexts, target) pairs and run both through convert_one_hot.
contexts, target = create_contexts_target(corpus, window_size)
contexts = convert_one_hot(contexts, vocab_size)
target = convert_one_hot(target, vocab_size)

# Fit the model with Adam via the Trainer helper, then plot the result.
model = SimpleCBOW(vocab_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()


In [None]:
# Print every vocabulary word next to its entry in the model's word_vecs,
# looked up by the word's id.
word_vecs = model.word_vecs
for idx, token in id_to_word.items():
    print(token, word_vecs[idx])