<a href="https://colab.research.google.com/github/6ma6X/nn4nlp-code/blob/master/CBOW.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
from torch.autograd import Variable

In [7]:
class CBoW(torch.nn.Module):
    """Continuous bag-of-words classifier.

    A sentence is represented by the sum of its word embeddings, and a single
    linear layer turns that sum into one score per tag.

    Args:
        nwords: Number of rows in the embedding table (vocabulary size).
        ntags: Number of output scores (tag classes).
        emb_size: Dimension of each word embedding; independent of ``nwords``.
    """

    def __init__(self, nwords, ntags, emb_size):
        super().__init__()

        # Word-id -> emb_size vector lookup, re-initialized with Xavier uniform.
        embedding = nn.Embedding(nwords, emb_size)
        nn.init.xavier_uniform_(embedding.weight)
        self.embedding = embedding

        # Linear projection from the embedding space (emb_size) to tag scores
        # (ntags); only the weight is re-initialized, the bias keeps its default.
        linear = nn.Linear(emb_size, ntags)
        nn.init.xavier_uniform_(linear.weight)
        self.linear = linear

    def forward(self, words):
        """Return a ``1 x ntags`` tensor of tag scores for one sentence.

        Args:
            words: Tensor of word indices for a single sentence (a 1-D tensor
                in the training loop that calls this model).
        """
        # Sum over the word axis: (len, emb_size) -> (emb_size,)
        summed = self.embedding(words).sum(dim=0)
        # Add a batch axis of size 1 before the projection: 1 x emb_size
        return self.linear(summed.view(1, -1))

In [8]:
# Ask PyTorch whether a CUDA device is usable; as the only expression in this
# notebook cell, its value is echoed as the cell output.
torch.cuda.is_available()

True

In [9]:
from collections import defaultdict
import time
import random
import numpy as np

# Functions to read in the corpus
# Each map hands out the next free integer id the first time a key is looked
# up, so ids are dense and assigned in order of first appearance.
w2i = defaultdict(lambda: len(w2i))
t2i = defaultdict(lambda: len(t2i))
# Looking up "<unk>" first reserves id 0 for unknown words.
UNK = w2i["<unk>"]


def read_dataset(filename):
    """Yield ``(word_ids, tag_id)`` pairs from a ``tag ||| sentence`` file.

    Every line is lower-cased and stripped, then split on ``" ||| "`` into a
    tag and a space-separated sentence. Words and tags are mapped to integer
    ids through the module-level ``w2i`` / ``t2i`` maps, which are resolved at
    call time (so a caller may rebind ``w2i`` between calls).

    Args:
        filename: Path of the corpus file, read as UTF-8.

    Yields:
        A tuple ``(list_of_word_ids, tag_id)`` per non-blank line.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            fields on ``" ||| "``.
    """
    # Explicit encoding so decoding does not depend on the platform locale.
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            line = line.lower().strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no example.
                continue
            tag, words = line.split(" ||| ")
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])

# Read in the data
train = list(read_dataset("train.txt"))
# Take the vocabulary size from the training data only. This has to happen
# before the dev set is read: the frozen map below is still a defaultdict, so
# every unseen dev word would be inserted as a new key (with value UNK) and
# inflate len(w2i) with embedding rows that are never looked up.
nwords = len(w2i)
# Freeze the vocabulary: any word not seen in training now maps to UNK.
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("test.txt"))
# Counted after reading dev, so tags that only appear there are included.
ntags = len(t2i)

# initialize the model
EMB_SIZE = 64
model = CBoW(nwords, ntags, EMB_SIZE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

# Select the device once and create tensors directly on it. This replaces the
# legacy torch.LongTensor / torch.cuda.LongTensor switch and avoids rebinding
# the builtin name `type`.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model.to(device)


for ITER in range(100):
    # Perform training: one optimizer step per sentence, in a fresh random order
    random.shuffle(train)
    train_loss = 0.0
    start = time.time()
    for words, tag in train:
        words_t = torch.tensor(words, dtype=torch.long, device=device)
        # CrossEntropyLoss expects a batch of targets, hence the 1-element list
        tag_t = torch.tensor([tag], dtype=torch.long, device=device)
        scores = model(words_t)
        loss = criterion(scores, tag_t)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("iter %r: train loss/sent=%.4f, time=%.2fs" % (
                ITER, train_loss/len(train), time.time()-start))
    # Perform testing
    test_correct = 0.0
    # No gradients are needed for evaluation, so skip building autograd graphs
    with torch.no_grad():
        for words, tag in dev:
            words_t = torch.tensor(words, dtype=torch.long, device=device)
            scores = model(words_t)[0].cpu().numpy()
            predict = np.argmax(scores)
            if predict == tag:
                test_correct += 1
    print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))

iter 0: train loss/sent=1.4061, time=8.74s
iter 0: test acc=0.3973
iter 1: train loss/sent=0.8956, time=8.82s
iter 1: test acc=0.4208
iter 2: train loss/sent=0.5179, time=8.61s
iter 2: test acc=0.3855
iter 3: train loss/sent=0.3201, time=8.66s
iter 3: test acc=0.3846
iter 4: train loss/sent=0.2196, time=8.68s
iter 4: test acc=0.3896
iter 5: train loss/sent=0.1578, time=8.87s
iter 5: test acc=0.3683
iter 6: train loss/sent=0.1131, time=8.50s
iter 6: test acc=0.3679
iter 7: train loss/sent=0.0840, time=8.67s
iter 7: test acc=0.3570
iter 8: train loss/sent=0.0617, time=8.57s
iter 8: test acc=0.3552
iter 9: train loss/sent=0.0443, time=8.66s
iter 9: test acc=0.3511
iter 10: train loss/sent=0.0332, time=8.55s
iter 10: test acc=0.3652
iter 11: train loss/sent=0.0272, time=8.54s
iter 11: test acc=0.3602
iter 12: train loss/sent=0.0191, time=8.68s
iter 12: test acc=0.3647
iter 13: train loss/sent=0.0168, time=8.76s
iter 13: test acc=0.3697
iter 14: train loss/sent=0.0130, time=8.58s
iter 14: t

KeyboardInterrupt: ignored

In [10]:
class DeepCBoW(torch.nn.Module):
    """Deep continuous bag-of-words classifier.

    A sentence is represented by the sum of its word embeddings, passed
    through ``nlayers`` tanh-activated linear layers and finally projected to
    one score per tag.

    Args:
        nwords: Number of rows in the embedding table (vocabulary size).
        ntags: Number of output scores (tag classes).
        nlayers: Number of hidden layers. With ``0`` the summed embedding is
            fed straight into the output layer.
        emb_size: Dimension of each word embedding.
        hid_size: Dimension of every hidden layer.
    """

    def __init__(self, nwords, ntags, nlayers, emb_size, hid_size):
        super(DeepCBoW, self).__init__()

        # variables
        self.nlayers = nlayers

        # layers
        self.embedding = nn.Embedding(nwords, emb_size)
        # initialize the weights with xavier uniform (Glorot, X. & Bengio, Y. (2010))
        nn.init.xavier_uniform_(self.embedding.weight)

        # add nlayers number of layers
        # only the first maps (emb_size -> hid_size); the rest map
        # (hid_size -> hid_size)
        self.linears = nn.ModuleList([
                nn.Linear(emb_size if i == 0 else hid_size, hid_size)
                for i in range(nlayers)])
        # initialize the weights with xavier uniform (Glorot, X. & Bengio, Y. (2010))
        for layer in self.linears:
            nn.init.xavier_uniform_(layer.weight)

        # (last hidden size -> ntags). When there are no hidden layers the
        # output layer consumes the summed embedding directly, so its input
        # size must be emb_size rather than hid_size.
        out_in_size = hid_size if len(self.linears) > 0 else emb_size
        self.output_layer = nn.Linear(out_in_size, ntags)
        # initialize the weights with xavier uniform (Glorot, X. & Bengio, Y. (2010))
        nn.init.xavier_uniform_(self.output_layer.weight)

    def forward(self, words):
        """Return a ``1 x ntags`` tensor of tag scores for one sentence.

        Args:
            words: Tensor of word indices for a single sentence (a 1-D tensor
                in the training loop that calls this model).
        """
        emb = self.embedding(words)
        emb_sum = torch.sum(emb, dim=0) # size(emb_sum) = emb_size
        h = emb_sum.view(1, -1) # size(h) = 1 x emb_size
        for layer in self.linears:
            h = torch.tanh(layer(h))
        out = self.output_layer(h)
        return out

In [12]:
# initialize the model
EMB_SIZE = 64    # embedding dimension
HID_SIZE = 64    # hidden layer dimension
NLAYERS = 2      # number of hidden layers
model = DeepCBoW(nwords, ntags, NLAYERS, EMB_SIZE, HID_SIZE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

# Select the device once and create tensors directly on it. This replaces the
# legacy torch.LongTensor / torch.cuda.LongTensor switch and avoids rebinding
# the builtin name `type`.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model.to(device)


for ITER in range(100):
    # Perform training: one optimizer step per sentence, in a fresh random order
    random.shuffle(train)
    train_loss = 0.0
    start = time.time()
    for words, tag in train:
        words_t = torch.tensor(words, dtype=torch.long, device=device)
        # CrossEntropyLoss expects a batch of targets, hence the 1-element list
        tag_t = torch.tensor([tag], dtype=torch.long, device=device)
        scores = model(words_t)
        loss = criterion(scores, tag_t)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("iter %r: train loss/sent=%.4f, time=%.2fs" % (
                ITER, train_loss/len(train), time.time()-start))
    # Perform testing
    test_correct = 0.0
    # No gradients are needed for evaluation, so skip building autograd graphs
    with torch.no_grad():
        for words, tag in dev:
            words_t = torch.tensor(words, dtype=torch.long, device=device)
            scores = model(words_t)[0].cpu().numpy()
            predict = np.argmax(scores)
            if predict == tag:
                test_correct += 1
    print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))

iter 0: train loss/sent=1.4326, time=14.54s
iter 0: test acc=0.3665
iter 1: train loss/sent=1.0194, time=14.27s
iter 1: test acc=0.3982
iter 2: train loss/sent=0.6498, time=14.41s
iter 2: test acc=0.3919
iter 3: train loss/sent=0.3950, time=14.29s
iter 3: test acc=0.3688
iter 4: train loss/sent=0.2503, time=14.16s
iter 4: test acc=0.3751
iter 5: train loss/sent=0.1676, time=14.32s
iter 5: test acc=0.3742
iter 6: train loss/sent=0.1107, time=14.38s
iter 6: test acc=0.3783
iter 7: train loss/sent=0.0710, time=14.33s
iter 7: test acc=0.3756


KeyboardInterrupt: ignored