In [61]:
print('hello world')

hello world


In [62]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class TransformerModel(nn.Module):

    def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
        super(TransformerModel, self).__init__()
        from torch.nn import TransformerEncoder, TransformerEncoderLayer
        self.model_type = 'Transformer'
        self.pos_encoder = PositionalEncoding(ninp, dropout)
        encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.ninp = ninp
        self.decoder = nn.Linear(ninp, ntoken)

        self.init_weights()

    def generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src, src_mask):
        src = self.encoder(src) * math.sqrt(self.ninp)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, src_mask)
        output = self.decoder(output)
        return output

In [63]:
class PositionalEncoding(nn.Module):

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [64]:
!pip install janome



In [65]:
import janome
from janome.tokenizer import Tokenizer
from torchtext import data
from torchtext import datasets

In [66]:
j_t = Tokenizer()
def tokenizer(text): 
    return [tok for tok in j_t.tokenize(text, wakati=True)]

In [67]:
SRC = data.Field(sequential=True, tokenize=tokenizer,init_token='<sos>',
                            eos_token='<eos>', lower=True, fix_length = 100)
TRG = data.Field(sequential=True, tokenize=tokenizer,init_token='<sos>',
                            eos_token='<eos>', lower=True, fix_length = 100)

In [68]:
train, val, test = data.TabularDataset.splits(
        path="./", train='train.tsv',
        validation='val.tsv', test='test.tsv', format='tsv',
        fields=[('SRC', SRC), ('TRG', TRG)])

In [69]:
SRC.build_vocab(train, min_freq=1)
TRG.build_vocab(train, min_freq=1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [70]:
train_batch_size = 100
test_batch_size = 10
eval_batch_size = 50
train_iter, val_iter, test_iter = data.BucketIterator.splits((train, val, test), sort = False,  batch_sizes = (train_batch_size,eval_batch_size, test_batch_size), device= device)

In [71]:
len(train_iter.data())

20000

In [72]:
ntokens = len(SRC.vocab.stoi) # the size of vocabulary
emsize = 200 # embedding dimension
nhid = 200 # the dimension of the feedforward network model in nn.TransformerEncoder
nlayers = 2 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
nhead = 2 # the number of heads in the multiheadattention models
dropout = 0.5 # the dropout value
model = TransformerModel(ntokens, emsize, nhead, nhid, nlayers, dropout).to(device)

In [87]:
criterion = nn.CrossEntropyLoss()
lr = 0.5 # learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)

import time
def train(iterator):
    model.train() # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    ntokens = len(SRC.vocab.stoi)
    for i, batch in enumerate(iterator):
        data = batch.SRC
        targets = batch.TRG
        optimizer.zero_grad()
        src_mask = model.generate_square_subsequent_mask(data.shape[0]).to(device)
        output = model(data, src_mask)      
        output = output[1:].view(-1, output.shape[-1])
        print("output:",output)
        targets = targets[1:].view(-1)
        print("target:",targets)
        loss = criterion(output, targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        total_loss += loss.item()
        

def evaluate(eval_model, data_source):
    eval_model.eval() # Turn on the evaluation mode
    total_loss = 0.
    ntokens = len(SRC.vocab.stoi)
    #src_mask = model.generate_square_subsequent_mask(bptt).to(device)
    with torch.no_grad():
        for i, batch in enumerate(data_source):
            data = batch.SRC
            targets = batch.TRG
            src_mask = model.generate_square_subsequent_mask(data.shape[0]).to(device)
            output = eval_model(data, src_mask)
            output_flat = output[1:].view(-1, output.shape[-1])
            targets = targets[1:].view(-1)
            total_loss += len(data) * criterion(output_flat, targets).item()
    return total_loss / (len(data_source) - 1)

In [88]:
best_val_loss = float("inf")
epochs = 100# The number of epochs
best_model = None
model.init_weights()

for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    train(train_iter)
    val_loss = evaluate(model, val_iter)
    print('-' * 89)
    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
          'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                     val_loss, math.exp(val_loss)))

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model = model

    scheduler.step()

output: tensor([[-0.3151, -1.0659,  0.7573,  ...,  0.5115,  0.1632,  0.4482],
        [ 0.3361, -0.2336,  1.2291,  ...,  1.5279,  0.6473,  0.3624],
        [ 0.8654,  0.0977,  1.0907,  ...,  0.7570, -0.0233, -0.6266],
        ...,
        [-0.2755,  0.4157,  0.4161,  ...,  1.2380,  0.7114, -1.2725],
        [ 0.1736,  0.8402,  0.4540,  ...,  1.1244,  1.1582, -0.4806],
        [ 0.2649,  0.8800,  1.1043,  ...,  1.3826,  0.3915, -0.8756]],
       device='cuda:0', grad_fn=<ViewBackward>)
target: tensor([10,  9, 14,  ...,  1,  1,  1], device='cuda:0')
output: tensor([[ 0.5242,  1.2721,  1.6303,  ..., -0.5260,  0.4322, -1.6451],
        [-0.2376,  0.8409,  1.3645,  ...,  0.7964,  0.8104, -0.4069],
        [ 0.3258,  1.5191,  1.1099,  ...,  0.3823, -0.7987, -0.5605],
        ...,
        [ 0.0449,  2.8392,  1.3458,  ...,  1.0995,  0.1270, -1.2990],
        [-0.7032,  4.6781,  1.0259,  ...,  0.3543,  0.6894, -0.8818],
        [ 0.0102,  4.2530,  0.0101,  ...,  1.2809, -0.3628, -1.6681]],
    

output: tensor([[ 8.1158e-01,  3.7769e+00, -1.7168e-01,  ...,  2.2176e-01,
          3.2634e-01,  1.0929e-02],
        [ 2.3546e-01,  5.4033e+00,  5.8836e-01,  ...,  1.7564e-01,
          9.1623e-01, -7.9196e-01],
        [-5.1732e-01,  4.4508e+00,  5.2427e-01,  ...,  1.0377e+00,
          6.6324e-01, -1.4671e+00],
        ...,
        [-1.0780e-01,  1.5837e+01,  1.2849e+00,  ...,  1.4888e+00,
         -2.9688e-01, -1.2653e+00],
        [-2.3936e-02,  1.5726e+01,  6.7440e-01,  ...,  8.7939e-01,
          1.1064e-01, -1.5659e+00],
        [ 1.0981e+00,  1.4519e+01,  3.9001e-01,  ...,  1.0180e+00,
          3.0571e-01, -1.3545e+00]], device='cuda:0', grad_fn=<ViewBackward>)
target: tensor([ 9, 29, 29,  ...,  1,  1,  1], device='cuda:0')
output: tensor([[-0.6411,  7.8181,  0.8823,  ...,  1.3931,  0.6768, -0.8183],
        [ 0.2839,  7.6508,  1.8709,  ...,  0.3338,  0.3718, -1.0078],
        [ 0.4986,  7.4654,  1.2732,  ...,  0.7366,  1.0533, -0.7831],
        ...,
        [ 0.7872, 15.894

output: tensor([[ 0.1079,  4.4961,  0.9175,  ...,  0.5126,  0.3808,  0.1991],
        [-0.4351,  2.5651,  1.7747,  ...,  0.2530,  0.8921, -0.1898],
        [ 0.2516,  4.6873,  0.3257,  ...,  0.3444,  0.2229, -0.5323],
        ...,
        [ 0.4446, 14.5260,  0.9656,  ...,  1.7403, -0.1812, -1.9533],
        [ 0.4354, 14.0759,  0.7197,  ...,  1.2294,  0.2000, -0.9090],
        [ 0.4747, 15.5304,  0.8057,  ...,  0.9872,  0.7091, -0.7105]],
       device='cuda:0', grad_fn=<ViewBackward>)
target: tensor([ 5, 62,  5,  ...,  1,  1,  1], device='cuda:0')
output: tensor([[-0.6851,  5.2641,  1.3471,  ...,  1.1625,  0.1446, -0.2097],
        [-0.7437,  8.2200,  0.1681,  ...,  0.5691,  0.4278, -1.0136],
        [ 0.1432,  7.5121,  1.0056,  ...,  0.6397,  0.6320, -1.0441],
        ...,
        [ 1.1508, 17.3925,  0.9113,  ...,  1.1536,  0.5828, -1.0979],
        [ 0.3608, 17.1995,  0.9168,  ...,  0.8463, -0.1223, -1.1825],
        [ 0.2552, 16.4421,  0.2866,  ...,  0.7782,  0.2108, -1.6620]],
    

KeyboardInterrupt: 

In [None]:
test_loss = evaluate(best_model, test_iter)
print('=' * 89)
print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
    test_loss, math.exp(test_loss)))
print('=' * 89)

In [None]:
torch.save(best_model.state_dict(), "./model/transformer.pth")

In [None]:
model.state_dict(torch.load("./model/transformer.pth"))

In [None]:
def gen_sentence(sentence, src_field, trg_field, model, batch_size):
  model.eval()
  in_str, out_str, pred, tmp = [], [], [], []
  length = len(sentence)

  with torch.no_grad():
    for _, batch in enumerate(sentence):
      src = batch.SRC
      trg = batch.TRG
      src_mask = model.generate_square_subsequent_mask(src.shape[0]).to(device)
      output = model(src, src_mask)
          
      for j in range(min(length, batch_size)):
        _, topi = output.data.topk(1)
        _, topi_s = output.data.topk(2) 
        for k in range(topi.size()[1]):
          if topi[:, k][0] == trg_field.vocab.stoi["<eos>"]:
            for m in range(topi_s.size()[0]):
              for l in range(topi_s.size()[1]):
                topi[m][l][0] = topi_s[m][l][1]
          for i in range(topi.size()[0]):
            if trg_field.vocab.itos[topi[:, k][i]] == "<eos>":
              break
            tmp.append(trg_field.vocab.itos[topi[:, k][i]])
          pred.append(tmp)
          print(tmp)
          tmp = []
        in_str.append([src_field.vocab.itos[i.item()] for i in src[:,j] if src_field.vocab.itos[i.item()] != "<eos>"])
        out_str.append([trg_field.vocab.itos[i.item()] for i in trg[:,j] if trg_field.vocab.itos[i.item()] != "<eos>"])
      
  return in_str, out_str, pred

In [None]:
# 中間発表時にはテストデータは用いない
test_in, test_out, test_pred = [],[],[]
test_in, test_out, test_pred = gen_sentence(test_iter, SRC, TRG, model, test_batch_size)

In [None]:
val_in, val_out, val_pred = [],[],[]
val_in, val_out, val_pred = gen_sentence(val_iter, SRC, TRG, best_model, eval_batch_size)

In [None]:
train_in, train_out, train_pred = [],[],[]
train_in, train_out, train_pred = gen_sentence(train_iter, SRC, TRG, best_model, train_batch_size)

In [None]:
import pandas as pd

In [None]:
def convert_list_to_df(in_list, out_list, pred_list):
  row = []
  for i in range(len(in_list)):
    batch_input = in_list[i]
    batch_output = out_list[i]
    batch_pred = pred_list[i]
    input = [j for j in batch_input if j != "<pad>" and j != "<sos>" and j != "<eos>" and j != "<unk>"]
    output = [j for j in batch_output if j != "<pad>" and j != "<sos>" and j != "<eos>" and j != "<unk>"]
    predict = [j for j in batch_pred if j != "<pad>" and j != "<sos>" and j != "<eos>" and j != "<unk>"]
    input_str = "".join(input)
    output_str ="".join(output)
    predict_str = "".join(predict)
    row.append([input_str, output_str, predict_str])

  df = pd.DataFrame(row, columns=["input","answer","predict"])
  df = df.sort_values('input')
  return df

In [None]:
train_df = convert_list_to_df(train_in, train_out, train_pred)
val_df = convert_list_to_df(val_in, val_out, val_pred)
test_df = convert_list_to_df(test_in, test_out, test_pred)

In [None]:
df_s = pd.concat([train_df, test_df]).sort_values('input')

In [None]:
df_s.head(10)

In [None]:
df_s.to_csv("./csv/result_transformer.csv")