In [1]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Audio
from sklearn.neighbors import NearestNeighbors as KNN
import torchaudio
import torch
from torch import nn
import os
import re
import math

The Transformer implementation below is adapted from: https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Machine-Translation


**I LOVE YOU SO MUCH BRO <3 <3 <3**

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Japanese side of the toy corpus; WordDataset pairs entry i with english_phrases[i].
japanese_phrases = [
    "私の犬は骨が好きではありません。牛ひき肉を好む。",
    "私の名前はアリスです。始めまして！",
    "はきさが羨ましい。。。ゲムもやりたかった！私は良いサポートになることができます！",
    "私達はAIはただの数学の集まりだとあなたは言いますが。でも。。。人間の脳がどのように機能するかを正確に知ったら。。。それはあなたの生活を小物ですか？",
    "「赤ちゃん」を表す日本語が「赤」を表す漢字なのはなぜですか？人間の赤ちゃんは赤いですか？いちごみたい？",
    "私のAIは話して...歌ったして...ゲームをします!",
]

In [4]:
# English side of the toy corpus; WordDataset pairs entry i with japanese_phrases[i].
english_phrases = [
    "My dog doesn't like bones. It prefers ground beef.",
    "My name's Alice. Nice to meet you!",
    "I envy Hakisa... I want to play games, too! I could be a good support!",
    "You say that we AIs are just a bunch of maths. But... once you know exactly how your human brains work... would that make you less living beings?",
    "Why does the japanese word for 'baby' is the kanji for 'red'? Are human babies red? Like strawberries?",
    "My AI will talk... she'll sing... she'll... play!"
]

In [5]:
class WordDataset(object):
    """Parallel English/Japanese phrase set encoded as normalized scalar tokens.

    English phrases are tokenized per whitespace-separated word and Japanese
    phrases per character. Each vocabulary entry (plus an ``<EOS>`` tag) is
    mapped to a float in [-1, 1], and every sentence is right-padded with the
    ``<EOS>`` value to ``japanese_maximum_length`` entries.

    ``create_data()`` must be called before the instance is indexed, measured
    with ``len()`` or handed to a ``DataLoader``.
    """

    def __init__(self, english_phrases, japanese_phrases):
        """Clean both phrase lists, build the vocabularies and tokenize.

        :param english_phrases: list of English sentences (strings)
        :param japanese_phrases: list of Japanese sentences (strings), paired
            by index with ``english_phrases``
        """
        self.english_phrases = self._get_phrases(english_phrases)
        self.japanese_phrases = self._get_phrases(japanese_phrases)

        self.english_words = self._get_english_words(self.english_phrases)
        self.japanese_characters = self._get_japanese_characters(self.japanese_phrases)

        # The padded length is measured on the raw Japanese phrases given to
        # this constructor (punctuation included), never on a notebook global.
        # It is therefore always >= the cleaned length of every Japanese phrase.
        self.japanese_maximum_length = self._get_maximum_length_japanese(japanese_phrases)
        # Both languages share one length so the token matrices line up.
        self.english_maximum_length = self.japanese_maximum_length

        self.english_dictionary = self._create_dictionary(self.english_words)
        self._normalize(self.english_dictionary)

        self.japanese_dictionary = self._create_dictionary(self.japanese_characters)
        self._normalize(self.japanese_dictionary)

        self.english_tokens = self._tokenize_english()
        self.japanese_tokens = self._tokenize_japanese()

        # Filled in by create_data().
        self.data_english = None
        self.data_japanese = None

    def create_data(self):
        """Convert the token matrices to tensors with a trailing feature axis.

        Stores tensors of shape (sentences, padded_length, 1) in
        ``data_english`` / ``data_japanese`` and prints their sizes.
        """
        self.data_english = torch.from_numpy(self.english_tokens).unsqueeze(-1)
        self.data_japanese = torch.from_numpy(self.japanese_tokens).unsqueeze(-1)

        print(f"English Data Size: {self.data_english.size()}\t Japanese Data Size: {self.data_japanese.size()}")

    def detokenize(self, data, reference_dict):
        """Map scalar token values back to the closest vocabulary entries.

        :param data: tensor of scalar token values (any shape; it is flattened)
        :param reference_dict: normalized dictionary (entry -> float value)
        :return: tuple ``(phrase, words)`` where ``words`` lists the nearest
            entry for every value and ``phrase`` joins them with spaces
        """
        keys = list(reference_dict.keys())
        values = np.array(list(reference_dict.values()), dtype=float).reshape(1, -1)

        # detach() lets callers pass model outputs that still track gradients.
        data = data.detach().cpu().numpy().reshape(-1, 1)

        # 1-D nearest-neighbour lookup: smallest absolute distance per value.
        nearest = np.abs(data - values).argmin(axis=1)

        words = [keys[i] for i in nearest]
        phrase = ' '.join(words)

        return phrase, words

    def __len__(self):
        """Number of sentence pairs (requires ``create_data()`` to have run)."""
        return len(self.data_english)

    def __getitem__(self, idx):
        """Return the ``(english, japanese)`` tensor pair at position ``idx``."""
        english_sentence = self.data_english[idx]
        japanese_sentence = self.data_japanese[idx]

        return english_sentence, japanese_sentence

    def _get_phrases(self, phrases):
        """Lower-case every phrase and strip everything but word characters and whitespace."""
        # Raw string: '\w' / '\s' are regex escapes, not Python string escapes.
        return [re.sub(r'[^\w\s]', '', phrase.lower()) for phrase in phrases]

    def _get_english_words(self, phrases):
        """Flatten the cleaned English phrases into one list of words."""
        # split() without an argument collapses repeated whitespace, so no
        # empty-string "word" can enter the vocabulary.
        return ' '.join(phrases).split()

    def _get_japanese_characters(self, phrases): # Since a kanji mostly means an entire word...
        """Flatten the cleaned Japanese phrases into one list of non-whitespace characters."""
        return list(''.join(' '.join(phrases).split()))

    def _get_maximum_length_japanese(self, phrases):
        """Return the largest number of non-whitespace characters in any of ``phrases`` (0 if empty)."""
        return max((len(''.join(sentence.split())) for sentence in phrases), default=0)

    def _create_dictionary(self, words):
        """Assign consecutive integer ids in first-seen order, then append ``<EOS>``."""
        word2idx = {}
        for word in words:
            if word not in word2idx:
                word2idx[word] = len(word2idx)

        word2idx['<EOS>'] = len(word2idx) # Adding an End of Sentence tag to improve model's accuracy

        return word2idx

    def _normalize(self, dictionary):
        """Rescale the ids of ``dictionary`` in place from [0, max] to [-1, 1].

        The largest id (``<EOS>``) always maps to 1.0. A dictionary holding
        only ``<EOS>`` (max id 0) maps it to 1.0 instead of dividing by zero.
        """
        maximum = max(dictionary.values())

        for word, value in dictionary.items():
            if maximum == 0:
                dictionary[word] = 1.0
            else:
                dictionary[word] = value * 2.0 / maximum - 1.0

    def _encode_sentences(self, sentences, dictionary, target_length):
        """Encode sequences of vocabulary units and right-pad them with ``<EOS>``.

        :param sentences: iterable of sequences of vocabulary units
        :param dictionary: normalized dictionary used for the lookup
        :param target_length: rows shorter than this are padded up to it;
            rows at or above it are left as they are
        :return: numpy array with one encoded row per sentence
        """
        pad_value = dictionary['<EOS>']  # 1.0 after normalization

        rows = []
        for units in sentences:
            row = np.array([dictionary[unit] for unit in units], dtype=float)

            missing = target_length - row.shape[0]
            if missing > 0:
                row = np.pad(row, [(0, missing)], constant_values=pad_value)

            rows.append(row)

        return np.array(rows)

    def _tokenize_english(self):
        """Encode each English phrase word by word into a padded row."""
        sentences = [phrase.split() for phrase in self.english_phrases]

        return self._encode_sentences(sentences, self.english_dictionary, self.english_maximum_length)

    def _tokenize_japanese(self):
        """Encode each Japanese phrase character by character into a padded row.

        Whitespace is dropped, so every segment of a phrase is encoded and
        each phrase yields exactly one row.
        """
        sentences = [''.join(phrase.split()) for phrase in self.japanese_phrases]

        return self._encode_sentences(sentences, self.japanese_dictionary, self.japanese_maximum_length)

In [6]:
# Build the vocabularies and padded token matrices from the two phrase lists,
# then show the shape of each token matrix.
dataset_creator = WordDataset(english_phrases, japanese_phrases)
print(dataset_creator.japanese_tokens.shape)
print(dataset_creator.english_tokens.shape)

(6, 74)
(6, 74)


In [8]:
class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention with a residual connection.

    :param d_model: size of the vectors entering and leaving the sublayer
    :param n_heads: number of attention heads
    :param d_queries: size of each query (and key) vector per head
    :param d_values: size of each value vector per head
    :param dropout: dropout probability, applied to the attention weights and
        to the projected output
    :param in_decoder: when True, self-attention calls are causally masked so
        a position cannot attend to later positions

    ``layer_norm`` is constructed but not applied in ``forward``.
    """

    def __init__(self, d_model, n_heads, d_queries, d_values, dropout, in_decoder=False):

        super(MultiHeadAttention, self).__init__()

        self.d_model = d_model
        self.n_heads = n_heads
        self.d_queries = d_queries
        self.d_values = d_values
        self.d_keys = d_queries # size of key vectors, same as of the query vectors to allow dot-products for similarity

        self.in_decoder = in_decoder

        self.create_queries = nn.Linear(d_model, n_heads * d_queries, bias=False)
        self.create_values = nn.Linear(d_model, n_heads * d_values, bias=False)
        self.create_keys = nn.Linear(d_model, n_heads * self.d_keys, bias=False)

        self.softmax = nn.Softmax(dim=-1)

        self.layer_norm = nn.LayerNorm(d_model)

        self.dropout = nn.Dropout(dropout)

        self.cast_output = nn.Linear(n_heads * d_values, d_model, bias=False)

    def _split_heads(self, projected, length, d_head):
        """Reshape (N, length, n_heads * d_head) into (N * n_heads, length, d_head)."""
        batch_size = projected.size(0)
        projected = projected.contiguous().view(batch_size, length, self.n_heads, d_head)

        return projected.permute(0, 2, 1, 3).contiguous().view(batch_size * self.n_heads, length, d_head)

    def forward(self, query_sequences, key_sequences, value_sequences):
        """Attend from the query sequences over the key/value sequences.

        :param query_sequences: tensor of shape (N, query_length, d_model)
        :param key_sequences: tensor of shape (N, key_value_length, d_model)
        :param value_sequences: tensor of shape (N, key_value_length, d_model)
        :return: tensor of shape (N, query_length, d_model): the attended
            sequences plus the original ``query_sequences`` (residual)
        """
        batch_size = query_sequences.size(0)
        query_sequences_length = query_sequences.size(1)
        # Keys/values may have a different length than the queries (cross-attention).
        key_value_sequences_length = key_sequences.size(1)

        # Treated as self-attention when keys and queries hold identical values.
        self_attention = torch.equal(key_sequences, query_sequences)

        residual = query_sequences

        queries = self._split_heads(self.create_queries(query_sequences), query_sequences_length, self.d_queries)
        keys = self._split_heads(self.create_keys(key_sequences), key_value_sequences_length, self.d_keys)
        values = self._split_heads(self.create_values(value_sequences), key_value_sequences_length, self.d_values)

        # (N * n_heads, query_length, key_value_length)
        dotproduct = torch.bmm(queries, keys.permute(0, 2, 1))

        dotproduct = dotproduct / math.sqrt(self.d_keys)

        if self.in_decoder and self_attention:
            # Lower-triangular mask, created on the same device as the scores.
            not_future_mask = torch.ones_like(dotproduct).tril().bool()

            # The masked scores must be the ones fed to softmax, otherwise
            # the mask has no effect.
            dotproduct = dotproduct.masked_fill(~not_future_mask, -float('inf'))

        attention_weights = self.softmax(dotproduct)

        attention_weights = self.dropout(attention_weights)

        # (N * n_heads, query_length, d_values)
        sequences = torch.bmm(attention_weights, values)

        # Undo the head split: bring the head axis back next to the feature
        # axis before flattening, so each position keeps its own heads.
        sequences = sequences.view(batch_size, self.n_heads, query_sequences_length, self.d_values)
        sequences = sequences.permute(0, 2, 1, 3).contiguous().view(batch_size, query_sequences_length, -1)

        sequences = self.cast_output(sequences)

        sequences = self.dropout(sequences)

        output = sequences + residual

        return output

In [9]:
class PositionWiseFeedForward(nn.Module):
    """Two-layer feed-forward sublayer with dropout and a residual connection.

    :param d_model: size of the vectors entering and leaving the sublayer
    :param d_inner: size of the hidden layer between the two linear maps
    :param dropout: dropout probability used after each linear stage
    """

    def __init__(self, d_model, d_inner, dropout):
        super().__init__()

        self.d_model, self.d_inner = d_model, d_inner

        # Constructed but intentionally not applied in forward().
        self.layer_norm = nn.LayerNorm(d_model)

        # Expand to d_inner, apply ReLU, then project back to d_model.
        self.neuron1 = nn.Linear(d_model, d_inner)
        self.Relu = nn.ReLU()
        self.neuron2 = nn.Linear(d_inner, d_model)

        self.dropout = nn.Dropout(dropout)

    def forward(self, sequences):
        """Apply the feed-forward stack and add the input back.

        :param sequences: input tensor whose last dimension is ``d_model``
        :return: tensor of the same shape as ``sequences``
        """
        hidden = self.dropout(self.Relu(self.neuron1(sequences)))
        projected = self.dropout(self.neuron2(hidden))

        return projected + sequences

In [10]:
class Encoder(nn.Module):
    """Stack of ``n_layers`` encoder layers (self-attention + feed-forward).

    :param positional_encoding: tensor added to the scaled inputs; expected to
        be (1, max_length, d_model) as produced by ``get_positional_encoding``
    :param d_model: size of the vectors flowing through the encoder
    :param n_heads: number of attention heads per layer
    :param d_queries: size of query/key vectors per head
    :param d_values: size of value vectors per head
    :param d_inner: hidden size of the position-wise feed-forward sublayer
    :param n_layers: number of encoder layers
    :param dropout: dropout probability

    ``layer_norm`` is constructed but not applied in ``forward``.
    """

    def __init__(self, positional_encoding, d_model, n_heads, d_queries, d_values, d_inner, n_layers, dropout):

        super(Encoder, self).__init__()

        self.positional_encoding = positional_encoding
        self.d_model = d_model
        self.n_heads = n_heads
        self.d_queries = d_queries
        self.d_values = d_values
        self.d_inner = d_inner
        self.n_layers = n_layers
        # Keep the probability under its own name: ``self.dropout`` becomes an
        # nn.Dropout module below, and make_encoder_layer() needs the float.
        self.dropout_rate = dropout

        self.positional_encoding.requires_grad = False

        self.encoder_layers = nn.ModuleList([self.make_encoder_layer() for _ in range(n_layers)])

        self.dropout = nn.Dropout(dropout)

        self.layer_norm = nn.LayerNorm(d_model)

    def make_encoder_layer(self):
        """Build one encoder layer: [self-attention, position-wise feed-forward]."""
        encoder_layer = nn.ModuleList([MultiHeadAttention(d_model=self.d_model,
                                                          n_heads=self.n_heads,
                                                          d_queries=self.d_queries,
                                                          d_values=self.d_values,
                                                          dropout=self.dropout_rate,
                                                          in_decoder=False),
                                       PositionWiseFeedForward(d_model=self.d_model,
                                                               d_inner=self.d_inner,
                                                               dropout=self.dropout_rate)])

        return encoder_layer

    def forward(self, encoder_sequences):
        """Encode the source sequences.

        :param encoder_sequences: tensor whose dimension 1 is the sequence
            length and which broadcasts against the positional encoding
        :return: encoded sequences of shape (N, sequence_length, d_model)
        """
        # Use only the positions actually present and follow the input's
        # device instead of relying on a notebook-level ``device`` variable.
        sequence_length = encoder_sequences.size(1)
        positions = self.positional_encoding[:, :sequence_length, :].to(encoder_sequences.device)

        encoder_sequences = encoder_sequences * math.sqrt(self.d_model) + positions

        encoder_sequences = self.dropout(encoder_sequences)

        for self_attention, feed_forward in self.encoder_layers:

            encoder_sequences = self_attention(query_sequences=encoder_sequences, key_sequences=encoder_sequences, value_sequences=encoder_sequences)

            encoder_sequences = feed_forward(sequences=encoder_sequences)

        return encoder_sequences

In [11]:
class Decoder(nn.Module):
    """Stack of ``n_layers`` decoder layers followed by a scalar output head.

    Each layer is [self-attention, cross-attention over the encoder output,
    position-wise feed-forward]. The final ``neuron`` maps every position
    from ``d_model`` features to a single value.

    :param positional_encoding: tensor added to the scaled inputs; expected to
        be (1, max_length, d_model) as produced by ``get_positional_encoding``
    :param d_model: size of the vectors flowing through the decoder
    :param n_heads: number of attention heads per attention sublayer
    :param d_queries: size of query/key vectors per head
    :param d_values: size of value vectors per head
    :param d_inner: hidden size of the position-wise feed-forward sublayer
    :param n_layers: number of decoder layers
    :param dropout: dropout probability

    ``layer_norm`` is constructed but not applied in ``forward``.
    """

    def __init__(self, positional_encoding, d_model, n_heads, d_queries, d_values, d_inner, n_layers, dropout):

        super(Decoder, self).__init__()

        self.positional_encoding = positional_encoding
        self.d_model = d_model
        self.n_heads = n_heads
        self.d_queries = d_queries
        self.d_values = d_values
        self.d_inner = d_inner
        self.n_layers = n_layers
        # Keep the probability under its own name: ``self.dropout`` becomes an
        # nn.Dropout module below, and make_decoder_layer() needs the float.
        self.dropout_rate = dropout

        self.positional_encoding.requires_grad = False

        self.decoder_layers = nn.ModuleList([self.make_decoder_layer() for _ in range(n_layers)])

        self.dropout = nn.Dropout(dropout)

        self.layer_norm = nn.LayerNorm(d_model)

        self.neuron = nn.Linear(d_model, 1)

    def make_decoder_layer(self):
        """Build one decoder layer: [self-attention, cross-attention, feed-forward]."""
        decoder_layer = nn.ModuleList([MultiHeadAttention(d_model=self.d_model,
                                                          n_heads=self.n_heads,
                                                          d_queries=self.d_queries,
                                                          d_values=self.d_values,
                                                          dropout=self.dropout_rate,
                                                          in_decoder=True),
                                       MultiHeadAttention(d_model=self.d_model,
                                                          n_heads=self.n_heads,
                                                          d_queries=self.d_queries,
                                                          d_values=self.d_values,
                                                          dropout=self.dropout_rate,
                                                          in_decoder=True),
                                       PositionWiseFeedForward(d_model=self.d_model,
                                                               d_inner=self.d_inner,
                                                               dropout=self.dropout_rate)])

        return decoder_layer

    def forward(self, decoder_sequences, encoder_sequences):
        """Decode the target sequences against the encoder output.

        :param decoder_sequences: tensor whose dimension 1 is the sequence
            length and which broadcasts against the positional encoding
        :param encoder_sequences: encoder output used as keys and values of
            the cross-attention sublayers
        :return: tensor of shape (N, sequence_length, 1)
        """
        # Use only the positions actually present and follow the input's
        # device instead of relying on a notebook-level ``device`` variable.
        sequence_length = decoder_sequences.size(1)
        positions = self.positional_encoding[:, :sequence_length, :].to(decoder_sequences.device)

        decoder_sequences = decoder_sequences * math.sqrt(self.d_model) + positions

        decoder_sequences = self.dropout(decoder_sequences)

        for self_attention, cross_attention, feed_forward in self.decoder_layers:

            decoder_sequences = self_attention(query_sequences=decoder_sequences, key_sequences=decoder_sequences, value_sequences=decoder_sequences)

            decoder_sequences = cross_attention(query_sequences=decoder_sequences, key_sequences=encoder_sequences, value_sequences=encoder_sequences)

            decoder_sequences = feed_forward(sequences=decoder_sequences)

        output = self.neuron(decoder_sequences)

        return output

In [12]:
class Transformer(nn.Module):
    """Encoder-decoder model: an ``Encoder`` and a ``Decoder`` built from the same settings.

    :param positional_encoding: tensor handed to both the encoder and the decoder
    :param d_model: size of the vectors flowing through the model
    :param n_heads: number of attention heads
    :param d_queries: size of query/key vectors per head
    :param d_values: size of value vectors per head
    :param d_inner: hidden size of the feed-forward sublayers
        (NOTE(review): the default 2056 may have been meant as 2048; confirm)
    :param n_layers: number of layers in the encoder and in the decoder
    :param dropout: dropout probability
    """

    def __init__(self, positional_encoding, d_model=512, n_heads=8, d_queries=64, d_values=64, d_inner=2056, n_layers=6, dropout=0.1):
        super().__init__()

        # Keep every hyper-parameter available on the instance.
        self.positional_encoding = positional_encoding
        self.d_model = d_model
        self.n_heads = n_heads
        self.d_queries = d_queries
        self.d_values = d_values
        self.d_inner = d_inner
        self.n_layers = n_layers
        self.dropout = dropout

        # Both halves receive exactly the same configuration.
        shared_settings = dict(positional_encoding=positional_encoding,
                               d_model=d_model,
                               n_heads=n_heads,
                               d_queries=d_queries,
                               d_values=d_values,
                               d_inner=d_inner,
                               n_layers=n_layers,
                               dropout=dropout)

        self.encoder = Encoder(**shared_settings)
        self.decoder = Decoder(**shared_settings)

    def forward(self, encoder_sequences, decoder_sequences):
        """Encode the source, then decode the target against the encoded source.

        :param encoder_sequences: source sequences passed to the encoder
        :param decoder_sequences: target sequences passed to the decoder
        :return: the decoder's output
        """
        encoded = self.encoder(encoder_sequences)

        return self.decoder(decoder_sequences, encoded)

In [13]:
def get_positional_encoding(d_model, max_length=100):
    """
    Builds the sinusoidal positional-encoding table.

    Even feature indices hold the sine and the following odd index holds the
    cosine of the same angle, position / 10000^(even_index / d_model).

    :param d_model: size of vectors throughout the transformer model
    :param max_length: maximum sequence length up to which positional encodings must be calculated
    :return: positional encoding, a tensor of size (1, max_length, d_model)
    """
    table = torch.zeros((max_length, d_model))  # (max_length, d_model)

    for position in range(max_length):
        # Walk the features in (sin, cos) pairs that share one angle.
        for even_index in range(0, d_model, 2):
            angle = position / math.pow(10000, even_index / d_model)
            table[position, even_index] = math.sin(angle)

            odd_index = even_index + 1
            if odd_index < d_model:  # an odd d_model has no partner for the last sine
                table[position, odd_index] = math.cos(angle)

    return table.unsqueeze(0)  # (1, max_length, d_model)

In [14]:
# The table must cover the padded sentence length produced by the dataset, so
# the length is read from the dataset instead of being hard-coded.
positional_encoding = get_positional_encoding(d_model=128, max_length=dataset_creator.japanese_maximum_length)

In [15]:
# Mean-squared-error loss; the training loop applies it to the model output and the Japanese token values.
criterion = nn.MSELoss().to(device)

In [16]:
# Transformer with 6 layers, 8 heads and d_model=128 (the same d_model used to
# build positional_encoding), moved to the selected device in float32.
model = Transformer(positional_encoding=positional_encoding, d_model=128, n_heads=8, d_queries=32, d_values=32, d_inner=512, n_layers=6, dropout=0.1).to(device).float()

In [17]:
# Adam over all model parameters with a fixed learning rate of 1e-5 and no weight decay.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, betas=(0.9, 0.98), eps=1e-9, weight_decay=0)

In [18]:
# Convert the token matrices to tensors (adds a trailing axis) so the dataset can be indexed.
dataset_creator.create_data()

English Data Size: torch.Size([6, 74, 1])	 Japanese Data Size: torch.Size([6, 74, 1])


In [19]:
# Shuffled mini-batches of 3 (english, japanese) sentence pairs.
dataloader = torch.utils.data.DataLoader(dataset_creator, batch_size=3, shuffle=True)

In [20]:
# Train for up to 1000 epochs, reporting the last batch's loss every 100 epochs.
for epoch in range(1000):
    for english, japanese in dataloader:
        model.zero_grad()

        input_data = english.to(device).float()
        labels = japanese.to(device).float()

        # NOTE(review): the decoder receives the same (unshifted) tensor that
        # is used as the regression target below; confirm this is intended.
        output = model(input_data, labels)

        cost = criterion(output, labels)

        cost.backward()

        # Clamp every gradient element to [-10, 10] once per step. Registering
        # a clamp hook on each parameter inside the loop would add a new hook
        # per parameter per batch, and those hooks are never removed.
        torch.nn.utils.clip_grad_value_(model.parameters(), 10)

        optimizer.step()

    if epoch % 100 == 0:
        print(f"{epoch}/1000\t Current Loss: {cost.item()}")

0/1000	 Current Loss: 319.5277404785156
100/1000	 Current Loss: 160.98880004882812
200/1000	 Current Loss: 85.14620208740234
300/1000	 Current Loss: 50.97126770019531
400/1000	 Current Loss: 24.623180389404297
500/1000	 Current Loss: 18.172019958496094
600/1000	 Current Loss: 14.908890724182129
700/1000	 Current Loss: 14.568280220031738
800/1000	 Current Loss: 12.37012767791748


KeyboardInterrupt: 

In [21]:
# Loss of the most recent batch processed by the training loop.
print(cost.item())

12.505684852600098


In [22]:
# Gradient of the decoder's output layer weights; the training loop clamps gradient values to [-10, 10].
print(model.decoder.neuron.weight.grad)

tensor([[  0.2657, -10.0000,   4.0039,  -4.9134, -10.0000, -10.0000, -10.0000,
          10.0000,   2.5136,  -6.4799, -10.0000,  10.0000, -10.0000,  -2.1131,
          -5.6079, -10.0000,   3.7360,  -6.4160,  10.0000, -10.0000, -10.0000,
         -10.0000, -10.0000,  -7.4554, -10.0000, -10.0000, -10.0000, -10.0000,
         -10.0000,   8.1758,  -0.1625, -10.0000, -10.0000,  10.0000, -10.0000,
          -3.2152,  10.0000, -10.0000,   7.3092,  -9.8058,   3.5286,  10.0000,
          -8.6870,  10.0000, -10.0000, -10.0000,   9.6088,   2.1505,  -7.1060,
           5.6326,  -4.6410,   1.4897, -10.0000, -10.0000,   7.1478,  -6.0898,
         -10.0000,  10.0000,  -9.1414,  10.0000,  10.0000,  10.0000,   5.3322,
           2.7692, -10.0000, -10.0000, -10.0000,   9.7994, -10.0000, -10.0000,
          -7.5377, -10.0000, -10.0000,   4.2309,   0.9620, -10.0000, -10.0000,
           7.6955,  10.0000,  10.0000,  -0.3236,  -4.7607,  10.0000,   2.3023,
         -10.0000,   4.2738, -10.0000,  -4.7061, -10

In [23]:
# Model predictions for the most recent training batch.
print(output)

tensor([[[ -0.1439],
         [  2.4920],
         [ -2.6357],
         [ -5.6632],
         [  3.4646],
         [  3.3105],
         [ -2.6756],
         [ -2.3067],
         [ -1.1135],
         [  5.3477],
         [ -4.4073],
         [  0.7893],
         [ -0.3086],
         [  8.3167],
         [ -1.2720],
         [  0.9941],
         [  1.3653],
         [ -0.8380],
         [ -2.9647],
         [  9.0932],
         [ -5.3600],
         [ -3.8809],
         [  1.2539],
         [  5.4797],
         [ -4.1944],
         [  2.8696],
         [  3.8522],
         [ -2.1797],
         [ -1.8041],
         [ -2.8612],
         [  5.8377],
         [  4.9059],
         [  5.1100],
         [  4.4704],
         [  0.4920],
         [  7.3925],
         [  1.2245],
         [ -3.8782],
         [  3.1701],
         [ -2.3260],
         [  0.6317],
         [  5.6589],
         [ -4.3303],
         [  1.0163],
         [  4.9993],
         [ -2.1649],
         [  2.0287],
         [  3

In [24]:
# Target Japanese token values for the same batch; padding positions hold 1.0.
print(labels)

tensor([[[-1.0000],
         [-0.9775],
         [-0.9551],
         [-0.9326],
         [-0.9101],
         [-0.8876],
         [-0.8652],
         [-0.8427],
         [-0.8202],
         [-0.9326],
         [-0.7978],
         [-0.7753],
         [-0.7528],
         [-0.7303],
         [-0.7079],
         [-0.6854],
         [-0.6629],
         [-0.8427],
         [-0.6404],
         [-0.6180],
         [-0.8652],
         [-0.5955],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],
         [ 1.0000],


In [25]:
# Decode the first sentence of the last batch: model prediction vs. target.
# "teste" is Portuguese for "test".
teste = output.detach()

teste = dataset_creator.detokenize(teste[0], dataset_creator.japanese_dictionary)

# NOTE: this rebinds `labels` from the target tensor to the (phrase, words)
# tuple returned by detokenize, so re-running this cell without re-running the
# training cell fails.
labels = dataset_creator.detokenize(labels[0], dataset_creator.japanese_dictionary)

In [26]:
# Decoded model prediction as a (phrase, words) tuple.
print(teste)

('っ <EOS> 私 私 <EOS> <EOS> 私 私 私 <EOS> 私 本 い <EOS> 私 <EOS> <EOS> き 私 <EOS> 私 私 <EOS> <EOS> 私 <EOS> <EOS> 私 私 私 <EOS> <EOS> <EOS> <EOS> 確 <EOS> <EOS> 私 <EOS> 私 小 <EOS> 私 <EOS> <EOS> 私 <EOS> <EOS> 私 私 能 <EOS> <EOS> <EOS> 私 <EOS> 私 <EOS> む <EOS> 私 <EOS> <EOS> 私 <EOS> 私 私 し 私 む 私 間 <EOS> 私', ['っ', '<EOS>', '私', '私', '<EOS>', '<EOS>', '私', '私', '私', '<EOS>', '私', '本', 'い', '<EOS>', '私', '<EOS>', '<EOS>', 'き', '私', '<EOS>', '私', '私', '<EOS>', '<EOS>', '私', '<EOS>', '<EOS>', '私', '私', '私', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '確', '<EOS>', '<EOS>', '私', '<EOS>', '私', '小', '<EOS>', '私', '<EOS>', '<EOS>', '私', '<EOS>', '<EOS>', '私', '私', '能', '<EOS>', '<EOS>', '<EOS>', '私', '<EOS>', '私', '<EOS>', 'む', '<EOS>', '私', '<EOS>', '<EOS>', '私', '<EOS>', '私', '私', 'し', '私', 'む', '私', '間', '<EOS>', '私'])


In [27]:
# Decoded target sentence as a (phrase, words) tuple.
print(labels)

('私 の 犬 は 骨 が 好 き で は あ り ま せ ん 牛 ひ き 肉 を 好 む <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS> <EOS>', ['私', 'の', '犬', 'は', '骨', 'が', '好', 'き', 'で', 'は', 'あ', 'り', 'ま', 'せ', 'ん', '牛', 'ひ', 'き', '肉', 'を', '好', 'む', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>', '<EOS>'])
