# Step 1. 데이터 다운로드

In [1]:
from requests import get
import os

os.mkdir('./data')

def download(url, file_name):
    with open(file_name, "wb") as file:   
        response = get(url)
        file.write(response.content)
        
url = 'https://raw.githubusercontent.com/songys/Chatbot_data/master'
file_name = 'ChatbotData.csv'

download(f'{url}/{file_name}', f'./data/{file_name}')

In [2]:
os.listdir('./data')

['ChatbotData.csv']

# Step 2. 데이터 정제

In [1]:
import pandas as pd

df = pd.read_csv(f'./data/ChatbotData.csv')

df.head()

Unnamed: 0,Q,A,label
0,12시 땡!,하루가 또 가네요.,0
1,1지망 학교 떨어졌어,위로해 드립니다.,0
2,3박4일 놀러가고 싶다,여행은 언제나 좋죠.,0
3,3박4일 정도 놀러가고 싶다,여행은 언제나 좋죠.,0
4,PPL 심하네,눈살이 찌푸려지죠.,0


In [2]:
df.drop(['label'], axis=1, inplace=True)
df.isnull().sum()

Q    0
A    0
dtype: int64

In [3]:
df.drop_duplicates(['Q'], inplace=True)
df.drop_duplicates(['A'], inplace=True)

len(df)

7731

In [4]:
import re

def preprocess_sentence(sentence):
    
    sentence = sentence.lower().strip()
    sentence = re.sub(r'[" "]+', " ", sentence)
    sentence = re.sub(r"[^a-zA-Z?.!가-힣ㄱ-ㅎㅏ-ㅣ0-9]+", " ", sentence)
    sentence = sentence.strip()
    
    return sentence

In [5]:
df['Q'] = df['Q'].apply(lambda it: preprocess_sentence(it))
df['A'] = df['A'].apply(lambda it: preprocess_sentence(it))

df.head()

Unnamed: 0,Q,A
0,12시 땡!,하루가 또 가네요.
1,1지망 학교 떨어졌어,위로해 드립니다.
2,3박4일 놀러가고 싶다,여행은 언제나 좋죠.
4,ppl 심하네,눈살이 찌푸려지죠.
5,sd카드 망가졌어,다시 새로 사는 게 마음 편해요.


In [6]:
questions = df['Q']
answers = df['A']

len(questions), len(answers)

(7731, 7731)

# Step 3. 데이터 토큰화

In [7]:
from konlpy.tag import Mecab
import numpy as np


def build_corpus(src_data, tgt_data):
    
    mecab = Mecab()
    
    def get_morphs(s):
        return mecab.morphs(s)
    
    mecab_src_corpus = list(map(get_morphs, src_data))
    mecab_tgt_corpus = list(map(get_morphs, src_data))
    
    mecab_num_tokens = [len(s) for s in mecab_src_corpus] + [len(s) for s in mecab_tgt_corpus]
    
    # 최대 길이를 (평균 + 2*표준편차)로 계산
    max_len = round(np.mean(mecab_num_tokens) + 2 * np.std(mecab_num_tokens))
    print(f'max_len : {max_len}')
    
    src_corpus, tgt_corpus = [], []
    for q, a in zip(mecab_src_corpus, mecab_tgt_corpus):
        if len(q) <= max_len and len(a) <= max_len:
            if q not in src_corpus and a not in tgt_corpus:
                src_corpus.append(q)
                tgt_corpus.append(a)
    
    return src_corpus, tgt_corpus

que_corpus, ans_corpus = build_corpus(questions, answers)

max_len : 15


In [8]:
len(que_corpus), len(ans_corpus)

(7429, 7429)

# Step 4. Augmentation

In [10]:
from gensim.models.word2vec import Word2Vec

w2v_model = Word2Vec.load('/aiffel/aiffel/transformer_chatbot/data/word2vec_ko.model')

In [11]:
import random


def lexical_sub(sentence, word2vec):
    
    try:
        _from = random.choice(sentence)
        _to = word2vec.wv.most_similar(_from)[0][0]
    except:
        return None
    
    res = []
    for x in sentence:
        if x is _from: res.append(_to)
        else: res.append(x)

    return res

In [12]:
from tqdm import tqdm_notebook


def augment_corpus(src_corpus, tgt_corpus, wv):
    new_src_corpus = []
    new_tgt_corpus = []
    corpus_size = len(src_corpus)
    
    for i in tqdm_notebook(range(corpus_size)):
        q = src_corpus[i]
        a = tgt_corpus[i]
        
        new_src = lexical_sub(q, wv)
        new_tgt = lexical_sub(a, wv)
        
        if new_src: 
            new_src_corpus.append(new_src)
            new_tgt_corpus.append(a)
            
        if new_tgt: 
            new_src_corpus.append(q)
            new_tgt_corpus.append(new_tgt)
    
    return new_src_corpus, new_tgt_corpus

In [13]:
aug_que, aug_anw = augment_corpus(que_corpus, ans_corpus, w2v_model)

que_corpus += aug_que
ans_corpus += aug_anw
    
len(que_corpus), len(ans_corpus)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm_notebook(range(corpus_size)):


  0%|          | 0/7429 [00:00<?, ?it/s]

(22098, 22098)

# Step 5. 데이터 벡터화

In [14]:
ans_corpus = [["<start>"] + ans + ["<end>"] for ans in ans_corpus]

In [15]:
import tensorflow as tf  


data = que_corpus + ans_corpus

tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=None, filters=' ', oov_token='<unk>')
tokenizer.fit_on_texts(data)

enc_train = tokenizer.texts_to_sequences(que_corpus)
enc_train = tf.keras.preprocessing.sequence.pad_sequences(enc_train, padding='post')

dec_train = tokenizer.texts_to_sequences(ans_corpus)
dec_train = tf.keras.preprocessing.sequence.pad_sequences(dec_train, padding='post')

In [16]:
enc_train.shape, dec_train.shape

((22098, 15), (22098, 17))

In [17]:
vocab_size = len(tokenizer.index_word) + 2

vocab_size

5851

# Step 6. 훈련하기

In [18]:
def positional_encoding(pos, d_model):
    
    def cal_angle(position, i):
        return position / np.power(10000, int(i)/d_model)  # np.power(a,b) > a^b(제곱)
    
    def get_posi_angle_vec(position):
        return [cal_angle(position, i) for i in range(d_model)]
    
    sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(pos)])
    sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])
    sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])
    
    return sinusoid_table

In [19]:
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        
        self.depth = d_model // self.num_heads
        
        self.W_q = tf.keras.layers.Dense(d_model)  
        self.W_k = tf.keras.layers.Dense(d_model)
        self.W_v = tf.keras.layers.Dense(d_model)
        
        self.linear = tf.keras.layers.Dense(d_model)
        
    def scaled_dot_product_attention(self, Q, K, V, mask):
        d_k = tf.cast(K.shape[-1], tf.float32)
        
        QK = tf.matmul(Q, K, transpose_b=True)
        scaled_qk = QK / tf.math.sqrt(d_k)
        
        if mask is not None:
            scaled_qk += (mask * -1e9)
        
        attentions = tf.nn.softmax(scaled_qk, axis=-1)

        out = tf.matmul(attentions, V)
        return out, attentions
    
    def split_heads(self, x):
        bsz = x.shape[0]
        split_x = tf.reshape(x, (bsz, -1, self.num_heads, self.depth))
        split_x = tf.transpose(split_x, perm=[0, 2, 1, 3])
        return split_x
    
    def combine_heads(self, x):
        bsz = x.shape[0]
        combined_x = tf.transpose(x, perm=[0, 2, 1, 3])
        combined_x = tf.reshape(combined_x, (bsz, -1, self.d_model))
        return combined_x
    
    def call(self, Q, K, V, mask):
        WQ = self.W_q(Q)
        WK = self.W_k(K)
        WV = self.W_v(V)
        
        WQ_splits = self.split_heads(WQ)
        WK_splits = self.split_heads(WK)
        WV_splits = self.split_heads(WV)
        
        out, attention_weights = self.scaled_dot_product_attention(
            WQ_splits, WK_splits, WV_splits, mask
        )
        
        out = self.combine_heads(out)
        out = self.linear(out)
        
        return out, attention_weights

In [20]:
class PoswiseFeedForwardNet(tf.keras.layers.Layer):
    def __init__(self, d_model, d_ff):
        super(PoswiseFeedForwardNet, self).__init__()
        self.w_1 = tf.keras.layers.Dense(d_ff, activation='relu')
        self.w_2 = tf.keras.layers.Dense(d_model)
        
    def call(self, x):
        out = self.w_1(x)
        out = self.w_2(out)
        return out

In [21]:
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, n_heads, d_ff, dropout):
        super(EncoderLayer, self).__init__()
        self.enc_self_attn = MultiHeadAttention(d_model, n_heads)
        self.ffn = PoswiseFeedForwardNet(d_model, d_ff)
        
        self.norm_1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.norm_2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        
        self.do = tf.keras.layers.Dropout(dropout)
        
    def call(self, x, mask):

        residual = x
        out = self.norm_1(x)
        out, enc_attn = self.enc_self_attn(out, out, out, mask)
        out = self.do(out)
        out += residual
        
        residual = out
        out = self.norm_2(out)
        out = self.ffn(out)
        out = self.do(out)
        out += residual
        
        return out, enc_attn

In [22]:
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, d_ff, dropout):
        super(DecoderLayer, self).__init__()

        self.dec_self_attn = MultiHeadAttention(d_model, num_heads)
        self.enc_dec_attn = MultiHeadAttention(d_model, num_heads)

        self.ffn = PoswiseFeedForwardNet(d_model, d_ff)

        self.norm_1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.norm_2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.norm_3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.do = tf.keras.layers.Dropout(dropout)
        
    def call(self, x, enc_out, causality_mask, padding_mask):

        residual = x
        out = self.norm_1(x)

        out, dec_attn = self.dec_self_attn(out, out, out, padding_mask)
        out = self.do(out)
        out += residual
        
        residual = out
        out = self.norm_2(out)

        out, dec_enc_attn = self.enc_dec_attn(out, enc_out, enc_out, causality_mask)
        out = self.do(out)
        out += residual

        residual = out
        out = self.norm_3(out)
        out = self.ffn(out)
        out = self.do(out)
        out += residual

        return out, dec_attn, dec_enc_attn

In [23]:
class Encoder(tf.keras.Model):
    def __init__(self, n_layers, d_model, n_heads, d_ff, dropout):
        super(Encoder, self).__init__()
        self.n_layers = n_layers
        self.enc_layers = [EncoderLayer(d_model, n_heads, d_ff, dropout) for _ in range(n_layers)]
        self.do = tf.keras.layers.Dropout(dropout)
        
    def call(self, x, mask):
        out = x
        enc_attns = list()
        for i in range(self.n_layers):
            out, enc_attn = self.enc_layers[i](out, mask)
            enc_attns.append(enc_attn)
            
        return out, enc_attns

In [24]:
class Decoder(tf.keras.Model):
    def __init__(self, n_layers, d_model, n_heads, d_ff, dropout):
        super(Decoder, self).__init__()
        self.n_layers = n_layers
        self.dec_layers = [DecoderLayer(d_model, n_heads, d_ff, dropout) for _ in range(n_layers)]
        
    def call(self, x, enc_out, causality_mask, padding_mask):
        out = x
        dec_attns = list()
        dec_enc_attns = list()
        for i in range(self.n_layers):
            out, dec_attn, dec_enc_attn = self.dec_layers[i](out, enc_out, causality_mask, padding_mask)
            dec_attns.append(dec_attn)
            dec_enc_attns.append(dec_enc_attn)
        
        return out, dec_attns, dec_enc_attns

In [25]:
class Transformer(tf.keras.Model):
    def __init__(self, n_layers, d_model, n_heads, d_ff, src_vocab_size, tgt_vocab_size,
                 pos_len, dropout=0.2, shared=True):
        super(Transformer, self).__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        
        self.enc_emb = tf.keras.layers.Embedding(src_vocab_size, d_model)
        self.dec_emb = tf.keras.layers.Embedding(tgt_vocab_size, d_model)
        
        self.pos_encoding = positional_encoding(pos_len, d_model)
        
        self.do = tf.keras.layers.Dropout(dropout)
        
        self.encoder = Encoder(n_layers, d_model, n_heads, d_ff, dropout)
        self.decoder = Decoder(n_layers, d_model, n_heads, d_ff, dropout)
        
        self.fc = tf.keras.layers.Dense(tgt_vocab_size)
        
        self.shared = shared
        
        if shared:
            self.fc.set_weights(tf.transpose(self.dec_emb.weights))
        
        
    def embedding(self, emb, x):
        
        seq_len = x.shape[1]
        out = emb(x)
        
        if self.shared:
            out *= tf.math.sqrt(self.d_model)
        
        out += self.pos_encoding[np.newaxis, ...][:, :seq_len, :]
        out = self.do(out)
        
        return out
    
    def call(self, enc_in, dec_in, enc_mask, causality_mask, dec_mask):

        enc_in = self.embedding(self.enc_emb, enc_in)
        dec_in = self.embedding(self.dec_emb, dec_in)

        enc_out, enc_attns = self.encoder(enc_in, enc_mask)

        dec_out, dec_attns, dec_enc_attns = self.decoder(dec_in, enc_out, causality_mask, dec_mask)

        logits = self.fc(dec_out)
        return logits, enc_attns, dec_attns, dec_enc_attns

In [26]:
def generate_padding_mask(seq):
    seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
    return seq[:, tf.newaxis, tf.newaxis, :]

def generate_causality_mask(src_len, tgt_len):
    mask = 1 - np.cumsum(np.eye(src_len, tgt_len), 0)
    return tf.cast(mask, tf.float32)

def generate_masks(src, tgt):
    enc_mask = generate_padding_mask(src)
    dec_mask = generate_padding_mask(tgt)

    dec_enc_causality_mask = generate_causality_mask(tgt.shape[1], src.shape[1])
    dec_enc_mask = tf.maximum(enc_mask, dec_enc_causality_mask)

    dec_causality_mask = generate_causality_mask(tgt.shape[1], tgt.shape[1])
    dec_mask = tf.maximum(dec_mask, dec_causality_mask)

    return enc_mask, dec_enc_mask, dec_mask

In [27]:
class LearningRateScheduler(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super(LearningRateScheduler, self).__init__()
        self.d_model = d_model
        self.warmup_steps = warmup_steps
    
    def __call__(self, step):
        arg1 = step ** -0.5
        arg2 = step * (self.warmup_steps ** -1.5)
        
        return (self.d_model ** -0.5) * tf.math.minimum(arg1, arg2)

In [31]:
from tqdm.notebook import tqdm
import random
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import SmoothingFunction

class ChatBot():
    def __init__(self, tokenizer, enc_data, dec_data,
                 n_layers=6, d_model=512, n_heads=8, d_ff=2048,
                 vocab_size=20000, pos_len=200, dropout=0.3, shared=True,
                 epochs=20, batch_size=64):
        super(ChatBot, self).__init__()
        
        self.tokenizer = tokenizer
        self.enc_data = enc_data
        self.dec_data = dec_data
        
        self.n_layers = n_layers
        self.d_model = d_model
        self.n_heads = n_heads
        self.d_ff = d_ff
        self.dropout = dropout
        
        self.model = Transformer(
            n_layers=n_layers,
            d_model=d_model,
            n_heads=n_heads,
            d_ff=d_ff,
            src_vocab_size=vocab_size,
            tgt_vocab_size=vocab_size,
            pos_len=pos_len,
            dropout=dropout,
            shared=shared
        )
        
        self.EPOCHS = epochs
        self.BATCH_SIZE = batch_size
        
        learning_rate = LearningRateScheduler(d_model)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
        self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
        
        self.examples = [
            "지루하다, 놀러가고 싶어.",
            "오늘 일찍 일어났더니 피곤하다.",
            "간만에 여자친구랑 데이트 하기로 했어.",
            "집에 있는다는 소리야."
        ]
        
    def loss_function(self, real, pred):
        mask = tf.math.logical_not(tf.math.equal(real, 0))
        loss_ = self.loss_object(real, pred)

        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask

        return tf.reduce_sum(loss_)/tf.reduce_sum(mask)
    
    
    @tf.function()
    def train_step(self, src, tgt):
        gold = tgt[:, 1:]

        enc_mask, dec_enc_mask, dec_mask = generate_masks(src, tgt)

        with tf.GradientTape() as tape:
            predictions, enc_attns, dec_attns, dec_enc_attns = self.model(src, tgt, enc_mask, dec_enc_mask, dec_mask)
            loss = self.loss_function(gold, predictions[:, :-1])

        gradients = tape.gradient(loss, self.model.trainable_variables)    
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))

        return loss, enc_attns, dec_attns, dec_enc_attns
        
        
    def train(self):
        
        for epoch in range(self.EPOCHS):
            total_loss = 0

            idx_list = list(range(0, self.enc_data.shape[0], self.BATCH_SIZE))   
            random.shuffle(idx_list)
            t = tqdm(idx_list)
            
            for (batch, idx) in enumerate(t):
                batch_loss, enc_attns, dec_attns, dec_enc_attns = self.train_step(
                    self.enc_data[idx:idx+self.BATCH_SIZE],
                    self.dec_data[idx:idx+self.BATCH_SIZE])
                
                total_loss += batch_loss

                t.set_description_str('Epoch %2d' % (epoch + 1))
                t.set_postfix_str('Loss %.4f' % (total_loss.numpy() / (batch + 1)))
                
                
    def evaluate(self, sentence):
        mecab = Mecab()
        sentence = mecab.morphs(preprocess_sentence(sentence))
        sentence = self.tokenizer.texts_to_sequences(sentence)
        _input = tf.keras.preprocessing.sequence.pad_sequences(
            sentence, maxlen=self.enc_data.shape[-1], padding='post')

        ids = []
        output = tf.expand_dims([self.tokenizer.word_index['<start>']], 0)
        for i in range(self.dec_data.shape[-1]):
            enc_padding_mask, combined_mask, dec_padding_mask = generate_masks(_input, output)

            predictions, enc_attns, dec_attns, dec_enc_attns = self.model(
                _input, output, enc_padding_mask, combined_mask, dec_padding_mask)
            
            predicted_id = tf.argmax(tf.math.softmax(predictions, axis=-1)[0, -1]).numpy().item()
            
            if tokenizer.word_index['<end>'] == predicted_id:
                result = ' '.join(self.tokenizer.sequences_to_texts([ids]))
                return result, enc_attns, dec_attns, dec_enc_attns
            
            ids.append(predicted_id)
            output = tf.concat([output, tf.expand_dims([predicted_id], 0)], axis=-1)
    
        result = ' '.join(self.tokenizer.sequences_to_texts([ids]))
        return result, enc_attns, dec_attns, dec_enc_attns
        
        
    def chat(self, sentence):
        result, enc_attns, dec_attns, dec_enc_attns = self.evaluate(sentence)
        
        print(f'Q: {sentence}')
        print(f'A: {result}')
       
        
    def eval_bleu_single(self, src_sentence, tgt_sentence):
        src_tokens = self.tokenizer.texts_to_sequences(src_sentence)
        tgt_tokens = self.tokenizer.texts_to_sequences(tgt_sentence)

        if (len(src_tokens) > self.enc_data.shape[-1]): return None
        if (len(tgt_tokens) > self.enc_data.shape[-1]): return None

        reference = tgt_sentence.split()
        candidate, _, _, _ = self.evaluate(src_sentence)
        
        score = sentence_bleu([reference], candidate,
                              smoothing_function=SmoothingFunction().method1)

        return score


    def eval_bleu(self, src_sentences, tgt_sentence):
        total_score = 0.0
        sample_size = len(src_sentences)

        for idx in tqdm(range(sample_size)):
            score = self.eval_bleu_single(src_sentences[idx], tgt_sentence[idx])
            if not score: continue

            total_score += score
            
        return total_score / sample_size
        
    
    def submit(self, src_sentences, tgt_sentence):
        print('# 제출')
        print()
        
        print('Translations')
        for i, s in enumerate(self.examples):
            print(f'> {i + 1}')
            self.chat(s)
            print()
            
        print('BLEU Score')
        print(f'score: {self.eval_bleu(src_sentences, tgt_sentence)}')
        print()

        print('Hyperparameters')
        print(f'> n_layers: {self.n_layers}')
        print(f'> d_model: {self.d_model}')
        print(f'> n_heads: {self.n_heads}')
        print(f'> d_ff: {self.d_ff}')
        print(f'> dropout: {self.dropout}')
        print()
        
        print('Training Parameters')
        print(f'> Batch Size: {self.BATCH_SIZE}')
        print(f'> Epoch At: {self.EPOCHS}')

# Step 7. 성능 측정하기

In [32]:
model_1 = ChatBot(tokenizer, enc_train, dec_train)
model_1.train()

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

In [33]:
model_1.submit(list(questions[:100]), list(answers[:100]))

# 제출

Translations
> 1
Q: 지루하다, 놀러가고 싶어.
A: 

> 2
Q: 오늘 일찍 일어났더니 피곤하다.
A: 오늘

> 3
Q: 간만에 여자친구랑 데이트 하기로 했어.
A: 간만에

> 4
Q: 집에 있는다는 소리야.
A: 집

BLEU Score


  0%|          | 0/100 [00:00<?, ?it/s]

score: 0.0

Hyperparameters
> n_layers: 6
> d_model: 512
> n_heads: 8
> d_ff: 2048
> dropout: 0.3

Training Parameters
> Batch Size: 64
> Epoch At: 20


In [34]:
model_2 = ChatBot(tokenizer, enc_train, dec_train, n_layers=6, n_heads=16, d_model=1024, d_ff=4096)
model_2.train()

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

In [35]:
model_2.submit(list(questions[:100]), list(answers[:100]))

# 제출

Translations
> 1
Q: 지루하다, 놀러가고 싶어.
A: 서 여자 만나 서

> 2
Q: 오늘 일찍 일어났더니 피곤하다.
A: 6

> 3
Q: 간만에 여자친구랑 데이트 하기로 했어.
A: 격려

> 4
Q: 집에 있는다는 소리야.
A: 집 집 집 집 집

BLEU Score


  0%|          | 0/100 [00:00<?, ?it/s]

score: 0.0

Hyperparameters
> n_layers: 6
> d_model: 1024
> n_heads: 16
> d_ff: 4096
> dropout: 0.3

Training Parameters
> Batch Size: 64
> Epoch At: 20


In [36]:
model_3 = ChatBot(tokenizer, enc_train, dec_train, epochs=50)
model_3.train()

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

In [37]:
model_3.submit(list(questions[:100]), list(answers[:100]))

# 제출

Translations
> 1
Q: 지루하다, 놀러가고 싶어.
A: 한다

> 2
Q: 오늘 일찍 일어났더니 피곤하다.
A: 내일

> 3
Q: 간만에 여자친구랑 데이트 하기로 했어.
A: 간만에

> 4
Q: 집에 있는다는 소리야.
A: 방

BLEU Score


  0%|          | 0/100 [00:00<?, ?it/s]

score: 0.0

Hyperparameters
> n_layers: 6
> d_model: 512
> n_heads: 8
> d_ff: 2048
> dropout: 0.3

Training Parameters
> Batch Size: 64
> Epoch At: 50


In [38]:
model_4 = ChatBot(tokenizer, enc_train, dec_train, n_layers=6, n_heads=16, d_model=1024, d_ff=4096, epochs=50)
model_4.train()

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

In [39]:
model_4.submit(list(questions[:100]), list(answers[:100]))

# 제출

Translations
> 1
Q: 지루하다, 놀러가고 싶어.
A: 지루 수 가

> 2
Q: 오늘 일찍 일어났더니 피곤하다.
A: 오늘 생각 오늘

> 3
Q: 간만에 여자친구랑 데이트 하기로 했어.
A: 모틀란테 귀여워

> 4
Q: 집에 있는다는 소리야.
A: 집 앞 집 앞 집 집 집 집 집 집 고치 집 고치 집 앞 집 앞

BLEU Score


  0%|          | 0/100 [00:00<?, ?it/s]

score: 0.0

Hyperparameters
> n_layers: 6
> d_model: 1024
> n_heads: 16
> d_ff: 4096
> dropout: 0.3

Training Parameters
> Batch Size: 64
> Epoch At: 50


In [None]:
model_5 = ChatBot(tokenizer, enc_train, dec_train, n_layers=6, n_heads=16, d_model=1024, d_ff=4096, epochs=100)
model_5.train()

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/346 [00:00<?, ?it/s]

In [None]:
model_5.submit(list(questions[:100]), list(answers[:100]))