In [5]:

import numpy as np
import pandas as pd
import tensorflow as tf
import math
import tensorflow_datasets as tfds
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
import re

In [6]:
os.chdir("/home/bmllab/bml_pjh/project/chatbot/data")
data=pd.read_csv("chatbot_data.csv")

In [7]:
#SubwordTextEncoder를 사용하기 위해 전처리 과정
q=[]
a=[]
for i in range(data.shape[0]):
    data_q=re.sub(r"([?.!,])", r" \1 ", data.loc[i,'Q'])
    data_q=data_q.strip()
    
    data_a=re.sub(r"([?.!,])", r" \1 ", data.loc[i,'A'])
    data_a=data_a.strip()    
    q.append(data_q)
    a.append(data_a)

In [8]:
tokenizer=tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(q+a,target_vocab_size=2**13)# 단어 모음집 생성
#문장의 start와 end를 정의할 토큰 정의
start_token,eos_token= [tokenizer.vocab_size],[tokenizer.vocab_size+1]
#문장의 시작과 끝을 가리키는 토큰들을 고려해서 단어장의 사이즈 정의
vocab_size=tokenizer.vocab_size+2
enc_q=[]
enc_a=[]
for i in q:
    enc_q.append(start_token+tokenizer.encode(i)+eos_token)
for j in a:
    enc_a.append(start_token+tokenizer.encode(j)+eos_token)
    
for_max_len=[]
for_max_len.extend(enc_q)
for_max_len.extend(enc_a)

    




In [9]:
max_len=max(len(i) for i in for_max_len)

In [10]:
max_len

75

In [11]:
sentence_int_pad_q=pad_sequences(enc_q,maxlen=max_len,padding="post")
sentence_int_pad_a=pad_sequences(enc_a,maxlen=max_len,padding="post")


In [12]:
sentence_int_pad_q.shape

(58960, 75)

In [22]:
datasets=tf.data.Dataset.from_tensor_slices(({'input_1':sentence_int_pad_q,'input_2':sentence_int_pad_a[:,:-1]},{"outputs":sentence_int_pad_a[:,1:]}))

In [14]:
class pos_enc(tf.keras.layers.Layer):
    def __init__(self,vocab_size,d_model):
        super(pos_enc, self).__init__()
        self.d_model=d_model
        self.vocab_size=vocab_size
        
    def get_pos(self,pos,i,d_model):#pos 행 ,D_MODEL은 임베딩의 차원 
        if i%2==0:
            return tf.math.sin(pos/(10000**(2*i/d_model)))
        else:
            return tf.math.cos(pos/(10000**(2*i/d_model)))
        
    def make_mat(self,vocab_size,d_model):
        re=[]
        pos_mat=[]
        for i in range(vocab_size):
            row=[]
            for j in range(d_model):
                pos_en= self.get_pos(pos=i+1,i=j+1,d_model=d_model)
                #row.append(inputs[i,j].numpy()+pos_en.numpy())
                row.append(pos_en)

            pos_mat.append(row)
        return pos_mat


    def call(self,inputs):
        pos_mat=self.make_mat(self.vocab_size,self.d_model)
        return tf.add(inputs,pos_mat)

In [15]:
def create_look_ahead_mask(x):
    seq_len = tf.shape(x)[1]
    look_ahead_mask = 1 - tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
    padding_mask = create_padding_mask(x) # 패딩 마스크도 포함
    return tf.maximum(look_ahead_mask, padding_mask)
            
def create_padding_mask(x):#아래 scale_dot_product에서 마스킹을 위한 m을 만드는 함수이다. 아래에서 layers.Lamda를 통해 활용
    mask = tf.cast(tf.math.equal(x, 0), tf.float32)
  # (batch_size, 1, 1, key의 문장 길이)
    return mask[:, tf.newaxis, tf.newaxis, :]
    
def scaled_dot_product_attention(q,k,v,m=None):
    att_score=tf.matmul(q,k,transpose_b=True)/tf.math.sqrt(tf.cast(tf.shape(q)[-1],tf.float32))
    if m is not None:
        att_score += (m * -1e9)#거의 -무한대로 마스킹해줌
    att_w=tf.nn.softmax(att_score,axis=-1)
    result=tf.matmul(att_w,v)
    return result

class Multi_head_layer(tf.keras.layers.Layer):#d_model은 입력의 차원 
    def __init__(self,d_model,num_of_head):
        super(Multi_head_layer, self).__init__()
        self.w_q=tf.keras.layers.Dense(d_model)
        self.w_k=tf.keras.layers.Dense(d_model)
        self.w_v=tf.keras.layers.Dense(d_model)
        self.w_summ=tf.keras.layers.Dense(d_model)
        self.depth=int(d_model/num_of_head)
        self.num_head=num_of_head

    def call(self,query,key,value,mask):
        q,k,v=query,key,value
        m=mask
        batch_size=tf.shape(q)[0]
        query=self.w_q(q)
        key=self.w_k(k)
        value=self.w_v(v)
        query=tf.reshape(query,shape=(batch_size,self.num_head,-1,self.depth))
        key=tf.reshape(key,shape=(batch_size,self.num_head,-1,self.depth))
        value=tf.reshape(value,shape=(batch_size,self.num_head,-1,self.depth))
        atten_w=scaled_dot_product_attention(query,key,value,m)
        atten_w = tf.transpose(atten_w, perm=[0, 2, 1, 3])
        atten_w=tf.reshape(atten_w,(batch_size,-1,self.depth))
        atten_w=self.w_summ(atten_w)
        
        return atten_w
#x의 형태는(batch_size,문장의 길이,depth)
def encoder_block(d_model,num_of_head,d_ff,name="encoder_block"):
    input_=tf.keras.layers.Input(shape=(None,d_model))
    #인코더의 padding mask 정의
    padding_mask=tf.keras.layers.Input(shape=(1,1,None),name='padding_mask')
    #아까 정의한 multi head attention을 사용한다.
    attention=Multi_head_layer(d_model,num_of_head)(input_,input_,input_,padding_mask)
    #multi head attention의 결과를 잔차 연결한다
    attention=tf.add(input_,attention)
    #layernormalization을 해준다.
    attention=tf.keras.layers.LayerNormalization(epsilon=1e-6)(attention)
    #여기가 feed forward net부분이다.
    attention_1=tf.keras.layers.Dense(d_ff,activation='relu')(attention)
    attention_1=tf.keras.layers.Dense(d_model)(attention)
    #그후 잔차 연결과 layernormalization을 해준다.
    attention_2=tf.add(attention_1,attention)
    outputs=tf.keras.layers.LayerNormalization(epsilon=1e-6)(attention_2)
    
    return tf.keras.Model(inputs=[input_,padding_mask],outputs=outputs)


def decoder_block(d_model,d_ff,num_of_head,name="decoder_block"):#d_model은 출력의 차원
    input_=tf.keras.layers.Input(shape=(None,d_model))
    enc_output=tf.keras.layers.Input(shape=(None,d_model))
    
    look_ahead_mask = tf.keras.Input(shape=(1, None, None), name="look_ahead_mask")#이거는 현재 시점보다 뒤에 시점의 attention을 배제하기 위해 masking하는 부분
    padding_mask=tf.keras.layers.Input(shape=(1,1,None))#<pad>토큰을 masking해줘서 유사도를 구하지 않게 해줌
    
    #self attention부분이다 self attention이기에 현재 이후의 시점의 attention을 배제하기 위해 look_ahead_mask를 사용한다
    self_attention=Multi_head_layer(d_model,num_of_head)(input_,input_,input_,look_ahead_mask)
    self_attention=tf.add(self_attention,input_)
    self_attention=tf.keras.layers.LayerNormalization()(self_attention)
    
    enc_dec_attention=Multi_head_layer(d_model,num_of_head)(self_attention,enc_output,enc_output,padding_mask)
    enc_dec_attention=tf.add(self_attention,enc_dec_attention)
    enc_dec_attention=tf.keras.layers.LayerNormalization()(enc_dec_attention)
    
    dec_attention_1=tf.keras.layers.Dense(d_ff,activation='relu')(enc_dec_attention)
    dec_attention_1=tf.keras.layers.Dense(d_model)(enc_dec_attention)
    
    last_attention=tf.add(dec_attention_1,enc_dec_attention)
    last_attention=tf.keras.layers.LayerNormalization()(enc_dec_attention)
    
    return tf.keras.models.Model(inputs=[input_,enc_output,padding_mask,look_ahead_mask],outputs=last_attention)

def transformer(d_model,d_ff,num_of_head,vocab_size):
    enc_input=tf.keras.layers.Input(shape=(None,))
    dec_input=tf.keras.layers.Input(shape=(None,))
    
    emb_enc=tf.keras.layers.Embedding(vocab_size,d_model)(enc_input)
    emb_dec=tf.keras.layers.Embedding(vocab_size,d_model)(dec_input)
    
    pad_mask=tf.keras.layers.Lambda(create_padding_mask, output_shape=(1, 1, None),name='enc_padding_mask')(enc_input)
    
    look_mask = tf.keras.layers.Lambda(create_look_ahead_mask, output_shape=(1, None, None),name='look_ahead_mask')(dec_input)
    
    pos_enc_enc=pos_enc(vocab_size,d_model)(emb_enc)
    pos_enc_dec=pos_enc(vocab_size,d_model)(emb_dec)

    
    first_enc_out=encoder_block(d_model,num_of_head,d_ff)([pos_enc_enc,pad_mask])
    print('first good')
    second_enc_out=encoder_block(d_model,num_of_head,d_ff)([first_enc_out,pad_mask])
    third_enc_out=encoder_block(d_model,num_of_head,d_ff)([second_enc_out,pad_mask])
    four_enc_out=encoder_block(d_model,num_of_head,d_ff)([third_enc_out,pad_mask])
    
    first_dec_out=decoder_block(d_model,d_ff,num_of_head)([pos_enc_dec, four_enc_out, pad_mask, look_mask])
    second_dec_out=decoder_block(d_model,d_ff,num_of_head)([dec_input, four_enc_out, pad_mask, look_mask])
    third_dec_out=decoder_block(d_model,d_ff,num_of_head)([dec_input, four_enc_out, pad_mask, look_mask])
    four_dec_out=decoder_block(d_model,d_ff,num_of_head)([dec_input, four_enc_out, pad_mask, look_mask])
    
    outputs = tf.keras.layers.Dense(vocab_size, name="outputs")(four_dec_out)
    return tf.keras.models.Model(inputs=[enc_input,dec_input],outputs=outputs)
    
    


In [23]:
BATCH_SIZE = 64
BUFFER_SIZE = 20000

datasets = datasets.cache()
datasets = datasets.shuffle(BUFFER_SIZE)
datasets = datasets.batch(BATCH_SIZE)
datasets = datasets.prefetch(tf.data.experimental.AUTOTUNE)

In [17]:
D_MODEL = 256
NUM_LAYERS = 2
NUM_HEADS = 8
DFF = 512
DROPOUT = 0.1

model = transformer(
    vocab_size=vocab_size,
    d_ff=DFF,
    d_model=D_MODEL,
    num_of_head=NUM_HEADS)

Location:
  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)

  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/traitlets/config/application.py", line 845, in launch_instance
    app.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 612, in start
    self.io_loop.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line 538, in run_forever
    self._run_once()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line

Location:
  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)

  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/traitlets/config/application.py", line 845, in launch_instance
    app.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 612, in start
    self.io_loop.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line 538, in run_forever
    self._run_once()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line

Location:
  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)

  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/traitlets/config/application.py", line 845, in launch_instance
    app.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 612, in start
    self.io_loop.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line 538, in run_forever
    self._run_once()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line

Location:
  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)

  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/traitlets/config/application.py", line 845, in launch_instance
    app.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 612, in start
    self.io_loop.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line 538, in run_forever
    self._run_once()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line

Location:
  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)

  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/traitlets/config/application.py", line 845, in launch_instance
    app.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 612, in start
    self.io_loop.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line 538, in run_forever
    self._run_once()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line











In [18]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):

    def __init__(self, d_model, warmup_steps=4000):
        super(CustomSchedule, self).__init__()
        self.d_model = d_model
        self.d_model = tf.cast(self.d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps**-1.5)

        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

In [20]:
lr=CustomSchedule(D_MODEL)
optimizer=tf.keras.optimizers.Adam(lr,beta_1=0.9, beta_2=0.98, epsilon=1e-9)
loss='sparse_categorical_crossentropy'
acc=tf.keras.metrics.SparseCategoricalAccuracy()
model.compile(optimizer=optimizer,loss=loss,metrics=[acc])

In [26]:
EPOCHS = 50
model.fit(datasets, epochs=EPOCHS)

Epoch 1/50
Location:
  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)

  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/traitlets/config/application.py", line 845, in launch_instance
    app.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 612, in start
    self.io_loop.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line 538, in run_forever
    self._run_once()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_event

Location:
  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)

  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/traitlets/config/application.py", line 845, in launch_instance
    app.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 612, in start
    self.io_loop.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line 538, in run_forever
    self._run_once()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line

Location:
  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)

  File "/home/bmlserver/anaconda3/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/traitlets/config/application.py", line 845, in launch_instance
    app.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 612, in start
    self.io_loop.start()

  File "/home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line 538, in run_forever
    self._run_once()

  File "/home/bmlserver/anaconda3/lib/python3.7/asyncio/base_events.py", line

ValueError: in user code:

    /home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    <ipython-input-14-cb9b570f3d0b>:29 call  *
        return tf.add(inputs,pos_mat)
    /home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tensorflow/python/ops/gen_math_ops.py:345 add  **
        "Add", x=x, y=y, name=name)
    /home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:750 _apply_op_helper
        attrs=attr_protos, op_def=op_def)
    /home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py:592 _create_op_internal
        compute_device)
    /home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:3536 _create_op_internal
        op_def=op_def)
    /home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:2016 __init__
        control_input_ops, op_def)
    /home/bmllab/bml_pjh/myenv/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:1856 _create_c_op
        raise ValueError(str(e))

    ValueError: Dimensions must be equal, but are 75 and 8181 for '{{node model_8/pos_enc/Add}} = Add[T=DT_FLOAT](model_8/embedding/embedding_lookup/Identity_1, model_8/pos_enc/Add/y)' with input shapes: [?,75,256], [8181,256].


In [25]:
import os