<a href="https://colab.research.google.com/github/Rnlcksgdkd/Project_AI/blob/ando/ando/ChatBot/ChatBot_Seq2Seq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **I. git 에서 데이터 및 소스 로드**

In [1]:
!git clone https://github.com/Rnlcksgdkd/Project_AI

Cloning into 'Project_AI'...
remote: Enumerating objects: 106, done.[K
remote: Counting objects: 100% (106/106), done.[K
remote: Compressing objects: 100% (65/65), done.[K
remote: Total 404 (delta 52), reused 81 (delta 39), pack-reused 298[K
Receiving objects: 100% (404/404), 305.57 MiB | 31.44 MiB/s, done.
Resolving deltas: 100% (177/177), done.


In [None]:
%cd /content/Project_AI
!git pull origin ando

In [22]:
import sys
import numpy as np
import json

DATA_PATH = "/content/Project_AI/ando/ChatBot/data/"

sys.path.append(DATA_PATH)

train_input = np.load(DATA_PATH + 'train_inputs.npy')
train_output = np.load(DATA_PATH + "train_outputs.npy")
train_targets = np.load(DATA_PATH + "train_targets.npy")

prepro_configs = json.load(open(DATA_PATH + "data_configs.json" , 'r'))

train_input.shape , train_output.shape , train_targets.shape ,  prepro_configs['vocab_size']



((11823, 25), (11823, 25), (11823, 25), 20705)

In [40]:
len(train_input)

11823

# II. **사전 준비**

In [None]:
!pip install konlpy

In [18]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt

PATH = "/content/Project_AI/ando/ChatBot/"
sys.path.append(PATH)
import preprocess

In [19]:
def plot_graphs(history, string):
    plt.plot(history.history[string])
    plt.plot(history.history['val_'+string], '')
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, 'val_'+string])
    plt.show()

In [20]:
SEED_NUM = 1234
tf.random.set_seed(SEED_NUM)

> ## **모델 파라미터**

In [23]:
MODEL_NAME = 'seq2seq_kor'
BATCH_SIZE = 2
MAX_SEQUENCE = 25
EPOCH = 30
UNITS = 1024
EMBEDDING_DIM = 256
VALIDATION_SPLIT = 0.1 

char2idx = prepro_configs['char2idx']
idx2char = prepro_configs['idx2char']
std_index = prepro_configs['std_symbol']
end_index = prepro_configs['end_symbol']
vocab_size = prepro_configs['vocab_size']


# **II. Seq2Seq Model**

> ## **인코더 / 디코더 / 어탠션 클래스 정의**

In [48]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Layer

class Encoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.vocab_size = vocab_size 
        self.embedding_dim = embedding_dim          
        
        self.embedding = tf.keras.layers.Embedding(self.vocab_size, self.embedding_dim)
        self.gru = tf.keras.layers.GRU(self.enc_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')

    def call(self, x, hidden):
        x = self.embedding(x)
        output, state = self.gru(x, initial_state = hidden)
        return output, state

    def initialize_hidden_state(self, inp):
        return tf.zeros((tf.shape(inp)[0], self.enc_units))

class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        hidden_with_time_axis = tf.expand_dims(query, 1)

        score = self.V(tf.nn.tanh(
            self.W1(values) + self.W2(hidden_with_time_axis)))

        attention_weights = tf.nn.softmax(score, axis=1)

        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights
class Decoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        super(Decoder, self).__init__()
        
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.vocab_size = vocab_size 
        self.embedding_dim = embedding_dim  
        
        self.embedding = tf.keras.layers.Embedding(self.vocab_size, self.embedding_dim)
        self.gru = tf.keras.layers.GRU(self.dec_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')
        self.fc = tf.keras.layers.Dense(self.vocab_size)

        self.attention = BahdanauAttention(self.dec_units)
        
    def call(self, x, hidden, enc_output):
        context_vector, attention_weights = self.attention(hidden, enc_output)

        x = self.embedding(x)

        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        output, state = self.gru(x)
        output = tf.reshape(output, (-1, output.shape[2]))
            
        x = self.fc(output)
        
        return x, state, attention_weights

> ## **Seq2Seq Model Define**

In [58]:
class seq2seq(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, enc_units, dec_units, batch_sz, end_token_idx=2):    
        super(seq2seq, self).__init__()
        self.end_token_idx = end_token_idx
        self.encoder = Encoder(vocab_size, embedding_dim, enc_units, batch_sz) 
        self.decoder = Decoder(vocab_size, embedding_dim, dec_units, batch_sz) 

    def call(self, x):
        inp, tar = x
        
        enc_hidden = self.encoder.initialize_hidden_state(inp)
        enc_output, enc_hidden = self.encoder(inp, enc_hidden)

        dec_hidden = enc_hidden

        predict_tokens = list()
        for t in range(0, tar.shape[1]):
            dec_input = tf.dtypes.cast(tf.expand_dims(tar[:, t], 1), tf.float32) 
            predictions, dec_hidden, _ = self.decoder(dec_input, dec_hidden, enc_output)
            predict_tokens.append(tf.dtypes.cast(predictions, tf.float32))   
        return tf.stack(predict_tokens, axis=1)
    
    def inference(self, x):
        inp  = x

        enc_hidden = self.encoder.initialize_hidden_state(inp)
        enc_output, enc_hidden = self.encoder(inp, enc_hidden)

        dec_hidden = enc_hidden
        
        dec_input = tf.expand_dims([char2idx[std_index]], 1)
        
        predict_tokens = list()
        for t in range(0, MAX_SEQUENCE):
            predictions, dec_hidden, _ = self.decoder(dec_input, dec_hidden, enc_output)
            predict_token = tf.argmax(predictions[0])
            
            if predict_token == self.end_token_idx:
                break
            
            predict_tokens.append(predict_token)
            dec_input = tf.dtypes.cast(tf.expand_dims([predict_token], 0), tf.float32)   
            
        return tf.stack(predict_tokens, axis=0).numpy()

> ## **Optimizer / Loss Define**

In [59]:

optimizer = tf.keras.optimizers.Adam()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True , reduction = 'none')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name = 'accuracy')

def loss(real , pred):
  mask = tf.math.logical_not(tf.math.equal(real , 0))
  loss_ = loss_object(real , pred)
  mask = tf.cast(mask , dtype = loss_.dtype)
  loss_*= mask
  return tf.reduce_mean(loss_)

def accuracy(real , pred):
  mask = tf.math.logical_not(tf.math.equal(real , 0))
  mask = tf.expand_dims(tf.cast(mask , dtype = pred.dtype) , axis = 1)
  pred *= mask
  acc = train_accuracy(real , pred)
  return tf.reduce_mean(acc)


> ## **Build/Compile**

In [63]:

model = seq2seq(vocab_size , EMBEDDING_DIM , UNITS , UNITS , BATCH_SIZE , char2idx[end_index])
model.compile(loss = loss , optimizer = tf.keras.optimizers.Adam(1e-3) , metrics = [accuracy])


> ## **Model Fitting**

In [64]:
import os

MODEL_PATH = "/content/Project_AI/ando/ChatBot/model/"
PATH = MODEL_PATH + MODEL_NAME

if not(os.path.isdir(PATH)):
        os.makedirs(os.path.join(PATH))
        
checkpoint_path = MODEL_PATH + MODEL_NAME + '/weights.h5'
    
cp_callback = ModelCheckpoint(
    checkpoint_path, monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=True)

earlystop_callback = EarlyStopping(monitor='val_accuracy', min_delta=0.0001, patience=10)

history = model.fit([train_input, train_output], train_targets , 
                    batch_size=BATCH_SIZE, epochs=EPOCH,
                    validation_split=VALIDATION_SPLIT)

Epoch 1/30


ValueError: ignored