In [None]:
import numpy as np
import pickle
import time

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, InputLayer, Bidirectional, TimeDistributed, Embedding
from tensorflow.keras.optimizers import Adam

from konlpy.tag import Mecab

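# import warnings
# warnings.filterwarnings(action='ignore')  # Attempt to hide TensorFlow warning messages -> had no effect
# (presumably because TensorFlow emits them through its own logger rather than the `warnings` module).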

def classification_bidirectional_lstm(text: str) -> tuple:
    """Tag every morpheme of ``text`` with the saved bidirectional-LSTM model.

    Spaces in ``text`` are replaced with underscores and the result is split
    into morphemes by Mecab. Each morpheme is encoded with the pickled source
    tokenizer and post-padded to a fixed length. The network
    (Embedding -> Bidirectional LSTM -> time-distributed softmax) is rebuilt,
    its weights are loaded from disk, and the arg-max tag is taken for every
    non-padding token.

    All file paths are relative, so they resolve against the current working
    directory.

    NOTE(review): the tokenizers, the model and the Mecab instance are
    re-created on every call; callers that tag many texts may want to cache
    them.

    Args:
        text: Raw input string to analyse.

    Returns:
        A ``(result_sen, result_tag)`` pair of equally long lists: the tokens
        as looked up in the source vocabulary, and the upper-cased predicted
        tag for each of them. Both lists are empty when Mecab yields no
        morphemes.

    Raises:
        KeyError: If a predicted tag index has no entry in the target
            tokenizer's ``index_word`` (for example the reserved index 0,
            which Keras tokenizers never assign to a word).
    """
    # NOTE: pickle.load can execute arbitrary code; these vocabulary files
    # must come from a trusted source.
    pickle_path = '분류기/Save_Vocab/'
    with open(pickle_path + 'src_tokenizer.p', 'rb') as f:
        src_tokenizer = pickle.load(f)
    with open(pickle_path + 'tar_tokenizer.p', 'rb') as f:
        tar_tokenizer = pickle.load(f)

    index_to_word = src_tokenizer.index_word
    index_to_tag = tar_tokenizer.index_word
    # One extra slot for index 0, which is the padding value (mask_zero=True).
    vocab_size = len(src_tokenizer.word_index) + 1
    tag_size = len(tar_tokenizer.word_index) + 1

    # Presumably these must match the configuration the weights were trained
    # with -- confirm before changing.
    max_len = 200
    embedding_dim = 128
    hidden_units = 128

    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim, mask_zero=True))
    model.add(Bidirectional(LSTM(hidden_units, return_sequences=True)))
    model.add(TimeDistributed(Dense(tag_size, activation='softmax')))
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=Adam(0.001),
        metrics=['accuracy'],
    )

    # Load the trained weights.
    saved_path = '분류기/Save_Model/bidirectional_lstm_(final)_Project_tags/'
    model.load_weights(saved_path + 'bidirectional_lstm_(final)_Project_tags')

    # Create the Mecab morphological analyser.
    mecab = Mecab(dicpath=r"분류기/BiLSTM_Model/mecab/mecab-ko-dic")

    morphemes = mecab.morphs(text.replace(" ", "_"))

    result_sen = []
    result_tag = []
    if not morphemes:
        # Nothing to tag; also avoids calling predict() on an empty batch.
        return result_sen, result_tag

    # Each morpheme is treated as its own "text", giving one padded row per
    # morpheme. Encode and pad once instead of once per loop iteration.
    sample = src_tokenizer.texts_to_sequences(morphemes)
    sample = pad_sequences(sample, padding='post', maxlen=max_len)

    # A single batched prediction replaces one predict() call per morpheme.
    y_predicted = np.argmax(model.predict(sample), axis=-1)

    for token_ids, tag_ids in zip(sample, y_predicted):
        for word, pred in zip(token_ids, tag_ids):
            if word != 0:  # skip padding positions
                result_sen.append(index_to_word[word])
                result_tag.append(index_to_tag[pred].upper())

    return result_sen, result_tag