# Read Model and Get Computer's Opinion from Input

In [2]:
from keras.models import load_model
from keras_preprocessing.text import tokenizer_from_json
from konlpy.tag import Twitter
import json
import numpy as np

def _load_tokenizer(path):
    """Read a tokenizer JSON file and rebuild the tokenizer it describes.

    Parameters
    ----------
    path : str
        Path of the JSON file to read.

    Returns
    -------
    tuple
        ``(payload, tokenizer)`` where ``payload`` is the value decoded by
        ``json.load`` and ``tokenizer`` is the object returned by
        ``tokenizer_from_json(payload)``.
    """
    # JSON is UTF-8 by specification; being explicit keeps the load independent
    # of the platform's default encoding (the input/output files read by this
    # notebook are opened as UTF-8 as well).
    with open(path, encoding="utf-8") as f:
        payload = json.load(f)
    return payload, tokenizer_from_json(payload)


# Trained Keras models, one per prediction target.
model_pname = load_model('ML_PNAME.h5')
model_pmake = load_model('ML_PMAKE.h5')

# "mpbase_*" tokenizers turn the pre-processed input text into index sequences
# that are fed to the models.
mpbase_pname_data, mpbase_pname_tokenizer = _load_tokenizer('mpbase_pname_tokenizer.json')
mpbase_pmake_data, mpbase_pmake_tokenizer = _load_tokenizer('mpbase_pmake_tokenizer.json')

# "pname"/"pmake" tokenizers provide the word_index used to translate a
# predicted class index back into its label.
pname_data, pname_tokenizer = _load_tokenizer('pname_tokenizer.json')
pmake_data, pmake_tokenizer = _load_tokenizer('pmake_tokenizer.json')

# Part-of-speech tags whose tokens are dropped before vectorisation.
_SKIPPED_POS_TAGS = frozenset(
    ["Josa", "Eomi", "Punctuation", "Foreign", "Number", "Alpha"]
)


def split(text, tagger=None):
    """Tokenise ``text`` and keep only the tokens with a relevant POS tag.

    The text is analysed with ``tagger.pos(text, norm=True, stem=True)``;
    every token whose tag is not in ``_SKIPPED_POS_TAGS`` is kept and followed
    by a single space.

    Parameters
    ----------
    text : str
        Raw input text.
    tagger : object, optional
        Any object exposing ``pos(text, norm=..., stem=...)`` that returns
        ``(token, tag)`` pairs.  When omitted, a ``Twitter`` tagger is created
        on first use and reused by later calls instead of being rebuilt for
        every input line.

    Returns
    -------
    list of str
        The kept tokens, each followed by a space, exploded into single
        characters.  This mirrors the historical behaviour (a list extended
        with strings), so callers are expected to rebuild the text with
        ``''.join(split(text))``.
    """
    if tagger is None:
        # NOTE: KoNLPy >= 0.4.5 warns that "Twitter" has been renamed to
        # "Okt"; switching requires changing the import at the top of the file.
        tagger = getattr(split, "_tagger", None)
        if tagger is None:
            tagger = split._tagger = Twitter()

    malist = tagger.pos(text, norm=True, stem=True)
    kept = "".join(
        word[0] + " " for word in malist if word[1] not in _SKIPPED_POS_TAGS
    )
    # Keep the list-of-characters return type for backward compatibility.
    return list(kept)

def _multi_hot(sequences, dimension):
    """Encode index sequences as rows of a 0/1 matrix.

    Returns an array of shape ``(len(sequences), dimension)`` filled with
    zeros, where ``result[i, k]`` is set to 1.0 for every index ``k`` that
    occurs in ``sequences[i]``.  Repeated indices have no additional effect
    and an empty sequence leaves its row all zeros.
    """
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results


def vectorize_sequences_pname(sequences, dimension=10000):
    """Multi-hot encode ``sequences`` for the pname model.

    Parameters
    ----------
    sequences : sequence of sequences of int
        One list of indices per sample; every index must be smaller than
        ``dimension`` (NumPy raises ``IndexError`` otherwise).
    dimension : int, optional
        Width of the output matrix (default 10000).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(sequences), dimension)`` containing 0.0 / 1.0.
    """
    return _multi_hot(sequences, dimension)


def vectorize_sequences_pmake(sequences, dimension=14000):
    """Multi-hot encode ``sequences`` for the pmake model.

    Identical to ``vectorize_sequences_pname`` except for the default
    ``dimension`` (14000).

    Parameters
    ----------
    sequences : sequence of sequences of int
        One list of indices per sample; every index must be smaller than
        ``dimension`` (NumPy raises ``IndexError`` otherwise).
    dimension : int, optional
        Width of the output matrix (default 14000).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(sequences), dimension)`` containing 0.0 / 1.0.
    """
    return _multi_hot(sequences, dimension)

Using TensorFlow backend.








Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [3]:
# Number of highest-scoring classes written per model for each input line.
TOP_K = 1

# Class index -> label lookups.  They depend only on the tokenizers, so they
# are built once instead of once per input line.
# NOTE(review): a predicted index that is absent from word_index raises
# KeyError below, exactly as before -- confirm the models can never emit one.
reverse_word_map_pname = {
    index: word for word, index in pname_tokenizer.word_index.items()
}
reverse_word_map_pmake = {
    index: word for word, index in pmake_tokenizer.word_index.items()
}

# For every line of input.txt, output.txt receives four lines:
#   pname label, pname score (percent), pmake label, pmake score (percent).
# The context manager guarantees both files are closed even if pre-processing
# or prediction raises part-way through.
with open("input.txt", "r", encoding="UTF8") as fr, \
        open("output.txt", "w", encoding="UTF8") as fw:
    for x in fr:
        # split() yields single characters; join them back into one string and
        # wrap it in a list because the tokenizers expect a batch of texts.
        x_list = [''.join(split(x))]
        pname_sequences = mpbase_pname_tokenizer.texts_to_sequences(x_list)
        pmake_sequences = mpbase_pmake_tokenizer.texts_to_sequences(x_list)

        pname_vector = vectorize_sequences_pname(pname_sequences)
        pmake_vector = vectorize_sequences_pmake(pmake_sequences)

        predictions_pname = model_pname.predict(pname_vector)
        predictions_pmake = model_pmake.predict(pmake_vector)

        # The batch holds a single sample, so only row 0 is of interest.
        input_pname = predictions_pname[0]
        input_pmake = predictions_pmake[0]

        # Class indices ordered from highest to lowest score.
        idx_pname = np.flip(np.argsort(input_pname), 0)
        idx_pmake = np.flip(np.argsort(input_pmake), 0)

        for j in idx_pname[:TOP_K]:
            pname_result = reverse_word_map_pname[j]
            pname_percent = "{:4.2f}".format(100 * input_pname[j])
            fw.write(pname_result + "\n")
            fw.write(pname_percent + "\n")

        for j in idx_pmake[:TOP_K]:
            pmake_result = reverse_word_map_pmake[j]
            pmake_percent = "{:4.2f}".format(100 * input_pmake[j])
            fw.write(pmake_result + "\n")
            fw.write(pmake_percent + "\n")

  warn('"Twitter" has changed to "Okt" since KoNLPy v0.4.5.')
-------------------------------------------------------------------------------
Deprecated: convertStrings was not specified when starting the JVM. The default
behavior in JPype will be False starting in JPype 0.8. The recommended setting
for new code is convertStrings=False.  The legacy value of True was assumed for
please file a ticket with the developer.
-------------------------------------------------------------------------------

  """)
