In [1]:
import pandas as pd
from konlpy.tag import Twitter
from pprint import pprint

In [2]:
# Action definitions: maps each action name to the integer label used in the
# `action` column of the training data.
ACTION_TABLE = {
    'stop': 0,
    'now': 1,
    'playlist_show': 2,
}

In [7]:
# Candidate utterances for each action, keyed by the ACTION_TABLE name they map to.
# The order of the pairs (stop, now, playlist_show) fixes the row order of df_docs.
utterances_by_action = (
    # stop action candidate words
    ('stop', [
        '스탑', '스톱', '그만', '노래 그만', '나가', '그만해', '그만해줘', '나가자', '꺼봐',
        '꺼', '종료', '종료해', '종료좀', '꺼져', '음악 종료', '플레이 종료', '플레이 그만', '나가요', '그만해도되',
        '멈춰줘', '멈춰', '멈춤',
    ]),
    # now action candidate words
    ('now', [
        '지금 듣고 있는게 뭐야?', '지금 듣고 있는거', '지금 뭐임?', '현재곡',
        '지금 노래 제목', '지금 노래 제목좀', '지금 노래 제목좀 알려줘', '지금 노래 알려줘',
        '지금 노래 알려주세요', '지금 노래 뭔지 알려줘', '현재 노래 정보', '지금 노래 정보', '현재 노래 알려줘',
    ]),
    # playlist_show action candidate words
    ('playlist_show', [
        '내 플레이리스트 좀 보여줄래?', '내 플레이리스트 좀 보여줄래', '내 플레이리스트', '내꺼 확인', '내꺼 정보',
        '내 플레이리스트 정보 확인', '내 플레이리스트 정보', '나의 플레이리스트 보여줘', '내꺼 확인하고싶어', '내 재생목록 확인할래',
        '재생목록 확인좀 할께', '재생목록 확인', '재생목록 정보', '플레이리스트 정보', '내 리스트 보고싶어', '내꺼 확인할게', '내꺼 보여줘',
        '내 재생목록 보여줄래?', '내 목록 보여줄래?', '내 목록 확인', '내 리스트 보여줄래?', '내 목록 정보',
    ]),
)

# Parallel lists: new_docs['document'][i] is labelled with new_docs['action'][i].
new_docs = {'document': [], 'action': []}
# The loop variable stays named `tmp` so that name is still bound to the last
# utterance list once the cell has run.
for action_name, tmp in utterances_by_action:
    new_docs['document'] += tmp
    new_docs['action'] += [ACTION_TABLE[action_name]] * len(tmp)

df_docs = pd.DataFrame(new_docs)
df_docs

Unnamed: 0,document,action
0,스탑,0
1,스톱,0
2,그만,0
3,노래 그만,0
4,나가,0
5,그만해,0
6,그만해줘,0
7,나가자,0
8,꺼봐,0
9,꺼,0


In [16]:
def pos_processing(sentence, pos_engine):
    """Tag ``sentence`` and return its ``token/tag`` strings, minus filler tags.

    Parameters
    ----------
    sentence : str
        Text handed to the tagger.
    pos_engine : object
        Any object exposing ``pos(sentence, norm=..., stem=...)`` that yields
        ``(token, tag)`` pairs. It is called with ``norm=True, stem=True``.

    Returns
    -------
    list of str
        One ``'token/tag'`` string per pair, in tagger order, skipping pairs
        tagged Exclamation, Punctuation, Josa or KoreanParticle.
    """
    skipped_tags = ('Exclamation', 'Punctuation', 'Josa', 'KoreanParticle')
    tagged_terms = []
    for token, tag in pos_engine.pos(sentence, norm=True, stem=True):
        if tag in skipped_tags:
            continue
        tagged_terms.append(token + '/' + tag)
    return tagged_terms

In [17]:
def gen_rules(nlp, df):
    """Derive per-action term-set rules from labelled utterances.

    Every row of ``df`` is reduced to a set of whitespace-separated terms
    (its *candidate*). Rows are processed in index order and each candidate is
    compared with the rules collected so far:

    * if it is a subset or superset of a rule that belongs to a *different*
      action, the conflict is printed and the candidate is dropped;
    * else if it is a strict subset of a rule of its own action, it replaces
      that rule (the shorter term set is kept);
    * else if it is a superset of (or equal to) a rule of its own action, it is
      already covered and nothing is added;
    * otherwise it is appended as a new rule for its action.

    When several rules match the same category, the last one visited is the
    one acted upon. Rows whose term set is empty are skipped.

    Parameters
    ----------
    nlp : str
        ``'twitter'`` tags ``df['document']`` with ``Twitter()`` via
        ``pos_processing`` and writes the result to ``df['terms']``. Any other
        value leaves tagging to the caller and requires an existing
        ``df['terms']`` column of space-separated terms.
    df : pandas.DataFrame
        Needs ``document`` and ``action`` columns. It is modified in place:
        ``terms`` (for ``'twitter'``) and ``term_cnt`` columns are written.

    Returns
    -------
    dict
        Maps every distinct value of ``df['action']`` to a list of term sets.

    Raises
    ------
    ValueError
        If ``nlp`` is not ``'twitter'`` and ``df`` has no ``terms`` column.
    """
    intent_rules = {x: [] for x in df['action'].unique()}

    # Compare by value: `is` tests object identity, and two equal strings are
    # not guaranteed to be the same object.
    if nlp == 'twitter':
        nlp_engine = Twitter()
        df['terms'] = df['document'].apply(
            lambda stmt: ' '.join(pos_processing(stmt, nlp_engine)))
    elif 'terms' not in df.columns:
        raise ValueError(
            "unsupported nlp engine {!r}: use 'twitter' or supply a "
            "precomputed 'terms' column".format(nlp))

    df['term_cnt'] = df['terms'].apply(lambda terms: len(terms.split()))

    for idx in df.index:
        _, action = df.loc[idx, ['document', 'action']]
        candidate = set(df.loc[idx, 'terms'].split())
        if not candidate:
            # Nothing left after tagging/filtering; no rule can be built.
            continue

        is_general, is_specific, is_conflict = None, None, None
        for intent, rules in intent_rules.items():
            for x, rule in enumerate(rules):
                if intent == action:
                    if candidate < rule:
                        # Strictly fewer terms than an existing rule of the
                        # same action: remember which rule to replace.
                        is_general = (intent, x)
                    elif candidate >= rule:
                        # An existing rule of the same action already matches.
                        is_specific = (intent, x)
                elif candidate <= rule or candidate >= rule:
                    # Overlaps a rule that belongs to another action.
                    is_conflict = (action, candidate, intent, rule)

        if is_conflict is not None:
            print("conflict : ", is_conflict)
        elif is_general is not None:
            rule_intent, rule_pos = is_general
            intent_rules[rule_intent][rule_pos] = candidate
        elif is_specific is not None:
            # Already covered by an existing rule; nothing to add.
            pass
        else:
            intent_rules[action].append(candidate)

    return intent_rules

In [18]:
# Build the rule table from the labelled utterances in df_docs and show it.
nlp_method = 'twitter'
generated_rules = gen_rules(nlp=nlp_method, df=df_docs)
pprint(generated_rules)

{0: [{'스탑/Noun'},
     {'스톱/Noun'},
     {'그만/Adverb'},
     {'나가다/Verb'},
     {'그만하다/Adjective'},
     {'끄다/Verb'},
     {'종료/Noun'},
     {'꺼지다/Verb'},
     {'나가요/Noun'},
     {'멈추다/Verb'}],
 1: [{'지금/Noun', '있다/Adjective', '듣다/Verb'},
     {'지금/Noun', '뭐임/Noun'},
     {'곡/Noun', '현재/Noun'},
     {'지금/Noun', '노래/Noun', '제목/Noun'},
     {'지금/Noun', '노래/Noun', '알다/Verb'},
     {'노래/Noun', '현재/Noun', '정보/Noun'},
     {'지금/Noun', '노래/Noun', '정보/Noun'},
     {'노래/Noun', '알다/Verb', '현재/Noun'}],
 2: [{'내/Noun', '리스트/Noun', '플레이/Noun'},
     {'내다/Verb', '확인/Noun'},
     {'내다/Verb', '정보/Noun'},
     {'보이다/Verb', '리스트/Noun', '나/Noun', '플레이/Noun'},
     {'내/Noun', '목록/Noun', '확인/Noun'},
     {'목록/Noun', '재생/Noun', '확인/Noun'},
     {'목록/Noun', '재생/Noun', '정보/Noun'},
     {'리스트/Noun', '정보/Noun', '플레이/Noun'},
     {'보다/Verb', '내/Noun', '리스트/Noun'},
     {'보이다/Verb', '내다/Verb'},
     {'내/Noun', '목록/Noun', '보이다/Verb'},
     {'내/Noun', '리스트/Noun', '보이다/Verb'},
     {'내/Noun', '목록/Noun', '정보/Noun'}]}

In [19]:
def apply_gen_rule(nlp, txt, generated_rules):
    """Print how the utterance ``txt`` fares against every generated rule.

    ``txt`` is tagged with ``pos_processing`` and turned into a set of
    ``token/tag`` terms, which is printed first. Then, for each rule of each
    action, a ``--> action rule txt`` line is printed when the term set
    contains every term of the rule, and a ``rule: ..., action: ...`` line
    otherwise.

    Parameters
    ----------
    nlp : str
        Name of the tagger; only ``'twitter'`` is supported.
    txt : str
        Utterance to test.
    generated_rules : dict
        Maps an action to an iterable of term sets, as returned by
        ``gen_rules``.

    Returns
    -------
    None
        Results are reported on standard output only.

    Raises
    ------
    ValueError
        If ``nlp`` is not ``'twitter'``.
    """
    # Compare by value (`is` checks identity, which equal strings need not
    # share) and fail up front instead of reaching an unbound `candidate`.
    if nlp != 'twitter':
        raise ValueError(
            "unsupported nlp engine {!r}: expected 'twitter'".format(nlp))

    nlp_engine = Twitter()
    candidate = set(pos_processing(txt, nlp_engine))
    print(candidate)
    for action, rules in generated_rules.items():
        for rule in rules:
            if candidate >= rule:
                print('-->', action, rule, txt)
            else:
                print("rule: {}, action: {}".format(rule, action))

In [20]:
# Try the generated rules on an utterance that is not in the training list.
apply_gen_rule(nlp='twitter', txt='그만 들을래', generated_rules=generated_rules)

{'들다/Verb', '그만/Adverb'}
rule: {'스탑/Noun'}, action: 0
rule: {'스톱/Noun'}, action: 0
--> 0 {'그만/Adverb'} 그만 들을래
rule: {'나가다/Verb'}, action: 0
rule: {'그만하다/Adjective'}, action: 0
rule: {'끄다/Verb'}, action: 0
rule: {'종료/Noun'}, action: 0
rule: {'꺼지다/Verb'}, action: 0
rule: {'나가요/Noun'}, action: 0
rule: {'멈추다/Verb'}, action: 0
rule: {'지금/Noun', '있다/Adjective', '듣다/Verb'}, action: 1
rule: {'지금/Noun', '뭐임/Noun'}, action: 1
rule: {'곡/Noun', '현재/Noun'}, action: 1
rule: {'지금/Noun', '노래/Noun', '제목/Noun'}, action: 1
rule: {'지금/Noun', '노래/Noun', '알다/Verb'}, action: 1
rule: {'노래/Noun', '현재/Noun', '정보/Noun'}, action: 1
rule: {'지금/Noun', '노래/Noun', '정보/Noun'}, action: 1
rule: {'노래/Noun', '알다/Verb', '현재/Noun'}, action: 1
rule: {'내/Noun', '리스트/Noun', '플레이/Noun'}, action: 2
rule: {'내다/Verb', '확인/Noun'}, action: 2
rule: {'내다/Verb', '정보/Noun'}, action: 2
rule: {'보이다/Verb', '리스트/Noun', '나/Noun', '플레이/Noun'}, action: 2
rule: {'내/Noun', '목록/Noun', '확인/Noun'}, action: 2
rule: {'목록/Noun', '재생/Noun', '확인/Noun'},