In [95]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import json
import pickle
from collections import Counter
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
from konlpy.tag import Mecab
import numpy as np
import random
import os
import re
import nltk

torch.manual_seed(1)

<torch._C.Generator at 0x7fe1180227b0>

# Sentiment bot 

눈치 살살 살피면서 상대방 기분을 최대화해야 함.<br>
<br>
가능한 액션 :
* 동조하기
* 웃긴말 하기(아재 스탈)
* 사과하기
* 제시하기
* 인사하기

In [11]:
USE_CUDA = torch.cuda.is_available()

In [2]:
from Hangulpy import *

In [247]:
char_vocab = ['ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ',
                  'ㅂ', 'ㅃ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 
                  'ㅎ', 'ㅏ', 'ㅐ', 'ㅑ', 'ㅒ', 'ㅓ', 'ㅔ', 'ㅕ', 'ㅖ', 'ㅗ', 'ㅘ',
                  'ㅙ', 'ㅚ', 'ㅛ', 'ㅜ', 'ㅝ', 'ㅞ', 'ㅟ', 'ㅠ', 'ㅡ', 'ㅢ', 'ㅣ',
                  'ㄳ', 'ㄵ', 'ㄶ', 'ㄺ', 'ㄻ', 'ㄼ', 'ㄽ',';', '^',':',')','!','?',',',
                  'ㄾ', 'ㄿ', 'ㅀ', 'ㅄ','<ALPHA>','<NUM>','<OTHER>']
char_to_ix = {v:i for i,v in enumerate(char_vocab)}
ix_to_char = {v:k for k,v in char_to_ix.items()}

In [248]:
def prepare_char(token, char_to_ix):
    idxs=[]
    for s in token:
        if is_hangul(s):
            # 음소 단위 분해
            try:
                emso = list(decompose(s))
                if emso[-1]=='':
                    emso.pop()
            except:
                emso = s
            idxs.extend(list(map(lambda w: char_to_ix[w], emso)))
        else:
            candit=s
            if s.isalpha():
                candit='<ALPHA>'
            elif s.isnumeric():
                candit='<NUM>'
            else:
                candit='<OTHER>'

            try:
                idxs.append(char_to_ix[candit])
            except:
                idxs.append(char_to_ix['<OTHER>']) # '' 가 OTHER같이
    #idxs = list(map(lambda w: to_ix[w], seq))
    tensor = torch.LongTensor(idxs)
    tensor = Variable(tensor)
    if USE_CUDA: tensor = tensor.cuda()

    return tensor, len(idxs)

In [335]:
NEGATIVE = ("바보", "멍청", "멍충", "ㅂㅅ", "닥쳐" ,"ㅅㅂ", "시발", "ㄷㅈㄹ", "뒤질래", "빠가", "ㅡㅡ", "ㅆㅃ", "존못","씨발"
            "시벌", "아놔", "디질래", "시바", "시방", "ㄷㅊ", "바부", "으휴", "-_-", "뻐큐", "뻑", "별로", "별루", "별론",'?','? ㄴㄴ',
            "에효", "에휴","노노","ㄴㄴ", "ㅗㅗ", "아니야","아닌", "싫", "임마", "인마", "좌식","시러", "돌았", "도랏",';;', '??','?! 노노'
            ,"아오","밥팅","밥퉁","바부","답답","장난하", "됐다", "됐어", "에혀", "때려", "자식", "짜식" ,"ㅎㅌㅊ", "하타취",
            "쓋", "씟", "쒯", "혼날래", "멍추아", "멍처아", "혼난다", "빵꾸똥꾸", "떵꼬", "똥꼬", "아냐", "우씨","존싫","졸라 실어",
            "우씌", "짜증나", "짱나", "확 씨", "확씨", "이걸 확", "죽을래", "죽는다", "빡치게", "십새", "새끼", "새키",'어휴',
            "아는게 뭐야", "아는게 머야", "아는게뭐야", "아는게머야", "아는게 없어", "아는게없어", "아는겡버서", "얌마","틀렸","틀렷",
             "데엄", "젠좡", "젠장", "새캬", "그만", "주글래?", "혼난다", "장난치냐", "최악", "ㄲㅈ","ㅜㅜ","ㅠㅠ","ㅠㅠㅠㅠ",
           "흠", "음", "엥", "킁", "크항", "크앙", "끄항", "끙", "또르르", "쩝", "뻑큐", "씨발", "씨뻘롬", "아오","헐","허얼,,","어이없네")

POSITIVE = ("짱이다","짱이야", "올", "최고", "우와", "고마워", "ㄳ", "땡큐", "천재", "똑똑","감사" , "괜찮", "오호", "오홍", "땅큐", "땅켜",
            "괜춘", "쩐다", "쩔어", "굿", "굳", "ㅇㅋ", "예쁘다", "이쁘다", "멋있", "멋져", "멋있", "멋지다", "땡쓰","땡스","웅!!","아하!",
            "이뿌", "이쁘", "아름답", "ㅇㅇ", "웅", "그래", "그랩", "고뤠","그랭","좋다","좋아","잘했","잘한다", "잘하","고마워 ㅠㅠ","힝 고마워",
            "잘하네","갠춘","만족","오와","우오오","우오와", "울지마", "쓸만", "ㄱㅊ", "좋네", "ㅅㅌㅊ", "상타","죨라 좋다 ㅋ","하이","하잉",
            "오키","오케", "대단", "잘해" ,"힘내", "화이팅", "파이팅", "울지망", "뭐가미안","뭐가 미안", "뭐가 죄송",'고마워!','!!',"안뇽","헬로",
            "뭐가죄송","머가 죄송", "머가죄송","머가 미안", "머가미안", "그려", "조아", "죠아", "됴아","땅켜","땡! 켜!", "맘에 들어", "유후", "꺄오",
            "꺄올", "내 스탈", "내 스타일", "내스탈", "내스타일", "좋았어", "져아", "사랑", "좋지","꺄!","꺄~","존좋","존나 좋아","안녕","ㅎㅇ",
            "맘에 들", "마음에 들어", "맘에들엉", "마음에들어", "맘에든", "맘에 든", "마음에 든", "마음에든", "고맙","고마웡", "잘햇어","안녕ㅋㅋ",
            "잘있어~","ㅎㅇ!!","안녕!",
            "ㅋㅋ", "ㅎㅎ", "하하", "허허", "헤헤", "헤헿","헿", "핳", "크크","앜", "호호", "히히", "키키","ㅋㅋㅋㅋㅋ")

In [336]:
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1):
        super(CharRNN, self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        
        self.linear1 = nn.Linear(hidden_size,64)
        self.linear2 = nn.Linear(64,2)
        
    def forward(self, input):
        
        hidden = Variable(torch.zeros(self.n_layers, 1, self.hidden_size)) 
        
        if USE_CUDA:
            hidden = hidden.cuda()
        
        embedded = self.embedding(input)
        output, hidden = self.gru(embedded.unsqueeze(1), hidden)
        
        l1 = F.relu(self.linear1(hidden.squeeze(1)))
        out = F.log_softmax(self.linear2(l1))
                
        return out

In [337]:
train=[]
for n in NEGATIVE:
    train.append([n,0])
for p in POSITIVE:
    train.append([p,1])

In [338]:
len(train)

263

In [339]:
model = CharRNN(len(char_to_ix),32)
loss_function = nn.NLLLoss() 
optimizer= optim.Adam(model.parameters(), lr=0.01)

In [340]:
for step in range(30):
    losses=[]
    for i, (sent, label) in enumerate(train):

        model.zero_grad()

        model_input, length = prepare_char(sent,char_to_ix)
        hypothesis = model(model_input)
        loss = loss_function(hypothesis, Variable(torch.LongTensor([label])))
        loss.backward()
        losses.append(loss.data.tolist()[0])
        
        optimizer.step()
    if step%10==0:
        print(np.mean(losses))
        random.shuffle(train)

0.339748994185
0.280069474547
0.0119279152984


In [367]:
test =" "

In [368]:
test_input, _ = prepare_char(test,char_to_ix)
result = model(test_input)
v,i = torch.max(result,1)
print(v, i)

Variable containing:
-0.2031
[torch.FloatTensor of size 1x1]
 Variable containing:
 0
[torch.LongTensor of size 1x1]



In [349]:
torch.save(model, 'sent_model.pkl')

  "type " + obj.__name__ + ". It won't be checked "


In [399]:
def agree(sent):
    reply = random.choice(["맞아요", "그렇죠","그러게 말이에요~", "그 맘 이해해요 ㅠㅠ"])    
    return reply

def sorry(sent):
    reply = random.choice(["미안해요ㅠㅠ","죄송해요ㅠ"])
    return reply

def ask(sent):
    reply = random.choice(["어떤 고민이 있으신가요?"])
    return reply

def request(sent):
    reply = random.choice(["좀 더 자세히 말해주실래요?","좀 더 자세히 말해줘요!"])
    return reply

def suggest(sent):
    reply = random.choice(["오 이건 어때요?","이런거 어떠십니까?"])
    return reply

def greeting(sent):
    reply = random.choice(["안녕!"])
    return reply

In [393]:
reply_to_func = {0:agree, 1:sorry,2:ask,3:request,4:suggest,5:greeting}

In [408]:
exp = []

In [421]:
temp=[]
user_reply= " "
while(1):
    done=False
    try:
        user_said = user_reply
        user_input,_ = prepare_char(user_said,char_to_ix)
        sent = model(user_input)
        v,i = torch.max(sent,1)
        reward = v.data.tolist()[0][0]
        random_action = random.choice(range(6))
        bot_said = reply_to_func[random_action]("")
        print("BOT : ",bot_said, "Previous USER SENT : ",i.data.tolist()[0][0])
        user_reply = input()
        print("USER : ", user_reply)
        
        if user_reply in ["좋아","맘에 들어","굿"]:
            reward+=1.0
            done=True
        elif user_reply in ["됐다", "그만하자", "에휴"]:
            reward+=-10.0
            done=True
        temp.append([user_said,random_action,reward,user_reply])
        
        if done:
            exp.append(temp)
            break
    except Exception as e:
        print(str(e))
        break

BOT :  안녕! Previous USER SENT :  0
하이!
USER :  하이!
BOT :  어떤 고민이 있으신가요? Previous USER SENT :  1
내 고민은~
USER :  내 고민은~
BOT :  안녕! Previous USER SENT :  1
ㅎㅇ!
USER :  ㅎㅇ!
BOT :  안녕! Previous USER SENT :  1
ㅡㅡ
USER :  ㅡㅡ
BOT :  좀 더 자세히 말해줘요! Previous USER SENT :  0
ㅋㅋ웅
USER :  ㅋㅋ웅
BOT :  좀 더 자세히 말해주실래요? Previous USER SENT :  1
머가
USER :  머가
BOT :  안녕! Previous USER SENT :  0
굿
USER :  굿


In [423]:
pickle.dump(exp,open('explain_memory.pkl','wb'))