In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from copy import deepcopy
import nltk
import pickle
flatten = lambda l: [item for sublist in l for item in sublist]
from data_utils import *

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [5]:
# Read the raw training file into a list of lines (each line keeps its trailing "\n").
# The `with` block closes the file handle as soon as the lines are in memory.
with open("train.iob", "r", encoding="utf-8") as train_file:
    data = train_file.readlines()

In [6]:
# Build one training example per user turn:
#   [copy of the dialogue so far, user tokens, slot tags, intent]
train_data = []
history = [["<null>"]]  # every dialogue starts from a single placeholder turn
for line in data:
    if line == "\n":
        # A blank line separates dialogues: start again from the placeholder.
        history = [["<null>"]]
        continue

    fields = line.replace("\n", "").split("|||")
    if len(fields) != 1:
        # Line with "|||" separators: utterance ||| slot tags ||| intent.
        user = fields[0].split()
        tag = fields[1].split()
        intent = fields[2]
        # Snapshot the history *before* this turn is added to it.
        train_data.append([deepcopy(history), user, tag, intent])
        history.append(user)
    else:
        # Line without "|||": it only extends the history, tokenised by `tagger.morphs`.
        history.append(tagger.morphs(fields[0]))

In [12]:
# Peek at one parsed example: [history turns, user tokens, slot tags, intent].
train_data[2]

[[['<null>'],
  ['will', 'it', 'rain', 'this', 'week'],
  ['What',
   'city',
   'are',
   'you',
   'wanting',
   'to',
   'know',
   'if',
   'it',
   "'",
   's',
   'going',
   'to',
   'rain',
   '?']],
 ['Menlo', 'Park'],
 ['B-location', 'I-location'],
 'weather']

In [13]:
# Transpose the examples into four parallel tuples, one per field.
historys, currents, slots, intents = zip(*train_data)

In [14]:
# Collect the distinct words, slot labels and intents seen in the training examples.
# sorted() pins the order: iterating a bare set of strings can change from one kernel
# start to the next (string hashing is randomised), which would reshuffle every index
# that is later assigned from these lists.
# NOTE(review): words are taken from the current user utterances only, so words that
# occur solely in history turns are absent here — confirm that this is intended.
vocab = sorted(set(flatten(currents)))
slot_vocab = sorted(set(flatten(slots)))
intent_vocab = sorted(set(intents))

In [15]:
# Word lookup: five reserved special tokens first, then every vocabulary word.
word2index = {"<pad>": 0, "<unk>": 1, "<null>": 2, "<s>": 3, "</s>": 4}
for word in vocab:
    # setdefault leaves existing entries untouched; len() is evaluated before insertion,
    # so a new word receives the next free index.
    word2index.setdefault(word, len(word2index))

# Slot-label lookup; index 0 is reserved for padding.
slot2index = {"<pad>": 0}
for label in slot_vocab:
    slot2index.setdefault(label, len(slot2index))

# Intent lookup; no reserved entries.
intent2index = {}
for label in intent_vocab:
    intent2index.setdefault(label, len(intent2index))

In [21]:
# Convert every example from token/label lists to index tensors, in place.
# NOTE(review): presumably meant to run once per freshly built `train_data`; a second
# run would hand tensors back to prepare_sequence — confirm how it copes.
for example in train_data:
    turns = example[0]
    # Slice assignment rewrites the existing history list instead of rebinding it.
    turns[:] = [prepare_sequence(turn, word2index).view(1, -1) for turn in turns]

    example[1] = prepare_sequence(example[1], word2index).view(1, -1)       # current utterance
    example[2] = prepare_sequence(example[2], slot2index).view(1, -1)       # slot labels
    example[3] = torch.LongTensor([intent2index[example[3]]]).view(1, -1)   # intent id

In [22]:
class SDEN(nn.Module):
    """Joint slot-tagging and intent-classification network with a dialogue memory.

    Every previous turn and the current utterance are encoded by separate
    bidirectional GRUs. Each turn encoding is paired with the current-utterance
    encoding, passed through a sigmoid feed-forward layer and summarised by a
    bidirectional "session" GRU. The final hidden state of the session GRU
    initialises the second layer (an LSTM) of a two-layer tagger that runs over
    the current utterance.

    Token id 0 is treated as padding throughout.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_size: embedding dimension.
        hidden_size: per-direction hidden size of the two utterance encoders.
        slot_size: number of slot classes scored for every token.
        intent_size: number of intent classes scored for every utterance.
    """

    def __init__(self,vocab_size,embed_size,hidden_size,slot_size,intent_size):
        super(SDEN,self).__init__()

        # Shared embedding table for history turns and the current utterance.
        self.embed = nn.Embedding(vocab_size,embed_size)
        # Separate encoders for the memory (history turns) and the current utterance.
        self.bigru_m = nn.GRU(embed_size,hidden_size,batch_first=True,bidirectional=True)
        self.bigru_c = nn.GRU(embed_size,hidden_size,batch_first=True,bidirectional=True)
        # Combines one turn encoding (2H) with the current encoding (2H).
        self.context_encoder = nn.Sequential(nn.Linear(hidden_size*4,hidden_size*2),
                                             nn.Sigmoid())
        self.session_encoder = nn.GRU(hidden_size*2,hidden_size*2,batch_first=True,bidirectional=True)

        # Two-layer tagger over the current utterance.
        self.decoder_1 = nn.GRU(embed_size,hidden_size*2,batch_first=True,bidirectional=True)
        self.decoder_2 = nn.LSTM(hidden_size*4,hidden_size*2,batch_first=True,bidirectional=True)

        self.intent_linear = nn.Linear(hidden_size*4,intent_size)
        self.slot_linear = nn.Linear(hidden_size*4,slot_size)
        self.dropout = nn.Dropout(0.5)

    @staticmethod
    def _last_real_state(outputs, token_ids):
        """Pick, for every row, the encoder output at its last non-padding step.

        Args:
            outputs: encoder outputs indexed as (row, time, feature).
            token_ids: the (row, time) index tensor that produced ``outputs``;
                id 0 counts as padding.

        Returns:
            Tensor of shape (row, feature).
        """
        lengths = token_ids.ne(0).sum(dim=1)
        # A row made only of padding has length 0; wrapping "-1" round to the final
        # time step gives the same result as plain negative indexing would.
        last_step = torch.remainder(lengths - 1, token_ids.size(1))
        rows = torch.arange(token_ids.size(0), dtype=torch.long, device=token_ids.device)
        return outputs[rows, last_step]

    def forward(self,history,current):
        """Score slots and intent for a batch of current utterances.

        Args:
            history: sequence with one LongTensor per batch element, each of shape
                (T_C, T_M): T_C previous turns padded to T_M tokens. All elements
                must share the same shape so they can be stacked.
            current: LongTensor of shape (B, T) holding the current utterances.

        Returns:
            Tuple ``(slot_prob, intent_prob)`` of unnormalised scores with shapes
            (B*T, slot_size) and (B, intent_size).
        """
        turn_states = []  # one (T_C, 2H) encoding per batch element
        for h in history:
            embeds = self.dropout(self.embed(h))
            outputs, _ = self.bigru_m(embeds)
            turn_states.append(self._last_real_state(outputs, h))

        M = self.dropout(torch.stack(turn_states))  # B,T_C,2H

        embeds = self.dropout(self.embed(current))
        outputs, _ = self.bigru_c(embeds)
        C = self._last_real_state(outputs, current).unsqueeze(1)  # B,1,2H
        C = self.dropout(C)

        # Pair every history turn with the encoding of the current utterance.
        CONCAT = torch.cat([M, C.expand(-1, M.size(1), -1)], 2)  # B,T_C,4H
        G = self.context_encoder(CONCAT)

        _, session_state = self.session_encoder(G)  # 2,B,2H
        # The LSTM layer starts from the session summary and an all-zero cell state.
        cell_state = torch.zeros_like(session_state)
        O_1,_ = self.decoder_1(embeds)
        O_1 = self.dropout(O_1)

        O_2,(S_2,_) = self.decoder_2(O_1,(session_state,cell_state))
        O_2 = self.dropout(O_2)
        # Join the final states of both directions into one vector per utterance.
        S = torch.cat(tuple(S_2),1)

        intent_prob = self.intent_linear(S)
        # Flatten (B, T) so every token becomes one row for the slot classifier.
        slot_prob = self.slot_linear(O_2.contiguous().view(O_2.size(0)*O_2.size(1),-1))

        return slot_prob, intent_prob

In [51]:
# Training hyper-parameters.
LR = 1e-3     # learning rate handed to the optimizer
BATCH = 32    # examples per mini-batch
EPOCH = 20    # passes over the training data

In [52]:
# Model: 100-d embeddings, 100 hidden units per direction, output sizes from the label maps.
model = SDEN(len(word2index), 100, 100, len(slot2index), len(intent2index)).to(device)

# Targets equal to 0 are skipped by the slot loss; the intent loss scores every example.
slot_loss_function = nn.CrossEntropyLoss(ignore_index=0)
intent_loss_function = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters(), lr=LR)
# Multiply the learning rate by 0.1 at a quarter and at half of the planned epochs.
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[EPOCH // 4, EPOCH // 2], gamma=0.1)

In [None]:
# Joint training: the slot loss and the intent loss are summed and optimised together.
model.train()  # make sure dropout is active
for epoch in range(EPOCH):
    losses=[]
    for i,batch in enumerate(data_loader(train_data,BATCH,True)):
        h,c,slot,intent = pad_to_batch(batch,word2index,slot2index)
        # Move the whole mini-batch to the device the model lives on.
        h = [hh.to(device) for hh in h]
        c = c.to(device)
        slot = slot.to(device)
        intent = intent.to(device)
        model.zero_grad()
        slot_p, intent_p = model(h,c)

        # slot_p holds one row per token, so the slot targets are flattened to match.
        loss_s = slot_loss_function(slot_p,slot.view(-1))
        loss_i = intent_loss_function(intent_p,intent.view(-1))
        loss = loss_s + loss_i
        losses.append(loss.item())
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            # Report the mean loss since the previous report, then start a new window.
            print("[%d/%d] [%d/%d] mean_loss : %.3f" % (epoch,EPOCH,i,len(train_data)//BATCH,np.mean(losses)))
            losses=[]

    # Advance the learning-rate schedule only after this epoch's optimizer updates.
    # Calling scheduler.step() before the first optimizer.step() (as PyTorch >= 1.1 warns)
    # skips the first value of the schedule, so both milestones would fire one epoch early.
    scheduler.step()

[0/20] [0/961] mean_loss : 4.585
[0/20] [100/961] mean_loss : 1.905
[0/20] [200/961] mean_loss : 1.092
[0/20] [300/961] mean_loss : 0.884


In [30]:
# Reverse lookups (index -> label) used to decode the model's predictions.
index2slot = dict(zip(slot2index.values(), slot2index.keys()))
index2intent = dict(zip(intent2index.values(), intent2index.keys()))

In [38]:
# Draw a random position in `data`.
# NOTE(review): `random` is not imported explicitly in this notebook; presumably it
# arrives through `from data_utils import *` — confirm.
index = random.choice(range(len(data)))

In [49]:
# Tag one randomly chosen dialogue, turn by turn, with the trained model.
# NOTE(review): this cell indexes `data` as a list of dicts with a 'dialogue' key, whereas
# the loading cell near the top binds `data` to the raw lines of train.iob — presumably a
# different dataset was loaded under the same name before this cell ran; confirm and load
# it explicitly so the notebook survives Restart & Run All.
# NOTE(review): utterances are tokenised with nltk.word_tokenize here, while the training
# examples were built with str.split() / tagger.morphs — confirm the tokenisations agree.
index = random.choice(range(len(data)))

# torch.no_grad() only disables gradient tracking; dropout stays active until eval().
model.eval()
# Inputs must live on the same device as the weights, wherever the model currently is.
model_device = next(model.parameters()).device

history=[prepare_sequence(["<null>"],word2index).view(1,-1)]
for d in data[index]['dialogue']:

    if d['turn']=='assistant':
        # Assistant turns are never tagged; they only extend the history.
        phrase = nltk.word_tokenize(d['data']['utterance'])
        phrase = prepare_sequence(phrase,word2index).view(1,-1)
        history.append(phrase)
    else:
        h = pad_to_history(history,word2index)
        # Same per-element device move as in the training loop.
        h = [hh.to(model_device) for hh in h]
        c = nltk.word_tokenize(d['data']['utterance'])
        c = prepare_sequence(c,word2index).view(1,-1)
        with torch.no_grad():
            slot_p, intent_p = model(h,c.to(model_device))

        # Highest-scoring class per token (slots) and for the utterance (intent).
        slots = slot_p.max(1)[1]
        intent = intent_p.max(1)[1]
        slots = [index2slot[i] for i in slots.tolist()]
        intent = index2intent[intent.item()]
        print(d['data']['utterance'])
        print(slots)
        print(intent)
        print("\n")
        history.append(c)

Schedule my 10 am dentist appointment on Friday with my father.
['O', 'O', 'B-event', 'O', 'I-event', 'I-event', 'O', 'B-date', 'O', 'O', 'O', 'O']
schedule


List my current schedule. 
['O', 'O', 'O', 'O', 'O']
navigate


List my schedule for Friday.
['O', 'O', 'O', 'O', 'O', 'O']
schedule


Thank you so much!
['O', 'O', 'O', 'O', 'O']
thanks




In [35]:
import pickle

In [36]:
# Bring the model back to the CPU, then write only its weights (the state dict) to disk.
model = model.to('cpu')
torch.save(model.state_dict(), 'sden.pkl')

In [37]:
# Persist the three lookup tables next to the saved weights.
# Each file is written inside a `with` block so the handle is flushed and closed
# deterministically instead of being left to the garbage collector.
for mapping, path in ((word2index, 'vocab.pkl'),
                      (slot2index, 'slot.pkl'),
                      (intent2index, 'intent.pkl')):
    with open(path, 'wb') as fout:
        pickle.dump(mapping, fout)