In [1]:
!pip install tensorflow-gpu==1.11.0



In [2]:
!pip install tensorflow==1.11.0
!pip install "tensorflow_hub>=0.6.0"




In [3]:
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf


def margin_loss(labels, raw_logits, margin=0.4, downweight=0.5):
    """Penalizes deviations from margin for each logit.
    Each wrong logit costs its distance to margin. For negative logits margin is
    0.1 and for positives it is 0.9. First subtract 0.5 from all logits. Now
    margin is 0.4 from each side.
    Args:
    labels: tensor, one hot encoding of ground truth.
    raw_logits: tensor, model predictions in range [0, 1]
    margin: scalar, the margin after subtracting 0.5 from raw_logits.
    downweight: scalar, the factor for negative cost.
    Returns:
    A tensor with cost for each data point of shape [batch_size].
    """
    logits = raw_logits - 0.5
    positive_cost = labels * tf.cast(tf.less(logits, margin),
                                     tf.float32) * tf.pow(logits - margin, 2)
    negative_cost = (1 - labels) * tf.cast(
        tf.greater(logits, -margin), tf.float32) * tf.pow(logits + margin, 2)
    return 0.5 * positive_cost + downweight * 0.5 * negative_cost


def createVocabulary(input_path, output_path, pad=True, unk=True):
    # if not isinstance(input_path, str):
    #     raise TypeError('input_path should be string')

    if not isinstance(output_path, str):
        raise TypeError('output_path should be string')
    fd=input_path
    vocab = {}
    with open(output_path, 'w+') as out:
        for line in fd:
            line = line.rstrip('\r\n')
            words = line.split()

            for w in words:
                if w == '_UNK':
                    break
                if str.isdigit(w) == True:
                    w = '0'
                if w in vocab:
                    vocab[w] += 1
                else:
                    vocab[w] = 1
        init_vocab = []
        if pad:
            init_vocab.append('_PAD')
        if unk:
            init_vocab.append('_UNK')
        vocab = sorted(vocab, key=vocab.get, reverse=True) + init_vocab

        for v in vocab:
            out.write(v + '\n')


def loadVocabulary(path):
    if not isinstance(path, str):
        raise TypeError('path should be a string')

    vocab = []
    rev = []
    with open(path) as fd:
        for line in fd:
            line = line.rstrip('\r\n')
            rev.append(line)
        vocab = dict([(x, y) for (y, x) in enumerate(rev)])

    return {'vocab': vocab, 'rev': rev}


def sentenceToIds(data, vocab, unk):
    if not isinstance(vocab, dict):
        raise TypeError('vocab should be a dict that contains vocab and rev')
    vocab = vocab['vocab']
    if isinstance(data, str):
        words = data.split()
    elif isinstance(data, list):
        words = data
    else:
        raise TypeError('data should be a string or a list contains words')

    ids = []
    if unk:
        for w in words:
            if str.isdigit(w) == True:
                w = '0'
            ids.append(vocab.get(w, vocab['_UNK']))
    else:
        for w in words:
            if str.isdigit(w) == True:
                w = '0'
            ids.append(vocab.get(w))

    return ids


def padSentence(s, max_length, vocab):
    return s + [vocab['vocab']['_PAD']] * (max_length - len(s))


# compute f1 score is modified from conlleval.pl
def __startOfChunk(prevTag, tag, prevTagType, tagType, chunkStart=False):
    if prevTag == 'B' and tag == 'B':
        chunkStart = True
    if prevTag == 'I' and tag == 'B':
        chunkStart = True
    if prevTag == 'O' and tag == 'B':
        chunkStart = True
    if prevTag == 'O' and tag == 'I':
        chunkStart = True

    if prevTag == 'E' and tag == 'E':
        chunkStart = True
    if prevTag == 'E' and tag == 'I':
        chunkStart = True
    if prevTag == 'O' and tag == 'E':
        chunkStart = True
    if prevTag == 'O' and tag == 'I':
        chunkStart = True

    if tag != 'O' and tag != '.' and prevTagType != tagType:
        chunkStart = True
    return chunkStart


def __endOfChunk(prevTag, tag, prevTagType, tagType, chunkEnd=False):
    if prevTag == 'B' and tag == 'B':
        chunkEnd = True
    if prevTag == 'B' and tag == 'O':
        chunkEnd = True
    if prevTag == 'I' and tag == 'B':
        chunkEnd = True
    if prevTag == 'I' and tag == 'O':
        chunkEnd = True

    if prevTag == 'E' and tag == 'E':
        chunkEnd = True
    if prevTag == 'E' and tag == 'I':
        chunkEnd = True
    if prevTag == 'E' and tag == 'O':
        chunkEnd = True
    if prevTag == 'I' and tag == 'O':
        chunkEnd = True

    if prevTag != 'O' and prevTag != '.' and prevTagType != tagType:
        chunkEnd = True
    return chunkEnd


def __splitTagType(tag):
    s = tag.split('-')
    if len(s) > 2 or len(s) == 0:
        raise ValueError('tag format wrong. it must be B-xxx.xxx')
    if len(s) == 1:
        tag = s[0]
        tagType = ""
    else:
        tag = s[0]
        tagType = s[1]
    return tag, tagType


def computeF1Score(correct_slots, pred_slots):
    correctChunk = {}
    correctChunkCnt = 0
    foundCorrect = {}
    foundCorrectCnt = 0
    foundPred = {}
    foundPredCnt = 0
    correctTags = 0
    tokenCount = 0
    for correct_slot, pred_slot in zip(correct_slots, pred_slots):
        inCorrect = False
        lastCorrectTag = 'O'
        lastCorrectType = ''
        lastPredTag = 'O'
        lastPredType = ''
        for c, p in zip(correct_slot, pred_slot):
            correctTag, correctType = __splitTagType(c)
            predTag, predType = __splitTagType(p)

            if inCorrect == True:
                if __endOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) == True and \
                        __endOfChunk(lastPredTag, predTag, lastPredType, predType) == True and \
                        (lastCorrectType == lastPredType):
                    inCorrect = False
                    correctChunkCnt += 1
                    if lastCorrectType in correctChunk:
                        correctChunk[lastCorrectType] += 1
                    else:
                        correctChunk[lastCorrectType] = 1
                elif __endOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) != \
                        __endOfChunk(lastPredTag, predTag, lastPredType, predType) or \
                        (correctType != predType):
                    inCorrect = False

            if __startOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) == True and \
                    __startOfChunk(lastPredTag, predTag, lastPredType, predType) == True and \
                    (correctType == predType):
                inCorrect = True

            if __startOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) == True:
                foundCorrectCnt += 1
                if correctType in foundCorrect:
                    foundCorrect[correctType] += 1
                else:
                    foundCorrect[correctType] = 1

            if __startOfChunk(lastPredTag, predTag, lastPredType, predType) == True:
                foundPredCnt += 1
                if predType in foundPred:
                    foundPred[predType] += 1
                else:
                    foundPred[predType] = 1

            if correctTag == predTag and correctType == predType:
                correctTags += 1

            tokenCount += 1

            lastCorrectTag = correctTag
            lastCorrectType = correctType
            lastPredTag = predTag
            lastPredType = predType

        if inCorrect == True:
            correctChunkCnt += 1
            if lastCorrectType in correctChunk:
                correctChunk[lastCorrectType] += 1
            else:
                correctChunk[lastCorrectType] = 1
    # from sklearn.metrics import f1_score
    # f1 =f1_score(flatten(correct_slots), flatten(pred_slots), average='micro')
    if foundPredCnt > 0:
        precision = 100 * correctChunkCnt / foundPredCnt
    else:
        precision = 0

    if foundCorrectCnt > 0:
        recall = 100 * correctChunkCnt / foundCorrectCnt
    else:
        recall = 0

    if (precision + recall) > 0:
        f1 = (2 * precision * recall) / (precision + recall)
    else:
        f1 = 0
    # from sklearn.metrics import precision_score
    # precision=precision_score(flatten(correct_slots),flatten(pred_slots),average='micro')
    # from sklearn.metrics import recall_score
    # recall=recall_score(flatten(correct_slots),flatten(pred_slots),average='micro')
    # f1_score=f1_score*100
    # precision=precision*100
    # recall=recall*100
    return f1, precision, recall


class DataProcessor(object):
    def __init__(self, in_path, slot_path, intent_path, in_vocab, slot_vocab, intent_vocab, shuffle=False):
        # self.__fd_in = open(in_path, 'r').readlines()
        # self.__fd_slot = open(slot_path, 'r').readlines()
        # self.__fd_intent = open(intent_path, 'r').readlines()
        self.__fd_in=in_path
        self.__fd_slot=slot_path
        self.__fd_intent=intent_path
        if shuffle:
            self.shuffle()
        self.__in_vocab = in_vocab
        self.__slot_vocab = slot_vocab
        self.__intent_vocab = intent_vocab
        self.end = 0
        #print(self.__fd_in,self.__fd_slot,self.__fd_intent)
    def close(self):
        self.__fd_in.close()
        self.__fd_slot.close()
        self.__fd_intent.close()

    def shuffle(self):
        from sklearn.utils import shuffle
        self.__fd_in, self.__fd_slot, self.__fd_intent = shuffle(self.__fd_in, self.__fd_slot, self.__fd_intent)
        #print('1')
    def get_batch(self, batch_size):
        #print('2')
        in_data = []
        slot_data = []
        slot_weight = []
        length = []
        intents = []

        batch_in = []
        batch_slot = []
        max_len = 0

        in_seq = []
        slot_seq = []
        intent_seq = []
        #print('3')
        for i in range(batch_size):
            try:
                inp = self.__fd_in.pop()
            except IndexError:

                
                self.end = 1
                break
            slot = self.__fd_slot.pop()
            intent = self.__fd_intent.pop()
            inp =inp.rstrip()
            slot =slot.rstrip()
            intent = intent.rstrip()
            if slot==None:
              self.end=1
              break
            # slot=self.__fd_slot[i]
            # intent=self.__fd_intent[i]
            # inp=self.__fd_in[i]
            #print(slot,intent,inp)
            in_seq.append(inp)
            slot_seq.append(slot)
            intent_seq.append(intent)

            iii = inp
            sss = slot
            inp = sentenceToIds(inp, self.__in_vocab, unk=True)
            slot = sentenceToIds(slot, self.__slot_vocab, unk=True)
            intent = sentenceToIds(intent, self.__intent_vocab, unk=False)
            #print('4')
            if None not in intent:
                batch_in.append(np.array(inp))
                batch_slot.append(np.array(slot))
                length.append(len(inp))
                intents.append(intent[0])
            #print('5')
            if len(inp) != len(slot):
                print(iii, sss)
                print(inp, slot)
                exit(0)
            if len(inp) > max_len:
                max_len = len(inp)
              
        length = np.array(length)
        #print('6')
        intents = np.array(intents)
        for i, s in zip(batch_in, batch_slot):
            in_data.append(padSentence(list(i), max_len, self.__in_vocab))
            slot_data.append(padSentence(list(s), max_len, self.__slot_vocab))

        in_data = np.array(in_data)
        slot_data = np.array(slot_data)
        #print('7')
        for s in slot_data:
            #print('8')
            s=np.array(s)
            #print('9')
            weight = np.not_equal(s, np.full(s.shape, self._DataProcessor__slot_vocab['vocab']['_PAD']))
            weight = weight.astype(np.float32)
            #print('10')
            slot_weight.append(weight)
        slot_weight = np.array(slot_weight)
        #print('11')
        return in_data, slot_data, slot_weight, length, intents, in_seq, slot_seq, intent_seq

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [4]:
from google.colab import auth
auth.authenticate_user()
import gspread
from oauth2client.client import GoogleCredentials
gc = gspread.authorize(GoogleCredentials.get_application_default())

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [6]:
import os

class Reader:
    def __init__(self):
        pass

    def read(dataset_folder_path):
        wb = gc.open_by_url(dataset_folder_path)
        sheet=wb.sheet1
        data = sheet.get_all_values()
        df = pd.DataFrame(data)
        df.columns = df.iloc[0]
        new_data=df.to_numpy()
        text_arr=new_data[1:,0]
        text_arr_new=[]
        tag_arr=new_data[1:,2]
        tag_arr_new=[]
        label_arr=new_data[1:,1]
        label_arr_new=[]
        for i in range(0,len(tag_arr)):
          if len(tag_arr[i])>3:
            text_arr_new.append(text_arr[i])
            tag_arr_new.append(tag_arr[i])
            label_arr_new.append(label_arr[i])
        print(len(text_arr_new))
        print(len(tag_arr_new))
        print(len(label_arr_new))
        count=0;

        # with open(os.path.join(dataset_folder_path, 'label'), encoding='utf-8') as f:
        #     labels = f.readlines()

        # with open(os.path.join(dataset_folder_path, 'seq.in'), encoding='utf-8') as f:
        #     text_arr = f.readlines()

        # with open(os.path.join(dataset_folder_path, 'seq.out'), encoding='utf-8') as f:
        #     tags_arr = f.readlines()

        # assert len(text_arr) == len(tags_arr) == len(labels) # test by using 'assert'
        
        # ext_arr_new=np.array(text_arr_new)
        # tag_arr_new=np.array(tag_arr_new)
        # label_arr_new=np.array(label_arr_new)
        n1=None
        n2=None
        n3=None
        n1=text_arr_new
        n2=tag_arr_new
        n3=label_arr_new
        return n1,n2,n3

if __name__ == '__main__':
      text_arr_new,tag_arr_new,label_arr_new=Reader.read('https://docs.google.com/spreadsheets/d/1hqpyPcAIY-P9hoChU69SXLRGfhuKFGmQE4vvcp9RSdU/edit#gid=0') #link of dataset of google sheet
      print(text_arr_new, tag_arr_new, label_arr_new)

8357
8357
8357
['hi, can you help me in buying a phone ?', 'it should have good camera and good battery capacity', 'how much does it cost ? ', "what is it's color ? ", 'oh, is it possible in black ? ', 'but I like black', "what is it's cost ? ", 'ok can you book Redmi A for me', 'Thanks for help', 'hi, I want to buy a new phone', 'It should have OS android and dual sim features', 'what is its cost ?', 'what color is it ?', 'Oh, is it possible to get in white color ?', 'but I would prefer in white', "what's the price ?", 'Okay, I will book Oppo AA model', 'Thanks a lot ', 'Hey, I am looking for a good phone', 'It should be touch phone with good memory and high camera quality', 'What is the price ?', 'Does it has DualSim ?', 'Is it new released phone ?', 'What is its OS ?', 'Oh it is not good for me. I want Android Operating phone', 'But I am comfortable with Android Operating phone', 'What is the cost ?', 'ok can you book Oppo AA  for me', 'Thanks for help', 'Hey, can you help me in buy

In [7]:
tag_arr_new=np.array(tag_arr_new)
tag_arr_new2=[]
for i in range(len(tag_arr_new)):
  
  if tag_arr_new[i]=='null':
    tag=''
    for k in range(len(text_arr_new[i].split())):
      #print(k)
      tag=tag+' '+'O'
    tag_arr_new2.append(tag)
  else:
    tag_arr_new2.append(tag_arr_new[i])

In [8]:
train_text_arr=[]
train_tags_arr=[]
train_intents=[]
for i in range(0,5000):
  train_text_arr.append(text_arr_new[i])
  train_tags_arr.append(tag_arr_new2[i])
  train_intents.append(label_arr_new[i])
print(len(train_text_arr),len(train_tags_arr),len(train_intents))

5000 5000 5000


In [9]:
val_text_arr=[]
val_tags_arr=[]
val_intents=[]
for i in range(5000,7000):
  val_text_arr.append(text_arr_new[i])
  val_tags_arr.append(tag_arr_new2[i])
  val_intents.append(label_arr_new[i])
print(len(val_text_arr),len(val_tags_arr),len(val_intents))

2000 2000 2000


In [10]:
data_text_arr=[]
data_tags_arr=[]
data_intents=[]
for i in range(7000,8357):
  data_text_arr.append(text_arr_new[i])
  data_tags_arr.append(tag_arr_new2[i])
  data_intents.append(label_arr_new[i])
print(len(data_text_arr),len(data_tags_arr),len(data_intents))

1357 1357 1357


In [11]:
from itertools import chain
def flatten(y):
    ## flatten a list of lists.
    ## flatten([[1,2], [3,4]]) --> [1, 2, 3, 4]
    return list(chain.from_iterable(y))


In [None]:
print(flatten(data_tags_arr))

[' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'B', '-', 'R', 'e', 'l', 'e', 'a', 's', 'e', 'd', '_', 'Y', 'r', ' ', 'O', ' ', 'O', ' ', 'B', '-', 'C', 'o', 'l', 'o', 'r', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', 'O', ' ', 'O', ' ', 'B', '-', 'R', 'a', 'd', 'i', 'o', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'B', '-', 'B', 'r', 'a', 'n', 'd', ' ', 'O', ' ', 'O', ' ', 'B', '-', 'B', 'a', 't', 't', 'e', 'r', 'y', ' ', 'O', ' ', 'O', ' ', 'O', ' ', 'O', ' ',

In [11]:
import os
import argparse
import logging
import sys
import tensorflow as tf
import numpy as np
from tensorflow.contrib.rnn.python.ops import core_rnn_cell
from tensorflow.python.ops import rnn_cell_impl

#from utils import createVocabulary, loadVocabulary, computeF1Score, DataProcessor

os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# tf.set_random_seed(20181226)  
# np.random.seed(20181226)
# todo: 1. word pre-train embedding, gru, crf, lr decay

parser = argparse.ArgumentParser(allow_abbrev=False)

# Network
parser.add_argument("--num_units", type=int, default=64, help="Network size.", dest='layer_size',required=False)
parser.add_argument("--model_type", type=str, default='full', help="""full(default) | intent_only
                                                                    full: full attention model
                                                                    intent_only: intent attention model""",required=False)
parser.add_argument("--priority_order", type=str, default='slot_first', help="""Type 'slot_first' or 'intent_first'
                                                                              to decide whose influence ought to calculate first use.""",required=False)
parser.add_argument("--use_crf", type=bool, default=True, help="""use crf for seq labeling""",required=False)
parser.add_argument("--use_embedding", type=str, default='1', help="""use pre-trained embedding""",required=False)
parser.add_argument("--cell", type=str, default='lstm', help="""rnn cell""",required=False) 
parser.add_argument("--iteration_num", type=int, default=1, help="""the number of iteration times""",required=False)

# Training Environment
parser.add_argument("--batch_size", type=int, default=16, help="Batch size.",required=False)
parser.add_argument("--batch_size_add", type=int, default=8, help="Batch size add.",required=False)
parser.add_argument("--max_epochs", type=int, default=100, help="Max epochs to train.",required=False)  
parser.add_argument("--no_early_stop", action='store_false', dest='early_stop',
                    help="Disable early stop, which is based on sentence level accuracy.",required=False)
parser.add_argument("--patience", type=int, default=15, help="Patience to wait before stop.",required=False)
# learn rate param
parser.add_argument("--learning_rate_decay", type=str, default='1', help="learning_rate_decay",required=False)
parser.add_argument("--learning_rate", type=float, default=0.001, help="The initial learning rate.",required=False)
parser.add_argument("--decay_steps", type=int, default=280 * 4, help="decay_steps.",required=False)
parser.add_argument("--decay_rate", type=float, default=0.9, help="decay_rate.",required=False)

# Model and Vocab
parser.add_argument("--dataset", type=str, default='atis', help="""Type 'atis' or 'snips' to use dataset provided by us or enter what ever you named your own dataset.
                Note, if you don't want to use this part, enter --dataset=''. It can not be None""",required=False)
parser.add_argument("--model_path", type=str, default='./model', help="Path to save model.",required=False)
parser.add_argument("--vocab_path", type=str, default='./vocab', help="Path to vocabulary files.",required=False)

# Data
parser.add_argument("--train_data_path", type=str, default='train', help="Path to training data files.",required=False)
parser.add_argument("--test_data_path", type=str, default='test', help="Path to testing data files.",required=False)
parser.add_argument("--valid_data_path", type=str, default='valid', help="Path to validation data files.",required=False)
parser.add_argument("--input_file", type=str, default='seq.in', help="Input file name.",required=False)
parser.add_argument("--slot_file", type=str, default='seq.out', help="Slot file name.",required=False)
parser.add_argument("--intent_file", type=str, default='label', help="Intent file name.",required=False)
parser.add_argument("--embedding_path", type=str, default='', help="embedding array's path.",required=False)

arg = parser.parse_args(''.split())
if arg.dataset=='atis':
    arg.model_type='intent_only'
else:
    arg.model_type='full'

# Print arguments
for k, v in sorted(vars(arg).items()):
    print(k, '=', v)
print()
# use full attention or intent only
if arg.model_type == 'full':
    remove_slot_attn = False
elif arg.model_type == 'intent_only':
    remove_slot_attn = True
else:
    print('unknown model type!')
    exit(1)

# full path to data will be: ./data + dataset + train/test/valid
if arg.dataset == None:
    print('name of dataset can not be None')
    exit(1)
elif arg.dataset == 'snips':
    print('use snips dataset')
elif arg.dataset == 'atis':
    print('use atis dataset')
else:
    print('use own dataset: ', arg.dataset)
full_train_path = os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/data', arg.dataset, arg.train_data_path)
full_test_path = os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/data', arg.dataset, arg.test_data_path)
full_valid_path = os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/data', arg.dataset, arg.valid_data_path)

createVocabulary(train_text_arr, os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'in_vocab'))
createVocabulary(train_tags_arr, os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'slot_vocab'))
createVocabulary(train_intents, os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'intent_vocab'),pad=False, unk=False)
# return map: {'vocab': vocab, 'rev': rev}, vocab: map, rev: array
in_vocab = loadVocabulary(os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'in_vocab'))
slot_vocab = loadVocabulary(os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'slot_vocab'))
intent_vocab = loadVocabulary(os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'intent_vocab'))



def createModel(input_data, input_size, sequence_length, slots, slot_size, intent_size, layer_size=128,
                isTraining=True):
    cell_fw = tf.contrib.rnn.BasicLSTMCell(layer_size)
    cell_bw = tf.contrib.rnn.BasicLSTMCell(layer_size)

    if isTraining == True:
        cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=0.5,
                                                output_keep_prob=0.5)
        cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=0.5,
                                                output_keep_prob=0.5)
    # embedding layer， [word size, embed size] 724, 64
    if arg.embedding_path:
        embedding_weight = np.load(arg.embedding_path)
        embedding = tf.Variable(embedding_weight, name='embedding', dtype=tf.float32)
    else:
        embedding = tf.get_variable('embedding', [input_size, layer_size])
    # [bs, nstep, embed size]
    inputs = tf.nn.embedding_lookup(embedding, input_data)
    # state_outputs: [bs, nstep, embed size], final_state: [4, bs, embed size] include cell state * 2, hidden state * 2
    state_outputs, final_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs,
                                                                 sequence_length=sequence_length, dtype=tf.float32)
    # [bs, embed size * 4]
    final_state = tf.concat([final_state[0][0], final_state[0][1], final_state[1][0], final_state[1][1]], 1)
    # [bs, nstep, embed size * 2]
    state_outputs = tf.concat([state_outputs[0], state_outputs[1]], 2)
    state_shape = state_outputs.get_shape()

    with tf.variable_scope('attention'):
        # [bs, nstep, embed size * 2]
        slot_inputs = state_outputs 
        if not remove_slot_attn:
            with tf.variable_scope('slot_attn'):
                # embed size * 2
                attn_size = state_shape[2].value
                origin_shape = tf.shape(state_outputs) 
                # [bs, 1, nstep, embed size * 2]
                hidden = tf.expand_dims(state_outputs, 1)
                # [bs, nstep, 1, embed size * 2]
                hidden_conv = tf.expand_dims(state_outputs, 2)
                # k: [filter_height, filter_width, in_channels, out_channels]
                k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size])
                # [bs, nstep, 1, embed size * 2]
                hidden_features = tf.nn.conv2d(hidden_conv, k, [1, 1, 1, 1], "SAME")  
                # [bs, nstep, embed size * 2]
                hidden_features = tf.reshape(hidden_features, origin_shape)
                # [bs, 1, nstep, embed size * 2]
                hidden_features = tf.expand_dims(hidden_features, 1)
                v = tf.get_variable("AttnV", [attn_size])

                slot_inputs_shape = tf.shape(slot_inputs)
                # [bs * nstep, embed size * 2]
                slot_inputs = tf.reshape(slot_inputs, [-1, attn_size])
                # [bs * nstep, embed size * 2]
                y = core_rnn_cell._linear(slot_inputs, attn_size, True) 
                # [bs , nstep, embed size * 2]
                y = tf.reshape(y, slot_inputs_shape)
                # [bs , nstep, 1, embed size * 2]
                y = tf.expand_dims(y, 2)
                # [bs , nstep, nstep] = [bs, 1, nstep, hidden size] + [bs , nstep, 1, embed size * 2]
                s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [3])
                a = tf.nn.softmax(s)
                # a shape = [bs, nstep, nstep, 1]
                a = tf.expand_dims(a, -1)
                # a shape = [bs, nstep, embed size * 2]
                slot_d = tf.reduce_sum(a * hidden, [2])
                slot_output = tf.reshape(slot_d,[-1,attn_size])
        else:
            attn_size = state_shape[2].value
            slot_d=state_outputs
            slot_inputs = tf.reshape(slot_inputs, [-1, attn_size])
            slot_output = slot_inputs

        intent_input = final_state
        with tf.variable_scope('intent_attn'):
            attn_size = state_shape[2].value 
            # [bs, nstep, 1, embed size * 2]
            hidden = tf.expand_dims(state_outputs, 2)
            k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size])
            # [bs, nstep, 1, embed size * 2]
            hidden_features = tf.nn.conv2d(hidden, k, [1, 1, 1, 1], "SAME")
            v = tf.get_variable("AttnV", [attn_size])

            # [bs, embed size * 2]
            y = core_rnn_cell._linear(intent_input, attn_size, True)
            # [bs, 1, 1, embed size * 2]
            y = tf.reshape(y, [-1, 1, 1, attn_size])
            # [bs, nstep]
            s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [2, 3])
            a = tf.nn.softmax(s)
            # [bs, nstep, 1]
            a = tf.expand_dims(a, -1)
            # [bs, nstep, 1, 1]
            a = tf.expand_dims(a, -1)
            # [bs, embed size * 2]
            d = tf.reduce_sum(a * hidden, [1, 2]) 
            intent_output = d
            #[bs, embedding * 2]
            intent_context_states = intent_output
            print(a)

        if arg.priority_order == 'intent_first':
            for n in range(arg.iteration_num):
                with tf.variable_scope('intent_subnet' + str(n - 1)):
                    # embedding*2
                    attn_size = state_shape[2].value
                    # [bs, nstep, 1, embed size * 2]
                    hidden = tf.expand_dims(state_outputs, 2)
                    # [bs,nstep, 1, embeddize*2]
                    reinforce_state = tf.expand_dims(slot_d, 2)
                    k1 = tf.get_variable("W1", [1, 1, attn_size, attn_size])
                    k2 = tf.get_variable('W2', [1, 1, attn_size, attn_size])
                    # [bs, nstep, 1, embed size * 2]
                    reinforce_features = tf.nn.conv2d(reinforce_state, k1, [1, 1, 1, 1], "SAME")
                    hidden_features = tf.nn.conv2d(hidden, k2, [1, 1, 1, 1], "SAME")
                    v1 = tf.get_variable("AttnV", [attn_size])
                    bias = tf.get_variable("Bias", [attn_size])
                    # [bs, nstep]
                    s = tf.reduce_sum(v1 * tf.tanh(hidden_features + reinforce_features + bias), [2, 3])
                    a = tf.nn.softmax(s)
                    # [bs, nstep, 1]
                    a = tf.expand_dims(a, -1)
                    # [bs, nstep, 1, 1]
                    a = tf.expand_dims(a, -1)
                    # [bs, embedding*2]
                    r_slot = tf.reduce_sum(a * reinforce_state, [1, 2])

                    r_intent = r_slot + intent_context_states

                    intent_output = tf.concat([r_intent, intent_input], 1)

                # with tf.variable_scope('slot_subnet' + str(n - 1)):
                #     # [bs, embed size * 2]
                #     intent_gate = core_rnn_cell._linear(intent_output, attn_size, True)
                #     # [bs, 1,embed size * 2]
                #     intent_gate = tf.reshape(intent_gate, [-1, 1, intent_gate.get_shape()[1].value])
                #     v1 = tf.get_variable("gateV", [attn_size])
                #     # [bs, nstep, embed size * 2]
                #     relation_factor = v1 * tf.tanh(slot_d + intent_gate)
                #     # [bs, nstep]
                #     relation_factor = tf.reduce_sum(relation_factor, [2])
                #     # [bs, nstep, 1]
                #     relation_factor = tf.expand_dims(relation_factor, -1)
                #     # [bs, nstep, embed size * 2]
                #     reinforce_state = slot_d * relation_factor
                #     # [bs * nstep, embed size * 2]
                #     reinforce_vector = tf.reshape(reinforce_state, [-1, attn_size])
                #     # [bs * nstep, embed size * 4]
                #     slot_output = tf.concat([reinforce_vector, slot_inputs], 1)
              

        else:
            for n in range(arg.iteration_num):
                # with tf.variable_scope('slot_subnet' + str(n - 1)):
                #     # [bs, embed size * 2]
                #     intent_gate = core_rnn_cell._linear(intent_output, attn_size, True)
                #     # [bs, 1,embed size * 2]
                #     intent_gate = tf.reshape(intent_gate, [-1, 1, intent_gate.get_shape()[1].value])  
                #     v1 = tf.get_variable("gateV", [attn_size])
                #     # [bs, nstep, embed size * 2]
                #     relation_factor = v1 * tf.tanh(slot_d + intent_gate)  
                #     # [bs, nstep]
                #     relation_factor = tf.reduce_sum(relation_factor, [2])
                #     # [bs, nstep, 1]
                #     relation_factor = tf.expand_dims(relation_factor, -1)
                #     reinforce_state = slot_d * relation_factor
                #     # [bs * nstep, embed size * 2]
                #     reinforce_vector = tf.reshape(reinforce_state, [-1, attn_size])
                #     # [bs * nstep, embed size * 4]
                #     slot_output = tf.concat([reinforce_vector,slot_inputs], 1)               

                with tf.variable_scope('intent_subnet' + str(n - 1)):
                    # embedding*2
                    attn_size = state_shape[2].value
                    # [bs, nstep, 1, embed size * 2]
                    hidden = tf.expand_dims(state_outputs, 2)
                    # [bs,nstep, 1, embedding_size*2]
                    reinforce_state = tf.expand_dims(slot_d, 2)

                    k1 = tf.get_variable("W1", [1, 1, attn_size, attn_size])
                    k2 = tf.get_variable('W2', [1, 1, attn_size, attn_size])
                    # [bs, nstep, 1, embed size * 2]
                    slot_features = tf.nn.conv2d(reinforce_state, k1, [1, 1, 1, 1], "SAME")
                    hidden_features = tf.nn.conv2d(hidden, k2, [1, 1, 1, 1], "SAME")
                    v1 = tf.get_variable("AttnV", [attn_size])
                    bias = tf.get_variable("Bias", [attn_size])
                    # [bs, nstep]
                    s = tf.reduce_sum(v1 * tf.tanh(hidden_features + slot_features + bias), [2, 3])
                    a = tf.nn.softmax(s)
                    # [bs, nstep, 1]
                    a = tf.expand_dims(a, -1)
                    # [bs, nstep, 1, 1]
                    a = tf.expand_dims(a, -1)
                    # [bs, embedding*2]
                    slot_reinforce_states = tf.reduce_sum(a * reinforce_state, [1, 2])

                    r_intent = slot_reinforce_states + intent_context_states

                    intent_output = tf.concat([r_intent, intent_input], 1)

    with tf.variable_scope('intent_proj'):
        # [bs, intent_size]
        intent = core_rnn_cell._linear(intent_output, intent_size, True)
    with tf.variable_scope('slot_proj'):
        # [bs * nsetp, intent_size]
        slot = core_rnn_cell._linear(slot_output, slot_size, True)
        if arg.use_crf:
            nstep = tf.shape(state_outputs)[1]
            slot = tf.reshape(slot, [-1, nstep, slot_size])
            # [bs,nstep,slot_size]
    outputs = [slot, intent]
    return outputs


# Create Training Model
input_data = tf.placeholder(tf.int32, [None, None], name='inputs')
sequence_length = tf.placeholder(tf.int32, [None], name="sequence_length")
global_step = tf.Variable(0, trainable=False, name='global_step')
slots = tf.placeholder(tf.int32, [None, None], name='slots')
slot_weights = tf.placeholder(tf.float32, [None, None], name='slot_weights')
intent = tf.placeholder(tf.int32, [None], name='intent')

with tf.variable_scope('model'):
    training_outputs = createModel(input_data, len(in_vocab['vocab']), sequence_length, slots, len(slot_vocab['vocab']),
                                   len(intent_vocab['vocab']), layer_size=arg.layer_size)

slots_shape = tf.shape(slots)
slots_reshape = tf.reshape(slots, [-1])

slot_outputs = training_outputs[0]
with tf.variable_scope('slot_loss'):
    if arg.use_crf:
        log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood(slot_outputs, slots, sequence_length)
        slot_loss = tf.reduce_mean(-log_likelihood) 
    else:
        crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=slots_reshape, logits=slot_outputs)
        crossent = tf.reshape(crossent, slots_shape)
        slot_loss = tf.reduce_sum(crossent * slot_weights, 1)
        total_size = tf.reduce_sum(slot_weights, 1)
        total_size += 1e-12
        slot_loss = slot_loss / total_size

intent_output = training_outputs[1]
with tf.variable_scope('intent_loss'):
    crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=intent, logits=intent_output)
    intent_loss = tf.reduce_sum(crossent) / tf.cast(arg.batch_size, tf.float32)
params = tf.trainable_variables()
# learning rate decay
learning_rate = tf.train.exponential_decay(arg.learning_rate, global_step, arg.decay_steps, arg.decay_rate,
                                           staircase=False)
if arg.learning_rate_decay:
    opt = tf.train.AdamOptimizer(learning_rate)
else:
    opt = tf.train.AdamOptimizer(arg.learning_rate)
intent_params = []
slot_params = []
for p in params:
    if not 'slot_' in p.name:
        intent_params.append(p)
    if 'slot_' in p.name or 'bidirectional_rnn' in p.name or 'embedding' in p.name:
        slot_params.append(p) 
gradients_slot = tf.gradients(slot_loss, slot_params)
gradients_intent = tf.gradients(intent_loss, intent_params)

clipped_gradients_slot, norm_slot = tf.clip_by_global_norm(gradients_slot, 5.0)
clipped_gradients_intent, norm_intent = tf.clip_by_global_norm(gradients_intent, 5.0)

gradient_norm_slot = norm_slot
gradient_norm_intent = norm_intent
update_slot = opt.apply_gradients(zip(clipped_gradients_slot, slot_params))
update_intent = opt.apply_gradients(zip(clipped_gradients_intent, intent_params), global_step=global_step)

training_outputs = [global_step, slot_loss, update_intent, update_slot, gradient_norm_intent, gradient_norm_slot]
inputs = [input_data, sequence_length, slots, slot_weights, intent]

# Create Inference Model
with tf.variable_scope('model', reuse=True):
    inference_outputs = createModel(input_data, len(in_vocab['vocab']), sequence_length, slots,
                                    len(slot_vocab['vocab']),
                                    len(intent_vocab['vocab']), layer_size=arg.layer_size, isTraining=False)
# slot output
if arg.use_crf:
    inference_slot_output, pred_scores = tf.contrib.crf.crf_decode(inference_outputs[0], trans_params, sequence_length)
else:
    inference_slot_output = tf.nn.softmax(inference_outputs[0], name='slot_output')
# intent output

inference_intent_output = tf.nn.softmax(inference_outputs[1], name='intent_output')

inference_outputs = [inference_intent_output, inference_slot_output]
inference_inputs = [input_data, sequence_length]

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

saver = tf.train.Saver()

# gpu setting
gpu_options = tf.GPUOptions(allow_growth=True)

# Start Training
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(tf.global_variables_initializer()) 
    logging.info('Training Start')

    epochs = 0
    loss = 0.0
    data_processor = None
    line = 0
    num_loss = 0
    step = 0
    no_improve = 0

    # variables to store highest values among epochs, only use 'valid_err' for now
    valid_slot = 0
    test_slot = 0
    valid_intent = 0
    test_intent = 0
    valid_err = 0 
    test_err = 0
    best_epoch_num = 0
    while True:
        #print("1")
        
#print(len(train_text_arr),len(train_tags_arr),len(train_intents))
        #print('2')
        if data_processor==None:
          train_text_arr=[]
          train_tags_arr=[]
          train_intents=[]
          for i in range(0,5000):
            train_text_arr.append(text_arr_new[i])
            train_tags_arr.append(tag_arr_new2[i])
            train_intents.append(label_arr_new[i])
          data_processor = DataProcessor(train_text_arr,train_tags_arr,train_intents, in_vocab, slot_vocab,intent_vocab)
        #print(3)
        in_data, slot_data, slot_weight, length, intents, _, _, _ = data_processor.get_batch(arg.batch_size)
        #print('4')
        feed_dict = {input_data.name: in_data, slots.name: slot_data, slot_weights.name: slot_weight,
                     sequence_length.name: length, intent.name: intents}
        #print(len(train_text_arr),len(train_tags_arr),len(train_intents))
        #print('5')
        ret = sess.run(training_outputs, feed_dict)
        loss += np.mean(ret[1])
        #print('6')
        line += arg.batch_size
        step = ret[0] 
        num_loss += 1
        #print('7')
        if data_processor.end == 1:
            arg.batch_size += arg.batch_size_add
            if 5000 % arg.batch_size ==0:
              arg.batch_size+=1
            line = 0
            #data_processor.close()
            data_processor = None
            epochs += 1
            logging.info('Step: ' + str(step))
            logging.info('Epochs: ' + str(epochs))
            logging.info('Loss: ' + str(loss / num_loss))
            num_loss = 0
            loss = 0.0
         
            save_path = os.path.join(arg.model_path, '_step_' + str(step) + '_epochs_' + str(epochs) + '.ckpt')
            saver.save(sess, save_path)


            def valid(in_path, slot_path, intent_path):
                data_processor_valid = DataProcessor(in_path, slot_path, intent_path, in_vocab, slot_vocab,
                                                     intent_vocab)

                pred_intents = []
                correct_intents = []
                slot_outputs = []
                correct_slots = []
                input_words = []

                # used to gate
                gate_seq = []
                while True:
                    in_data, slot_data, slot_weight, length, intents, in_seq, slot_seq, intent_seq = data_processor_valid.get_batch(
                        arg.batch_size)  
                    if len(in_data) <= 0:
                        break
                    feed_dict = {input_data.name: in_data, sequence_length.name: length}
                    ret = sess.run(inference_outputs, feed_dict)
                    for i in ret[0]: 
                        pred_intents.append(np.argmax(i))
                    for i in intents:
                        correct_intents.append(i)

                    pred_slots = ret[1].reshape((slot_data.shape[0], slot_data.shape[1], -1)) 
                    for p, t, i, l in zip(pred_slots, slot_data, in_data, length):
                        if arg.use_crf:
                            p = p.reshape([-1]) 
                        else:
                            p = np.argmax(p, 1)
                        tmp_pred = []
                        tmp_correct = []
                        tmp_input = []
                        for j in range(l):
                            tmp_pred.append(slot_vocab['rev'][p[j]])
                            tmp_correct.append(slot_vocab['rev'][t[j]])
                            tmp_input.append(in_vocab['rev'][i[j]])

                        slot_outputs.append(tmp_pred)
                        correct_slots.append(tmp_correct)
                        input_words.append(tmp_input)

                    if data_processor_valid.end == 1:
                        break

                pred_intents = np.array(pred_intents)
                correct_intents = np.array(correct_intents)
                accuracy = (pred_intents == correct_intents)
                semantic_acc = accuracy
                accuracy = accuracy.astype(float)
                accuracy = np.mean(accuracy) * 100.0

                index = 0
                for t, p in zip(correct_slots, slot_outputs):
                    # Process Semantic Error
                    if len(t) != len(p):
                        raise ValueError('Error!!')

                    for j in range(len(t)):
                        if p[j] != t[j]:
                            semantic_acc[index] = False
                            break
                    index += 1
                semantic_acc = semantic_acc.astype(float)
                semantic_acc = np.mean(semantic_acc) * 100.0

                f1, precision, recall = computeF1Score(correct_slots, slot_outputs)
                logging.info('slot f1: ' + str(f1))
                logging.info('intent accuracy: ' + str(accuracy))
                logging.info('semantic Acc(intent, slots are all correct): ' + str(semantic_acc))

                #data_processor_valid.close()
                return f1, accuracy, semantic_acc, pred_intents, correct_intents, slot_outputs, correct_slots, input_words, gate_seq

            
            logging.info('Valid:')
            val_text_arr=[]
            val_tags_arr=[]
            val_intents=[]
            for i in range(5000,7000):
              val_text_arr.append(text_arr_new[i])
              val_tags_arr.append(tag_arr_new2[i])
              val_intents.append(label_arr_new[i])
            epoch_valid_slot, epoch_valid_intent, epoch_valid_err, valid_pred_intent, valid_correct_intent, valid_pred_slot, valid_correct_slot, valid_words, valid_gate = valid(
                val_text_arr,val_tags_arr,val_intents)

            logging.info('Test:')
            data_text_arr=[]
            data_tags_arr=[]
            data_intents=[]
            for i in range(7000,8357):
              data_text_arr.append(text_arr_new[i])
              data_tags_arr.append(tag_arr_new2[i])
              data_intents.append(label_arr_new[i])
            epoch_test_slot, epoch_test_intent, epoch_test_err, test_pred_intent, test_correct_intent, test_pred_slot, test_correct_slot, test_words, test_gate = valid(
                data_text_arr,
              data_tags_arr,
              data_intents)

            if epoch_test_err <= test_err:
                no_improve += 1
            else:
                best_epoch_num = epochs
                test_err = epoch_test_err

                # logging.info('new best epoch number: Epoch Number: {}'.format(best_epoch_num))
                # logging.info('new best score: Semantic Acc: {}'.format(epoch_test_err))
                no_improve = 0

            if test_err > 0:
                logging.info('best epoch_num :  {}'.format(best_epoch_num))
                logging.info('best score : {}'.format(test_err))

            if epochs == arg.max_epochs:
                break

            if arg.early_stop == True:
                if no_improve > arg.patience:
                    break

batch_size = 16
batch_size_add = 8
cell = lstm
dataset = atis
decay_rate = 0.9
decay_steps = 1120
early_stop = True
embedding_path = 
input_file = seq.in
intent_file = label
iteration_num = 1
layer_size = 64
learning_rate = 0.001
learning_rate_decay = 1
max_epochs = 100
model_path = ./model
model_type = intent_only
patience = 15
priority_order = slot_first
slot_file = seq.out
test_data_path = test
train_data_path = train
use_crf = True
use_embedding = 1
valid_data_path = valid
vocab_path = ./vocab

use atis dataset
Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').
Tensor("model/attention/intent_attn/ExpandDims_2:0", shape=(?, ?, 1, 1), dtype=float32)


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Tensor("model_1/attention/intent_attn/ExpandDims_2:0", shape=(?, ?, 1, 1), dtype=float32)


2020-07-03 13:24:07,977 : INFO : Training Start
2020-07-03 13:24:30,788 : INFO : Step: 313
2020-07-03 13:24:30,789 : INFO : Epochs: 1
2020-07-03 13:24:30,791 : INFO : Loss: 4.040135130714685
2020-07-03 13:24:31,208 : INFO : Valid:
2020-07-03 13:24:32,929 : INFO : slot f1: 0.273972602739726
2020-07-03 13:24:32,930 : INFO : intent accuracy: 79.95
2020-07-03 13:24:32,931 : INFO : semantic Acc(intent, slots are all correct): 60.150000000000006
2020-07-03 13:24:32,933 : INFO : Test:
2020-07-03 13:24:34,106 : INFO : slot f1: 0.7312614259597807
2020-07-03 13:24:34,107 : INFO : intent accuracy: 82.53500368459838
2020-07-03 13:24:34,109 : INFO : semantic Acc(intent, slots are all correct): 59.46941783345615
2020-07-03 13:24:34,110 : INFO : best epoch_num :  1
2020-07-03 13:24:34,112 : INFO : best score : 59.46941783345615
2020-07-03 13:24:52,847 : INFO : Step: 522
2020-07-03 13:24:52,848 : INFO : Epochs: 2
2020-07-03 13:24:52,850 : INFO : Loss: 1.7117847731809297
2020-07-03 13:24:53,170 : INFO 

In [11]:
import os
import argparse
import logging
import sys
import tensorflow as tf
import numpy as np
from tensorflow.contrib.rnn.python.ops import core_rnn_cell
from tensorflow.python.ops import rnn_cell_impl

#from utils import createVocabulary, loadVocabulary, computeF1Score, DataProcessor

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# tf.set_random_seed(20181226) 
# np.random.seed(20181226)
# todo: 1. word pre-train embedding, gru, crf, lr decay

parser = argparse.ArgumentParser(allow_abbrev=False)

# Network
parser.add_argument("--num_units", type=int, default=64, help="Network size.", dest='layer_size',required=False)
parser.add_argument("--model_type", type=str, default='full', help="""full(default) | intent_only
                                                                    full: full attention model
                                                                    intent_only: intent attention model""",required=False)
parser.add_argument("--priority_order", type=str, default='slot_first', help="""Type 'slot_first' or 'intent_first'
                                                                              to decide whose influence ought to calculate first use.""",required=False)
parser.add_argument("--use_crf", type=bool, default=True, help="""use crf for seq labeling""",required=False)
parser.add_argument("--use_embedding", type=str, default='1', help="""use pre-trained embedding""",required=False)
parser.add_argument("--cell", type=str, default='lstm', help="""rnn cell""",required=False)  
parser.add_argument("--iteration_num", type=int, default=1, help="""the number of iteration times""",required=False)

# Training Environment
parser.add_argument("--batch_size", type=int, default=16, help="Batch size.",required=False)
parser.add_argument("--batch_size_add", type=int, default=4, help="Batch size add.",required=False)
parser.add_argument("--max_epochs", type=int, default=100, help="Max epochs to train.",required=False)
parser.add_argument("--no_early_stop", action='store_false', dest='early_stop',
                    help="Disable early stop, which is based on sentence level accuracy.",required=False)  
parser.add_argument("--patience", type=int, default=15, help="Patience to wait before stop.",required=False)
# learn rate param
parser.add_argument("--learning_rate_decay", type=str, default='1', help="learning_rate_decay",required=False)
parser.add_argument("--learning_rate", type=float, default=0.001, help="The initial learning rate.",required=False)
parser.add_argument("--decay_steps", type=int, default=280 * 4, help="decay_steps.",required=False)
parser.add_argument("--decay_rate", type=float, default=0.9, help="decay_rate.",required=False)

# Model and Vocab
parser.add_argument("--dataset", type=str, default='atis', help="""Type 'atis' or 'snips' to use dataset provided by us or enter what ever you named your own dataset.
                Note, if you don't want to use this part, enter --dataset=''. It can not be None""",required=False)
parser.add_argument("--model_path", type=str, default='./model', help="Path to save model.",required=False)
parser.add_argument("--vocab_path", type=str, default='./vocab', help="Path to vocabulary files.",required=False)

# Data
parser.add_argument("--train_data_path", type=str, default='train', help="Path to training data files.",required=False)
parser.add_argument("--test_data_path", type=str, default='test', help="Path to testing data files.",required=False)
parser.add_argument("--valid_data_path", type=str, default='valid', help="Path to validation data files.",required=False)
parser.add_argument("--input_file", type=str, default='seq.in', help="Input file name.",required=False)
parser.add_argument("--slot_file", type=str, default='seq.out', help="Slot file name.",required=False)
parser.add_argument("--intent_file", type=str, default='label', help="Intent file name.",required=False)
parser.add_argument("--embedding_path", type=str, default='', help="embedding array's path.",required=False)

arg = parser.parse_args(''.split())

if arg.dataset=='atis':
    arg.model_type='intent_only'
else:
    arg.model_type='full'

# Print arguments
for k, v in sorted(vars(arg).items()):
    print(k, '=', v)
print()
# use full attention or intent only
if arg.model_type == 'full':
    remove_slot_attn = False
elif arg.model_type == 'intent_only':
    remove_slot_attn = True
else:
    print('unknown model type!')
    exit(1)

# full path to data will be: ./data + dataset + train/test/valid
if arg.dataset == None:
    print('name of dataset can not be None')
    exit(1)
elif arg.dataset == 'snips':
    print('use snips dataset')
elif arg.dataset == 'atis':
    print('use atis dataset')
else:
    print('use own dataset: ', arg.dataset)
full_train_path = os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/data', arg.dataset, arg.train_data_path)
full_test_path = os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/data', arg.dataset, arg.test_data_path)
full_valid_path = os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/data', arg.dataset, arg.valid_data_path)

createVocabulary(train_text_arr, os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'in_vocab'))
createVocabulary(train_tags_arr, os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'slot_vocab'))
createVocabulary(train_intents, os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'intent_vocab'),pad=False, unk=False)
# return map: {'vocab': vocab, 'rev': rev}, vocab: map, rev: array
in_vocab = loadVocabulary(os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'in_vocab'))
slot_vocab = loadVocabulary(os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'slot_vocab'))
intent_vocab = loadVocabulary(os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'intent_vocab'))


def createModel(input_data, input_size, sequence_length, slots, slot_size, intent_size, layer_size=128,
                isTraining=True):
    cell_fw = tf.contrib.rnn.BasicLSTMCell(layer_size)
    cell_bw = tf.contrib.rnn.BasicLSTMCell(layer_size)

    if isTraining == True:
        cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=0.5,
                                                output_keep_prob=0.5)
        cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=0.5,
                                                output_keep_prob=0.5)
    # embedding layer， [word size, embed size] 724, 64
    if arg.embedding_path:  
        embedding_weight = np.load(arg.embedding_path)
        embedding = tf.Variable(embedding_weight, name='embedding', dtype=tf.float32)
    else:
        embedding = tf.get_variable('embedding', [input_size, layer_size])
    # [bs, nstep, embed size]
    inputs = tf.nn.embedding_lookup(embedding, input_data)
    # state_outputs: [bs, nstep, embed size], final_state: [4, bs, embed size] include cell state * 2, hidden state * 2
    state_outputs, final_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs,
                                                                 sequence_length=sequence_length, dtype=tf.float32)
    # [bs, embed size * 4]
    final_state = tf.concat([final_state[0][0], final_state[0][1], final_state[1][0], final_state[1][1]], 1)
    # [bs, nstep, embed size * 2]
    state_outputs = tf.concat([state_outputs[0], state_outputs[1]], 2)
    state_shape = state_outputs.get_shape()

    with tf.variable_scope('attention'):
        # [bs, nstep, embed size * 2]
        slot_inputs = state_outputs 
        if not remove_slot_attn:
            with tf.variable_scope('slot_attn'):
                # embed size * 2
                attn_size = state_shape[2].value
                origin_shape = tf.shape(state_outputs) 
                # [bs, 1, nstep, embed size * 2]
                hidden = tf.expand_dims(state_outputs, 1)
                # [bs, nstep, 1, embed size * 2]
                hidden_conv = tf.expand_dims(state_outputs, 2)
                # k: [filter_height, filter_width, in_channels, out_channels]
                k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size])
                # [bs, nstep, 1, embed size * 2]
                hidden_features = tf.nn.conv2d(hidden_conv, k, [1, 1, 1, 1], "SAME") 
                # [bs, nstep, embed size * 2]
                hidden_features = tf.reshape(hidden_features, origin_shape)
                # [bs, 1, nstep, embed size * 2]
                hidden_features = tf.expand_dims(hidden_features, 1)
                v = tf.get_variable("AttnV", [attn_size])

                slot_inputs_shape = tf.shape(slot_inputs)
                # [bs * nstep, embed size * 2]
                slot_inputs = tf.reshape(slot_inputs, [-1, attn_size])
                # [bs * nstep, embed size * 2]
                y = core_rnn_cell._linear(slot_inputs, attn_size, True)
                # [bs , nstep, embed size * 2]
                y = tf.reshape(y, slot_inputs_shape)
                # [bs , nstep, 1, embed size * 2]
                y = tf.expand_dims(y, 2)
                # [bs , nstep, nstep] = [bs, 1, nstep, hidden size] + [bs , nstep, 1, embed size * 2]
                s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [3])
                a = tf.nn.softmax(s)
                # a shape = [bs, nstep, nstep, 1]
                a = tf.expand_dims(a, -1)
                # a shape = [bs, nstep, embed size * 2]
                slot_d = tf.reduce_sum(a * hidden, [2])
        else:
            attn_size = state_shape[2].value
            slot_d=state_outputs
            slot_inputs = tf.reshape(slot_inputs, [-1, attn_size])

        intent_input = final_state
        with tf.variable_scope('intent_attn'):
            attn_size = state_shape[2].value  
            # [bs, nstep, 1, embed size * 2]
            hidden = tf.expand_dims(state_outputs, 2)
            k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size])
            # [bs, nstep, 1, embed size * 2]
            hidden_features = tf.nn.conv2d(hidden, k, [1, 1, 1, 1], "SAME")
            v = tf.get_variable("AttnV", [attn_size])

            # [bs, embed size * 2]
            y = core_rnn_cell._linear(intent_input, attn_size, True)
            # [bs, 1, 1, embed size * 2]
            y = tf.reshape(y, [-1, 1, 1, attn_size])
            # [bs, nstep]
            s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [2, 3])  
            a = tf.nn.softmax(s)
            # [bs, nstep, 1]
            a = tf.expand_dims(a, -1)
            # [bs, nstep, 1, 1]
            a = tf.expand_dims(a, -1)
            # [bs, embed size * 2]
            d = tf.reduce_sum(a * hidden, [1, 2])
            intent_output = d
            #[bs, embedding * 2]
            intent_context_states = intent_output
            print(a)

        if arg.priority_order == 'intent_first':
            for n in range(arg.iteration_num):
                # with tf.variable_scope('intent_subnet' + str(n - 1)):
                #     # embedding*2
                #     attn_size = state_shape[2].value
                #     # [bs, nstep, 1, embed size * 2]
                #     hidden = tf.expand_dims(state_outputs, 2)
                #     # [bs,nstep, 1, embeddize*2]
                #     reinforce_state = tf.expand_dims(slot_d, 2)
                #     k1 = tf.get_variable("W1", [1, 1, attn_size, attn_size])
                #     k2 = tf.get_variable('W2', [1, 1, attn_size, attn_size])
                #     # [bs, nstep, 1, embed size * 2]
                #     reinforce_features = tf.nn.conv2d(reinforce_state, k1, [1, 1, 1, 1], "SAME")
                #     hidden_features = tf.nn.conv2d(hidden, k2, [1, 1, 1, 1], "SAME")
                #     v1 = tf.get_variable("AttnV", [attn_size])
                #     bias = tf.get_variable("Bias", [attn_size])
                #     # [bs, nstep]
                #     s = tf.reduce_sum(v1 * tf.tanh(hidden_features + reinforce_features + bias), [2, 3])
                #     a = tf.nn.softmax(s)
                #     # [bs, nstep, 1]
                #     a = tf.expand_dims(a, -1)
                #     # [bs, nstep, 1, 1]
                #     a = tf.expand_dims(a, -1)
                #     # [bs, embedding*2]
                #     r_slot = tf.reduce_sum(a * reinforce_state, [1, 2])
                #
                #     r_intent = r_slot + intent_context_states
                #
                #     intent_output = tf.concat([r_intent, intent_input], 1)

                with tf.variable_scope('slot_subnet' + str(n - 1)):
                    # [bs, embed size * 2]
                    intent_gate = core_rnn_cell._linear(intent_output, attn_size, True)
                    # [bs, 1,embed size * 2]
                    intent_gate = tf.reshape(intent_gate, [-1, 1, intent_gate.get_shape()[1].value]) 
                    v1 = tf.get_variable("gateV", [attn_size])
                    # [bs, nstep, embed size * 2]
                    relation_factor = v1 * tf.tanh(slot_d + intent_gate)
                    # [bs, nstep]
                    relation_factor = tf.reduce_sum(relation_factor, [2])
                    # [bs, nstep, 1]
                    relation_factor = tf.expand_dims(relation_factor, -1)
                    # [bs, nstep, embed size * 2]
                    reinforce_state = slot_d * relation_factor
                    # [bs * nstep, embed size * 2]
                    reinforce_vector = tf.reshape(reinforce_state, [-1, attn_size])
                    # [bs * nstep, embed size * 4]
                    slot_output = tf.concat([reinforce_vector, slot_inputs], 1)


        else:
            for n in range(arg.iteration_num):
                with tf.variable_scope('slot_subnet' + str(n - 1)):
                    # [bs, embed size * 2]
                    intent_gate = core_rnn_cell._linear(intent_output, attn_size, True)
                    # [bs, 1,embed size * 2]
                    intent_gate = tf.reshape(intent_gate, [-1, 1, intent_gate.get_shape()[1].value]) 
                    v1 = tf.get_variable("gateV", [attn_size])
                    # [bs, nstep, embed size * 2]
                    relation_factor = v1 * tf.tanh(slot_d + intent_gate) 
                    # [bs, nstep]
                    relation_factor = tf.reduce_sum(relation_factor, [2])
                    # [bs, nstep, 1]
                    relation_factor = tf.expand_dims(relation_factor, -1)
                    reinforce_state = slot_d * relation_factor
                    # [bs * nstep, embed size * 2]
                    reinforce_vector = tf.reshape(reinforce_state, [-1, attn_size])
                    # [bs * nstep, embed size * 4]
                    slot_output = tf.concat([reinforce_vector,slot_inputs], 1)                    

                # with tf.variable_scope('intent_subnet' + str(n - 1)):
                #     # embedding*2
                #     attn_size = state_shape[2].value
                #     # [bs, nstep, 1, embed size * 2]
                #     hidden = tf.expand_dims(state_outputs, 2)
                #     # [bs,nstep, 1, embedding_size*2]
                #     reinforce_output = tf.expand_dims(reinforce_state, 2)
                #     k1 = tf.get_variable("W1", [1, 1, attn_size, attn_size])
                #     k2 = tf.get_variable('W2', [1, 1, attn_size, attn_size])
                #     # [bs, nstep, 1, embed size * 2]
                #     slot_features = tf.nn.conv2d(reinforce_output, k1, [1, 1, 1, 1], "SAME")  
                #     hidden_features = tf.nn.conv2d(hidden, k2, [1, 1, 1, 1], "SAME")
                #     v1 = tf.get_variable("AttnV", [attn_size])
                #     bias = tf.get_variable("Bias", [attn_size])
                #     # [bs, nstep]
                #     s = tf.reduce_sum(v1 * tf.tanh(hidden_features + slot_features + bias), [2, 3])
                #     a = tf.nn.softmax(s)
                #     # [bs, nstep, 1]
                #     a = tf.expand_dims(a, -1)
                #     # [bs, nstep, 1, 1]
                #     a = tf.expand_dims(a, -1)
                #     # [bs, embedding*2]
                #     slot_reinforce_states = tf.reduce_sum(a * reinforce_output, [1, 2])
                #
                #     r_intent = slot_reinforce_states + intent_context_states
                #
                #     intent_output = tf.concat([r_intent, intent_input], 1)

    with tf.variable_scope('intent_proj'):
        # [bs, intent_size]
        intent = core_rnn_cell._linear(intent_output, intent_size, True)
    with tf.variable_scope('slot_proj'):
        # [bs * nsetp, intent_size]
        slot = core_rnn_cell._linear(slot_output, slot_size, True)
        if arg.use_crf:
            nstep = tf.shape(state_outputs)[1]
            slot = tf.reshape(slot, [-1, nstep, slot_size])
            # [bs,nstep,slot_size]
    outputs = [slot, intent]
    return outputs


# Create Training Model
input_data = tf.placeholder(tf.int32, [None, None], name='inputs') 
sequence_length = tf.placeholder(tf.int32, [None], name="sequence_length")
global_step = tf.Variable(0, trainable=False, name='global_step')
slots = tf.placeholder(tf.int32, [None, None], name='slots')
slot_weights = tf.placeholder(tf.float32, [None, None], name='slot_weights')
intent = tf.placeholder(tf.int32, [None], name='intent')

with tf.variable_scope('model'):
    training_outputs = createModel(input_data, len(in_vocab['vocab']), sequence_length, slots, len(slot_vocab['vocab']),
                                   len(intent_vocab['vocab']), layer_size=arg.layer_size)

slots_shape = tf.shape(slots)
slots_reshape = tf.reshape(slots, [-1])

slot_outputs = training_outputs[0]
with tf.variable_scope('slot_loss'):
    if arg.use_crf:
        log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood(slot_outputs, slots, sequence_length)
        slot_loss = tf.reduce_mean(-log_likelihood)
    else:
        crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=slots_reshape, logits=slot_outputs)
        crossent = tf.reshape(crossent, slots_shape)
        slot_loss = tf.reduce_sum(crossent * slot_weights, 1)
        total_size = tf.reduce_sum(slot_weights, 1)
        total_size += 1e-12
        slot_loss = slot_loss / total_size

intent_output = training_outputs[1]
with tf.variable_scope('intent_loss'):
    crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=intent, logits=intent_output)
    intent_loss = tf.reduce_sum(crossent) / tf.cast(arg.batch_size, tf.float32)

params = tf.trainable_variables()
# learning rate decay
learning_rate = tf.train.exponential_decay(arg.learning_rate, global_step, arg.decay_steps, arg.decay_rate,
                                           staircase=False)
if arg.learning_rate_decay:
    opt = tf.train.AdamOptimizer(learning_rate)
else:
    opt = tf.train.AdamOptimizer(arg.learning_rate)
intent_params = []
slot_params = []
for p in params:
    if not 'slot_' in p.name:
        intent_params.append(p)
    if 'slot_' in p.name or 'bidirectional_rnn' in p.name or 'embedding' in p.name:
        slot_params.append(p)  

gradients_slot = tf.gradients(slot_loss, slot_params)
gradients_intent = tf.gradients(intent_loss, intent_params)
clipped_gradients_slot, norm_slot = tf.clip_by_global_norm(gradients_slot, 5.0)
clipped_gradients_intent, norm_intent = tf.clip_by_global_norm(gradients_intent, 5.0)
gradient_norm_slot = norm_slot
gradient_norm_intent = norm_intent
update_slot = opt.apply_gradients(zip(clipped_gradients_slot, slot_params))
update_intent = opt.apply_gradients(zip(clipped_gradients_intent, intent_params), global_step=global_step)
training_outputs = [global_step, slot_loss, update_intent, update_slot, gradient_norm_intent, gradient_norm_slot]
inputs = [input_data, sequence_length, slots, slot_weights, intent]

# Create Inference Model
with tf.variable_scope('model', reuse=True):
    inference_outputs = createModel(input_data, len(in_vocab['vocab']), sequence_length, slots,
                                    len(slot_vocab['vocab']),
                                    len(intent_vocab['vocab']), layer_size=arg.layer_size, isTraining=False)
# slot output
if arg.use_crf:
    inference_slot_output, pred_scores = tf.contrib.crf.crf_decode(inference_outputs[0], trans_params, sequence_length)
else:
    inference_slot_output = tf.nn.softmax(inference_outputs[0], name='slot_output')
# intent output

inference_intent_output = tf.nn.softmax(inference_outputs[1], name='intent_output')

inference_outputs = [inference_intent_output, inference_slot_output]
inference_inputs = [input_data, sequence_length]

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

saver = tf.train.Saver()
# gpu setting
gpu_options = tf.GPUOptions(allow_growth=True)

# Start Training
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(tf.global_variables_initializer()) 
    logging.info('Training Start') 

    epochs = 0
    loss = 0.0
    data_processor = None
    line = 0
    num_loss = 0
    step = 0
    no_improve = 0

    # variables to store highest values among epochs, only use 'valid_err' for now
    valid_slot = 0
    test_slot = 0
    valid_intent = 0
    test_intent = 0
    valid_err = 0  
    test_err = 0
    best_epoch_num = 0
    while True:
        if data_processor == None:
            train_text_arr=[]
            train_tags_arr=[]
            train_intents=[]
            for i in range(0,5000):
              train_text_arr.append(text_arr_new[i])
              train_tags_arr.append(tag_arr_new2[i])
              train_intents.append(label_arr_new[i])
#print(len(train_text_arr),len(train_tags_arr),len(train_intents))
            data_processor = DataProcessor(train_text_arr,train_tags_arr,train_intents, in_vocab, slot_vocab,
                                           intent_vocab)
        in_data, slot_data, slot_weight, length, intents, _, _, _ = data_processor.get_batch(arg.batch_size)
        feed_dict = {input_data.name: in_data, slots.name: slot_data, slot_weights.name: slot_weight,
                     sequence_length.name: length, intent.name: intents}
        ret = sess.run(training_outputs, feed_dict)
        loss += np.mean(ret[1])

        line += arg.batch_size
        step = ret[0]
        num_loss += 1

        if data_processor.end == 1:
            arg.batch_size += arg.batch_size_add 
            if 5000 % arg.batch_size==0:
              arg.batch_size+=1
            line = 0
            #data_processor.close() 
            data_processor = None
            epochs += 1 
            logging.info('Step: ' + str(step))
            logging.info('Epochs: ' + str(epochs))
            logging.info('Loss: ' + str(loss / num_loss))
            num_loss = 0
            loss = 0.0

            save_path = os.path.join(arg.model_path, '_step_' + str(step) + '_epochs_' + str(epochs) + '.ckpt')
            saver.save(sess, save_path)


            def valid(in_path, slot_path, intent_path):
                data_processor_valid = DataProcessor(in_path, slot_path, intent_path, in_vocab, slot_vocab,
                                                     intent_vocab)

                pred_intents = []
                correct_intents = []
                slot_outputs = []
                correct_slots = []
                input_words = []

                # used to gate
                gate_seq = []
                while True:
                    in_data, slot_data, slot_weight, length, intents, in_seq, slot_seq, intent_seq = data_processor_valid.get_batch(
                        arg.batch_size)
                    if len(in_data) <= 0:
                        break
                    feed_dict = {input_data.name: in_data, sequence_length.name: length}
                    ret = sess.run(inference_outputs, feed_dict)
                    for i in ret[0]:  
                        pred_intents.append(np.argmax(i))
                    for i in intents:
                        correct_intents.append(i)

                    pred_slots = ret[1].reshape((slot_data.shape[0], slot_data.shape[1], -1))
                    for p, t, i, l in zip(pred_slots, slot_data, in_data, length):
                        if arg.use_crf:
                            p = p.reshape([-1])
                        else:
                            p = np.argmax(p, 1)
                        tmp_pred = []
                        tmp_correct = []
                        tmp_input = []
                        for j in range(l):
                            tmp_pred.append(slot_vocab['rev'][p[j]])
                            tmp_correct.append(slot_vocab['rev'][t[j]])
                            tmp_input.append(in_vocab['rev'][i[j]])

                        slot_outputs.append(tmp_pred)  
                        correct_slots.append(tmp_correct)
                        input_words.append(tmp_input)

                    if data_processor_valid.end == 1:
                        break

                pred_intents = np.array(pred_intents)
                correct_intents = np.array(correct_intents)
                accuracy = (pred_intents == correct_intents)
                semantic_acc = accuracy
                accuracy = accuracy.astype(float)
                accuracy = np.mean(accuracy) * 100.0  

                index = 0
                for t, p in zip(correct_slots, slot_outputs):
                    # Process Semantic Error
                    if len(t) != len(p):
                        raise ValueError('Error!!')

                    for j in range(len(t)):
                        if p[j] != t[j]:
                            semantic_acc[index] = False
                            break
                    index += 1
                semantic_acc = semantic_acc.astype(float)
                semantic_acc = np.mean(semantic_acc) * 100.0

                f1, precision, recall = computeF1Score(correct_slots, slot_outputs)
                logging.info('slot f1: ' + str(f1))
                logging.info('intent accuracy: ' + str(accuracy))
                logging.info('semantic Acc(intent, slots are all correct): ' + str(semantic_acc))

                #data_processor_valid.close()
                return f1, accuracy, semantic_acc, pred_intents, correct_intents, slot_outputs, correct_slots, input_words, gate_seq


            logging.info('Valid:')
            val_text_arr=[]
            val_tags_arr=[]
            val_intents=[]
            for i in range(5000,7000):
              val_text_arr.append(text_arr_new[i])
              val_tags_arr.append(tag_arr_new2[i])
              val_intents.append(label_arr_new[i])
#print(len(val_text_arr),len(val_tags_arr),len(val_intents))
            epoch_valid_slot, epoch_valid_intent, epoch_valid_err, valid_pred_intent, valid_correct_intent, valid_pred_slot, valid_correct_slot, valid_words, valid_gate = valid(
                val_text_arr,
              val_tags_arr,
              val_intents)

            logging.info('Test:')
            data_text_arr=[]
            data_tags_arr=[]
            data_intents=[]
            for i in range(7000,8357):
              data_text_arr.append(text_arr_new[i])
              data_tags_arr.append(tag_arr_new2[i])
              data_intents.append(label_arr_new[i])
#print(len(data_text_arr),len(data_tags_arr),len(data_intents))
            epoch_test_slot, epoch_test_intent, epoch_test_err, test_pred_intent, test_correct_intent, test_pred_slot, test_correct_slot, test_words, test_gate = valid(
                data_text_arr,data_tags_arr,data_intents
                )

            if epoch_test_err <= test_err:
                no_improve += 1
            else:
                best_epoch_num = epochs
                test_err = epoch_test_err

                # logging.info('new best epoch number: Epoch Number: {}'.format(best_epoch_num))
                # logging.info('new best score: Semantic Acc: {}'.format(epoch_test_err))
                no_improve = 0

            if test_err > 0:
                logging.info('best epoch_num :  {}'.format(best_epoch_num))
                logging.info('best score : {}'.format(test_err))

            if epochs == arg.max_epochs:
                break

            if arg.early_stop == True:
                if no_improve > arg.patience:
                    break

batch_size = 16
batch_size_add = 4
cell = lstm
dataset = atis
decay_rate = 0.9
decay_steps = 1120
early_stop = True
embedding_path = 
input_file = seq.in
intent_file = label
iteration_num = 1
layer_size = 64
learning_rate = 0.001
learning_rate_decay = 1
max_epochs = 100
model_path = ./model
model_type = intent_only
patience = 15
priority_order = slot_first
slot_file = seq.out
test_data_path = test
train_data_path = train
use_crf = True
use_embedding = 1
valid_data_path = valid
vocab_path = ./vocab

use atis dataset
Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').
Tensor("model/attention/intent_attn/ExpandDims_2:0", shape=(?, ?, 1, 1), dtype=float32)


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Tensor("model_1/attention/intent_attn/ExpandDims_2:0", shape=(?, ?, 1, 1), dtype=float32)


2020-07-03 14:28:47,278 : INFO : Training Start
2020-07-03 14:29:11,186 : INFO : Step: 313
2020-07-03 14:29:11,187 : INFO : Epochs: 1
2020-07-03 14:29:11,190 : INFO : Loss: 3.93238399394404
2020-07-03 14:29:11,606 : INFO : Valid:
2020-07-03 14:29:13,473 : INFO : slot f1: 0
2020-07-03 14:29:13,474 : INFO : intent accuracy: 78.64999999999999
2020-07-03 14:29:13,475 : INFO : semantic Acc(intent, slots are all correct): 61.050000000000004
2020-07-03 14:29:13,476 : INFO : Test:
2020-07-03 14:29:14,756 : INFO : slot f1: 0
2020-07-03 14:29:14,758 : INFO : intent accuracy: 81.2822402358143
2020-07-03 14:29:14,762 : INFO : semantic Acc(intent, slots are all correct): 60.058953574060425
2020-07-03 14:29:14,766 : INFO : best epoch_num :  1
2020-07-03 14:29:14,767 : INFO : best score : 60.058953574060425
2020-07-03 14:29:34,658 : INFO : Step: 552
2020-07-03 14:29:34,659 : INFO : Epochs: 2
2020-07-03 14:29:34,660 : INFO : Loss: 1.605316693064558
2020-07-03 14:29:34,975 : INFO : Valid:
2020-07-03 14

In [11]:
import os
import argparse
import logging
import sys
import tensorflow as tf
import numpy as np
from tensorflow.contrib.rnn.python.ops import core_rnn_cell
from tensorflow.python.ops import rnn_cell_impl

#from utils import createVocabulary, loadVocabulary, computeF1Score, DataProcessor

os.environ["CUDA_VISIBLE_DEVICES"] = "2"

parser = argparse.ArgumentParser(allow_abbrev=False)
parser.add_argument("--num_units", type=int, default=64, help="Network size.", dest='layer_size',required=False)
parser.add_argument("--model_type", type=str, default='full', help="""full(default) | intent_only
                                                                    full: full attention model
                                                                    intent_only: intent attention model""",required=False)
parser.add_argument("--priority_order", type=str, default='slot_first', help="""Type 'slot_first' or 'intent_first'
                                                                              to decide whose influence ought to calculate first use.""",required=False)
parser.add_argument("--use_crf", type=bool, default=False, help="""use crf for seq labeling""",required=False)
parser.add_argument("--use_embedding", type=str, default='1', help="""use pre-trained embedding""",required=False)
parser.add_argument("--cell", type=str, default='lstm', help="""rnn cell""",required=False)
parser.add_argument("--iteration_num", type=int, default=1, help="""the number of iteration times""",required=False)
parser.add_argument("--batch_size", type=int, default=16, help="Batch size.",required=False)
parser.add_argument("--batch_size_add", type=int, default=4, help="Batch size add.",required=False)
parser.add_argument("--max_epochs", type=int, default=100, help="Max epochs to train.",required=False)
parser.add_argument("--no_early_stop", action='store_false', dest='early_stop',
                    help="Disable early stop, which is based on sentence level accuracy.",required=False)
parser.add_argument("--patience", type=int, default=15, help="Patience to wait before stop.",required=False)
parser.add_argument("--learning_rate_decay", type=str, default='1', help="learning_rate_decay",required=False)
parser.add_argument("--learning_rate", type=float, default=0.001, help="The initial learning rate.",required=False)
parser.add_argument("--decay_steps", type=int, default=280 * 4, help="decay_steps.",required=False)
parser.add_argument("--decay_rate", type=float, default=0.9, help="decay_rate.",required=False)
parser.add_argument("--dataset", type=str, default='atis', help="""Type 'atis' or 'snips' to use dataset provided by us or enter what ever you named your own dataset.
                Note, if you don't want to use this part, enter --dataset=''. It can not be None""",required=False)
parser.add_argument("--model_path", type=str, default='./model', help="Path to save model.",required=False)
parser.add_argument("--vocab_path", type=str, default='./vocab', help="Path to vocabulary files.",required=False)
parser.add_argument("--train_data_path", type=str, default='train', help="Path to training data files.",required=False)
parser.add_argument("--test_data_path", type=str, default='test', help="Path to testing data files.",required=False)
parser.add_argument("--valid_data_path", type=str, default='valid', help="Path to validation data files.",required=False)
parser.add_argument("--input_file", type=str, default='seq.in', help="Input file name.",required=False)
parser.add_argument("--slot_file", type=str, default='seq.out', help="Slot file name.",required=False)
parser.add_argument("--intent_file", type=str, default='label', help="Intent file name.",required=False)
parser.add_argument("--embedding_path", type=str, default='', help="embedding array's path.",required=False)

arg = parser.parse_args(''.split())
if arg.dataset=='atis':
    arg.model_type='intent_only'
else:
    arg.model_type='full'

for k, v in sorted(vars(arg).items()):
    print(k, '=', v)
print()

if arg.model_type == 'full':
    remove_slot_attn = False 
elif arg.model_type == 'intent_only':
    remove_slot_attn = True
else:
    print('unknown model type!')
    exit(1)

if arg.dataset == None:
    print('name of dataset can not be None')
    exit(1)
elif arg.dataset == 'snips':
    print('use snips dataset')
elif arg.dataset == 'atis':
    print('use atis dataset')
else:
    print('use own dataset: ', arg.dataset)
full_train_path = os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/data', arg.dataset, arg.train_data_path)
full_test_path = os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/data', arg.dataset, arg.test_data_path)
full_valid_path = os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/data', arg.dataset, arg.valid_data_path)

createVocabulary(train_text_arr, os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'in_vocab'))
createVocabulary(train_tags_arr, os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'slot_vocab'))
createVocabulary(train_intents, os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'intent_vocab'),pad=False, unk=False)
in_vocab = loadVocabulary(os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'in_vocab'))
slot_vocab = loadVocabulary(os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'slot_vocab'))
intent_vocab = loadVocabulary(os.path.join('/content/drive/My Drive/SF-ID-Network-For-NLU/Vocab2', 'intent_vocab'))


def createModel(input_data, input_size, sequence_length, slots, slot_size, intent_size, layer_size=128,
                isTraining=True):
    cell_fw = tf.contrib.rnn.BasicLSTMCell(layer_size)
    cell_bw = tf.contrib.rnn.BasicLSTMCell(layer_size)

    if isTraining == True:
        cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=0.5,
                                                output_keep_prob=0.5)
        cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=0.5,
                                                output_keep_prob=0.5)
    if arg.embedding_path:
        embedding_weight = np.load(arg.embedding_path)
        embedding = tf.Variable(embedding_weight, name='embedding', dtype=tf.float32)
    else:
        embedding = tf.get_variable('embedding', [input_size, layer_size])
    inputs = tf.nn.embedding_lookup(embedding, input_data)
    state_outputs, final_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs,
                                                                 sequence_length=sequence_length, dtype=tf.float32)
    final_state = tf.concat([final_state[0][0], final_state[0][1], final_state[1][0], final_state[1][1]], 1)
    state_outputs = tf.concat([state_outputs[0], state_outputs[1]], 2)
    state_shape = state_outputs.get_shape()

    with tf.variable_scope('attention'):
        slot_inputs = state_outputs
        if not remove_slot_attn:
            with tf.variable_scope('slot_attn'):
                attn_size = state_shape[2].value
                origin_shape = tf.shape(state_outputs)
                hidden = tf.expand_dims(state_outputs, 1)
                hidden_conv = tf.expand_dims(state_outputs, 2)
                k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size])
                hidden_features = tf.nn.conv2d(hidden_conv, k, [1, 1, 1, 1], "SAME")
                hidden_features = tf.reshape(hidden_features, origin_shape)
                hidden_features = tf.expand_dims(hidden_features, 1)
                v = tf.get_variable("AttnV", [attn_size])
                slot_inputs_shape = tf.shape(slot_inputs)
                slot_inputs = tf.reshape(slot_inputs, [-1, attn_size])
                y = core_rnn_cell._linear(slot_inputs, attn_size, True)
                y = tf.reshape(y, slot_inputs_shape)
                y = tf.expand_dims(y, 2)
                s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [3])
                a = tf.nn.softmax(s)
                a = tf.expand_dims(a, -1)
                slot_d = tf.reduce_sum(a * hidden, [2])
                slot_reinforce_state = tf.expand_dims(slot_d, 2)
        else:
            attn_size = state_shape[2].value
            slot_d=slot_inputs
            slot_reinforce_state = tf.expand_dims(slot_inputs, 2)
            slot_inputs = tf.reshape(slot_inputs, [-1, attn_size])

        intent_input = final_state
        with tf.variable_scope('intent_attn'):
            attn_size = state_shape[2].value
            hidden = tf.expand_dims(state_outputs, 2)
            k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size])
            hidden_features = tf.nn.conv2d(hidden, k, [1, 1, 1, 1], "SAME")
            v = tf.get_variable("AttnV", [attn_size])

            y = core_rnn_cell._linear(intent_input, attn_size, True)
            y = tf.reshape(y, [-1, 1, 1, attn_size])
            s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [2, 3])
            a = tf.nn.softmax(s)
            a = tf.expand_dims(a, -1)
            a = tf.expand_dims(a, -1)
            d = tf.reduce_sum(a * hidden, [1, 2]) 
            r_intent = d
            intent_context_states = d

        if arg.priority_order == 'intent_first':
            for n in range(arg.iteration_num):
                with tf.variable_scope('intent_subnet' + str(n - 1)):
                    attn_size = state_shape[2].value
                    hidden = tf.expand_dims(state_outputs, 2)
                    k1 = tf.get_variable("W1", [1, 1, attn_size, attn_size])
                    k2 = tf.get_variable('W2', [1, 1, attn_size, attn_size])
                    slot_reinforce_features = tf.nn.conv2d(slot_reinforce_state, k1, [1, 1, 1, 1],
                                                           "SAME")
                    hidden_features = tf.nn.conv2d(hidden, k2, [1, 1, 1, 1], "SAME")
                    v1 = tf.get_variable("AttnV", [attn_size])
                    bias = tf.get_variable("Bias", [attn_size])
                    s = tf.reduce_sum(v1 * tf.tanh(hidden_features + slot_reinforce_features + bias), [2, 3])
                    a = tf.nn.softmax(s)
                    a = tf.expand_dims(a, -1)
                    a = tf.expand_dims(a, -1)
                    r = tf.reduce_sum(a * slot_reinforce_state, [1, 2])

                    r_intent = r + intent_context_states

                    intent_output = tf.concat([r_intent, intent_input], 1)

                with tf.variable_scope('slot_subnet' + str(n - 1)):
                    intent_gate = core_rnn_cell._linear(r_intent, attn_size, True)
                    intent_gate = tf.reshape(intent_gate, [-1, 1, intent_gate.get_shape()[
                        1].value])
                    v1 = tf.get_variable("gateV", [attn_size])
                    relation_factor = v1 * tf.tanh(slot_d + intent_gate)
                    relation_factor = tf.reduce_sum(relation_factor, [2])
                    relation_factor = tf.expand_dims(relation_factor, -1)
                    slot_reinforce_state1 = slot_d * relation_factor
                    slot_reinforce_state = tf.expand_dims(slot_reinforce_state1, 2)
                    slot_reinforce_vector = tf.reshape(slot_reinforce_state1, [-1, attn_size])
                    slot_output = tf.concat([slot_reinforce_vector, slot_inputs], 1)

        else:
            for n in range(arg.iteration_num):
                with tf.variable_scope('slot_subnet' + str(n - 1)):
                    intent_gate = core_rnn_cell._linear(r_intent, attn_size, True)
                    intent_gate = tf.reshape(intent_gate, [-1, 1, intent_gate.get_shape()[
                        1].value])
                    v1 = tf.get_variable("gateV", [attn_size])
                    relation_factor = v1 * tf.tanh(slot_d + intent_gate)
                    relation_factor = tf.reduce_sum(relation_factor, [2])
                    relation_factor = tf.expand_dims(relation_factor, -1)
                    slot_reinforce_state = slot_d * relation_factor
                    slot_reinforce_vector = tf.reshape(slot_reinforce_state, [-1, attn_size])
                    slot_output = tf.concat([slot_reinforce_vector, slot_inputs], 1)

                with tf.variable_scope('intent_subnet' + str(n - 1)):
                    attn_size = state_shape[2].value
                    hidden = tf.expand_dims(state_outputs, 2)
                    slot_reinforce_output = tf.expand_dims(slot_reinforce_state, 2)
                    k1 = tf.get_variable("W1", [1, 1, attn_size, attn_size])
                    k2 = tf.get_variable('W2', [1, 1, attn_size, attn_size])
                    slot_features = tf.nn.conv2d(slot_reinforce_output, k1, [1, 1, 1, 1], "SAME")
                    hidden_features = tf.nn.conv2d(hidden, k2, [1, 1, 1, 1], "SAME")
                    v1 = tf.get_variable("AttnV", [attn_size])
                    bias = tf.get_variable("Bias", [attn_size])
                    s = tf.reduce_sum(v1 * tf.tanh(hidden_features + slot_features + bias), [2, 3])
                    a = tf.nn.softmax(s)
                    a = tf.expand_dims(a, -1)
                    a = tf.expand_dims(a, -1)
                    r = tf.reduce_sum(a * slot_reinforce_output, [1, 2])

                    r_intent = r + intent_context_states

                    intent_output = tf.concat([r_intent, intent_input], 1)

    with tf.variable_scope('intent_proj'):
        intent = core_rnn_cell._linear(intent_output, intent_size, True)
    with tf.variable_scope('slot_proj'):
        slot = core_rnn_cell._linear(slot_output, slot_size, True)
        if arg.use_crf:
            nstep = tf.shape(state_outputs)[1]
            slot = tf.reshape(slot, [-1, nstep, slot_size])
    outputs = [slot, intent]
    return outputs


input_data = tf.placeholder(tf.int32, [None, None], name='inputs')
sequence_length = tf.placeholder(tf.int32, [None], name="sequence_length")
global_step = tf.Variable(0, trainable=False, name='global_step')
slots = tf.placeholder(tf.int32, [None, None], name='slots')
slot_weights = tf.placeholder(tf.float32, [None, None], name='slot_weights')
intent = tf.placeholder(tf.int32, [None], name='intent')

with tf.variable_scope('model'):
    training_outputs = createModel(input_data, len(in_vocab['vocab']), sequence_length, slots, len(slot_vocab['vocab']),
                                   len(intent_vocab['vocab']), layer_size=arg.layer_size)

slots_shape = tf.shape(slots)
slots_reshape = tf.reshape(slots, [-1])

slot_outputs = training_outputs[0]
with tf.variable_scope('slot_loss'):
    if arg.use_crf:
        log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood(slot_outputs, slots, sequence_length)
        slot_loss = tf.reduce_mean(-log_likelihood)
    else:
        crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=slots_reshape, logits=slot_outputs)
        crossent = tf.reshape(crossent, slots_shape)
        slot_loss = tf.reduce_sum(crossent * slot_weights, 1)
        total_size = tf.reduce_sum(slot_weights, 1)
        total_size += 1e-12
        slot_loss = slot_loss / total_size

intent_output = training_outputs[1]
with tf.variable_scope('intent_loss'):
    crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=intent, logits=intent_output)
    intent_loss = tf.reduce_sum(crossent) / tf.cast(arg.batch_size, tf.float32)
params = tf.trainable_variables()
learning_rate = tf.train.exponential_decay(arg.learning_rate, global_step, arg.decay_steps, arg.decay_rate,
                                           staircase=False)
if arg.learning_rate_decay:
    opt = tf.train.AdamOptimizer(learning_rate)
else:
    opt = tf.train.AdamOptimizer(arg.learning_rate)
intent_params = []
slot_params = []
for p in params:
    if not 'slot_' in p.name:
        intent_params.append(p)
    if 'slot_' in p.name or 'bidirectional_rnn' in p.name or 'embedding' in p.name:
        slot_params.append(p)

gradients_slot = tf.gradients(slot_loss, slot_params)
gradients_intent = tf.gradients(intent_loss, intent_params)

clipped_gradients_slot, norm_slot = tf.clip_by_global_norm(gradients_slot, 5.0)
clipped_gradients_intent, norm_intent = tf.clip_by_global_norm(gradients_intent, 5.0)

gradient_norm_slot = norm_slot
gradient_norm_intent = norm_intent
update_slot = opt.apply_gradients(zip(clipped_gradients_slot, slot_params))
update_intent = opt.apply_gradients(zip(clipped_gradients_intent, intent_params), global_step=global_step)

training_outputs = [global_step, slot_loss, update_intent, update_slot, gradient_norm_intent, gradient_norm_slot]
inputs = [input_data, sequence_length, slots, slot_weights, intent]


with tf.variable_scope('model', reuse=True):
    inference_outputs = createModel(input_data, len(in_vocab['vocab']), sequence_length, slots,
                                    len(slot_vocab['vocab']),
                                    len(intent_vocab['vocab']), layer_size=arg.layer_size, isTraining=False)

if arg.use_crf:
    inference_slot_output, pred_scores = tf.contrib.crf.crf_decode(inference_outputs[0], trans_params, sequence_length)
else:
    inference_slot_output = tf.nn.softmax(inference_outputs[0], name='slot_output')

inference_intent_output = tf.nn.softmax(inference_outputs[1], name='intent_output')

inference_outputs = [inference_intent_output, inference_slot_output]
inference_inputs = [input_data, sequence_length]

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

saver = tf.train.Saver()
gpu_options = tf.GPUOptions(allow_growth=True)

with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(tf.global_variables_initializer())
    logging.info('Training Start')

    epochs = 0
    loss = 0.0
    data_processor = None
    line = 0
    num_loss = 0
    step = 0
    no_improve = 0

    valid_slot = 0
    test_slot = 0
    valid_intent = 0
    test_intent = 0
    valid_err = 0
    test_err = 0
    best_epoch_num = 0
    while True:
        if data_processor == None:
            train_text_arr=[]
            train_tags_arr=[]
            train_intents=[]
            for i in range(0,5000):
              train_text_arr.append(text_arr_new[i])
              train_tags_arr.append(tag_arr_new2[i])
              train_intents.append(label_arr_new[i])
            data_processor = DataProcessor( train_text_arr,train_tags_arr,train_intents, in_vocab, slot_vocab,
                                           intent_vocab)
        in_data, slot_data, slot_weight, length, intents, _, _, _ = data_processor.get_batch(arg.batch_size)
        feed_dict = {input_data.name: in_data, slots.name: slot_data, slot_weights.name: slot_weight,
                     sequence_length.name: length, intent.name: intents}
        ret = sess.run(training_outputs, feed_dict)
        loss += np.mean(ret[1])

        line += arg.batch_size
        step = ret[0]
        num_loss += 1

        if data_processor.end == 1:
            arg.batch_size += arg.batch_size_add
            if 5000 % arg.batch_size ==0:
              arg.batch_size+=1
            line = 0
            #data_processor.close()
            data_processor = None
            epochs += 1
            logging.info('Step: ' + str(step))
            logging.info('Epochs: ' + str(epochs))
            logging.info('Loss: ' + str(loss / num_loss))
            num_loss = 0
            loss = 0.0

            save_path = os.path.join(arg.model_path, '_step_' + str(step) + '_epochs_' + str(epochs) + '.ckpt')
            saver.save(sess, save_path)


            def valid(in_path, slot_path, intent_path):
                data_processor_valid = DataProcessor(in_path, slot_path, intent_path, in_vocab, slot_vocab,
                                                     intent_vocab)

                pred_intents = []
                correct_intents = []
                slot_outputs = []
                correct_slots = []
                input_words = []

                gate_seq = []
                while True:
                    in_data, slot_data, slot_weight, length, intents, in_seq, slot_seq, intent_seq = data_processor_valid.get_batch(
                        arg.batch_size)
                    if len(in_data) <= 0:
                        break
                    feed_dict = {input_data.name: in_data, sequence_length.name: length}
                    ret = sess.run(inference_outputs, feed_dict)
                    for i in ret[0]:
                        pred_intents.append(np.argmax(i))
                    for i in intents:
                        correct_intents.append(i)

                    pred_slots = ret[1].reshape((slot_data.shape[0], slot_data.shape[1], -1))
                    for p, t, i, l in zip(pred_slots, slot_data, in_data, length):
                        if arg.use_crf:
                            p = p.reshape([-1])
                        else:
                            p = np.argmax(p, 1)
                        tmp_pred = []
                        tmp_correct = []
                        tmp_input = []
                        for j in range(l):
                            tmp_pred.append(slot_vocab['rev'][p[j]])
                            tmp_correct.append(slot_vocab['rev'][t[j]])
                            tmp_input.append(in_vocab['rev'][i[j]])

                        slot_outputs.append(tmp_pred)
                        correct_slots.append(tmp_correct)
                        input_words.append(tmp_input)

                    if data_processor_valid.end == 1:
                        break

                pred_intents = np.array(pred_intents)
                correct_intents = np.array(correct_intents)
                accuracy = (pred_intents == correct_intents)
                semantic_acc = accuracy
                accuracy = accuracy.astype(float)
                accuracy = np.mean(accuracy) * 100.0

                index = 0
                for t, p in zip(correct_slots, slot_outputs):
                    # Process Semantic Error
                    if len(t) != len(p):
                        raise ValueError('Error!!')

                    for j in range(len(t)):
                        if p[j] != t[j]:
                            semantic_acc[index] = False
                            break
                    index += 1
                semantic_acc = semantic_acc.astype(float)
                semantic_acc = np.mean(semantic_acc) * 100.0

                f1, precision, recall = computeF1Score(correct_slots, slot_outputs)
                logging.info('slot f1: ' + str(f1))
                logging.info('intent accuracy: ' + str(accuracy))
                logging.info('semantic Acc(intent, slots are all correct): ' + str(semantic_acc))

                #data_processor_valid.close()
                return f1, accuracy, semantic_acc, pred_intents, correct_intents, slot_outputs, correct_slots, input_words, gate_seq

            logging.info('Valid:')
            val_text_arr=[]
            val_tags_arr=[]
            val_intents=[]
            for i in range(5000,7000):
              val_text_arr.append(text_arr_new[i])
              val_tags_arr.append(tag_arr_new2[i])
              val_intents.append(label_arr_new[i])
#print(len(val_text_arr),len(val_tags_arr),len(val_intents))
            epoch_valid_slot, epoch_valid_intent, epoch_valid_err, valid_pred_intent, valid_correct_intent, valid_pred_slot, valid_correct_slot, valid_words, valid_gate = valid(
                val_text_arr,
              val_tags_arr,
              val_intents)

            logging.info('Test:')
            data_text_arr=[]
            data_tags_arr=[]
            data_intents=[]
            for i in range(7000,8357):
              data_text_arr.append(text_arr_new[i])
              data_tags_arr.append(tag_arr_new2[i])
              data_intents.append(label_arr_new[i])
#print(len(data_text_arr),len(data_tags_arr),len(data_intents))
            epoch_test_slot, epoch_test_intent, epoch_test_err, test_pred_intent, test_correct_intent, test_pred_slot, test_correct_slot, test_words, test_gate = valid(
                data_text_arr,data_tags_arr,data_intents
                )
            # logging.info('Valid:')
            # epoch_valid_slot, epoch_valid_intent, epoch_valid_err, valid_pred_intent, valid_correct_intent, valid_pred_slot, valid_correct_slot, valid_words, valid_gate = valid(
            #     )

            # logging.info('Test:')
            # epoch_test_slot, epoch_test_intent, epoch_test_err, test_pred_intent, test_correct_intent, test_pred_slot, test_correct_slot, test_words, test_gate = valid(
            #     )

            if epoch_test_err <= test_err:
                no_improve += 1
            else:
                best_epoch_num = epochs
                test_err = epoch_test_err

                no_improve = 0

            if test_err > 0:
                logging.info('best epoch_num :  {}'.format(best_epoch_num))
                logging.info('best score : {}'.format(test_err))

            if epochs == arg.max_epochs:
                break

            if arg.early_stop == True:
                if no_improve > arg.patience:
                    break

batch_size = 16
batch_size_add = 4
cell = lstm
dataset = atis
decay_rate = 0.9
decay_steps = 1120
early_stop = True
embedding_path = 
input_file = seq.in
intent_file = label
iteration_num = 1
layer_size = 64
learning_rate = 0.001
learning_rate_decay = 1
max_epochs = 100
model_path = ./model
model_type = intent_only
patience = 15
priority_order = slot_first
slot_file = seq.out
test_data_path = test
train_data_path = train
use_crf = False
use_embedding = 1
valid_data_path = valid
vocab_path = ./vocab

use atis dataset
Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').


2020-07-03 14:50:27,855 : INFO : Training Start
2020-07-03 14:50:45,710 : INFO : Step: 313
2020-07-03 14:50:45,711 : INFO : Epochs: 1
2020-07-03 14:50:45,712 : INFO : Loss: 0.5085274893492936
2020-07-03 14:50:46,056 : INFO : Valid:
2020-07-03 14:50:47,321 : INFO : slot f1: 0.5333333333333333
2020-07-03 14:50:47,322 : INFO : intent accuracy: 79.3
2020-07-03 14:50:47,323 : INFO : semantic Acc(intent, slots are all correct): 60.650000000000006
2020-07-03 14:50:47,324 : INFO : Test:
2020-07-03 14:50:48,188 : INFO : slot f1: 0
2020-07-03 14:50:48,189 : INFO : intent accuracy: 81.79808400884305
2020-07-03 14:50:48,192 : INFO : semantic Acc(intent, slots are all correct): 60.132645541635966
2020-07-03 14:50:48,193 : INFO : best epoch_num :  1
2020-07-03 14:50:48,194 : INFO : best score : 60.132645541635966
2020-07-03 14:51:02,914 : INFO : Step: 552
2020-07-03 14:51:02,915 : INFO : Epochs: 2
2020-07-03 14:51:02,919 : INFO : Loss: 0.160588145910696
2020-07-03 14:51:03,197 : INFO : Valid:
2020-0

In [12]:
print(test_pred_intent[0], test_correct_intent[0], test_pred_slot[0], test_correct_slot[0],sep='\n')

2
2
['O', 'O', 'O']
['O', 'O', 'O']


In [13]:
for i in range(50):
  print(test_pred_intent[i], test_correct_intent[i], test_pred_slot[i], test_correct_slot[i],sep='\n')

2
2
['O', 'O', 'O']
['O', 'O', 'O']
4
4
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
0
0
['O', 'O', 'O']
['O', 'O', 'O']
5
0
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
1
1
['O', 'B-Cost', 'I-Cost', 'O', 'O']
['O', 'B-Cost', 'I-Cost', 'O', 'O']
0
0
['O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O']
1
1
['O', 'O', 'B-Internal_RAM', 'O', 'B-Internal_RAM', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'B-Internal_RAM', 'O', 'O', 'O', 'O']
3
3
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
4
4
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
0
6
['O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'B-Color', 'O']
1
1
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Color', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Color', 'O', 'O']
0
0
['O

In [14]:
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 
results = confusion_matrix(test_pred_intent, test_correct_intent) 
  
print('Confusion Matrix :')
print(results) 
print('Accuracy Score :',accuracy_score(test_pred_intent, test_correct_intent))
print('Report : ')
print(classification_report(test_pred_intent, test_correct_intent) )

Confusion Matrix :
[[321   4   0   0   0   1  48   0   2   0   0   0   1]
 [  3 309   0   1   0  12   0   0   0   1   0   0   0]
 [  0   0 166   0   0   0   0   0   0   0   0   0   0]
 [  0   2   0 154   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0 158   0   0   0   0   0   0   0   0]
 [  2  16   0   0   1  98   0   0   0   0   0   0   0]
 [  1   1   0   0   0   0  35   0   0   0   0   0   0]
 [  0   0   0   0   1   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   1   0   0   0   0   0   0   0   1   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0  13   0   0]
 [  0   0   0   1   0   0   0   0   0   0   0   1   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   2]]
Accuracy Score : 0.9270449521002211
Report : 
              precision    recall  f1-score   support

           0       0.98      0.85      0.91       377
           1       0.93      0.95      0.94       326
           2       1.00      1.00      1.00       166
  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 
results = confusion_matrix(valid_pred_intent, valid_correct_intent) 
  
print('Confusion Matrix :')
print(results) 
print('Accuracy Score :',accuracy_score(valid_pred_intent, valid_correct_intent))
print('Report : ')
print(classification_report(valid_pred_intent, valid_correct_intent) )

Confusion Matrix :
[[543  13   0   0   0   0  63   0   0   0]
 [  9 401   0   3   0  27   0   0   0   0]
 [  0   0 231   0   0   0   0   0   0   0]
 [  0   5   0 223   0   1   0   0   0   0]
 [  0   3   0   0 231   1   0   0   0   0]
 [  8  37   0   0   0 129   0   0   0   0]
 [  2   2   0   0   0   4  52   0   0   0]
 [  0   0   0   0   1   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   1   0]
 [  0   0   0   0   0   0   0   0   0  10]]
Accuracy Score : 0.9105
Report : 
              precision    recall  f1-score   support

           0       0.97      0.88      0.92       619
           1       0.87      0.91      0.89       440
           2       1.00      1.00      1.00       231
           3       0.99      0.97      0.98       229
           4       1.00      0.98      0.99       235
           5       0.80      0.74      0.77       174
           6       0.45      0.87      0.59        60
           7       0.00      0.00      0.00         1
           9       1.00      

  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
data_text_arr=[]
data_tags_arr=[]
data_intents=[]
for i in range(7000,8357):
  data_text_arr.append(text_arr_new[i])
  data_tags_arr.append(tag_arr_new2[i])
  data_intents.append(label_arr_new[i])
print(len(data_text_arr),len(data_tags_arr),len(data_intents))

1357 1357 1357


In [17]:
cc=0
dd=0
for i in range(len(test_correct_slot)):
  c=0
  length=len(test_pred_slot[i])
  for j in range(len(test_correct_slot[i])):
    if j<len(test_pred_slot[i]):
      if test_correct_slot[i][j]==test_pred_slot[i][j]:
        c=c+1
  if c==length:
    print(test_correct_slot[i],test_pred_slot[i])
    cc=cc+1
  else:
    dd=dd+1

['O', 'O', 'O'] ['O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O'] ['O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'B-Cost', 'I-Cost', 'O', 'O'] ['O', 'B-Cost', 'I-Cost', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Color', 'O', 'O'] ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Color', 'O', 'O']
['O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O'] ['O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'B-Color', 'O', 

In [18]:
print(cc)

1154


In [19]:
print(cc/1357)

0.8504053058216654


In [20]:
cc=0
dd=0
for i in range(len(valid_correct_slot)):
  c=0
  length=len(valid_pred_slot[i])
  for j in range(len(valid_correct_slot[i])):
    if j<len(valid_pred_slot[i]):
      if valid_correct_slot[i][j]==valid_pred_slot[i][j]:
        c=c+1
  if c==length:
    print(valid_correct_slot[i],valid_pred_slot[i])
    cc=cc+1
  else:
    dd=dd+1

['O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'B-Color', 'O', 'O'] ['O', 'O', 'O', 'O', 'O', 'B-Color', 'O', 'O']
['O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Cost', 'I-Cost', 'I-Cost'] ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Cost', 'I-Cost', 'I-Cost']
['O', 'O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'B-Brand', 'O', 'O', 'B-Weight_g', 'O'] ['O', 'O', 'O', 'B-Brand', 'O', 'O', 'B-Weight_g', 'O']
['O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O']
['O', 'O', 'O'] ['O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O'] ['O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'B-Internal_RAM', 'O', 'O'] ['O', 'O', 'O', 'B-Internal_RAM', 'O', 'O']
['O', 'O', 'O', 'O', 'O'

In [21]:
print(cc)

1669


In [22]:
print(cc/2000)

0.8345
