# Sentence Level Aspect-Based Sentiment Analysis with TreeLSTMs

This notebook trains a Constituency Tree-LSTM model on the Laptop review dataset using TensorFlow Fold.

In [27]:
import tensorflow as tf
sess = tf.InteractiveSession()
import tensorflow_fold as td
import gensim
import numpy as np
import math

## Data Loading & Preprocessing

We load the tree strings from the tree folder and convert them into tree objects. The list of tree objects is passed to the main model for training/evaluation.

The code in the cell below creates tree objects and provides utilities to operate over them.

In [49]:
class Node:
    """One node of a parse tree.

    Every node starts out detached (no parent, no children), unlabeled unless
    a label is passed in, and not marked as a leaf. The code that builds the
    tree is responsible for filling in the links and flags afterwards.
    """

    def __init__(self, label=None, word=None):
        # Payload carried by the node.
        self.label, self.word = label, word
        # Links to the neighbouring nodes; all unset on construction.
        self.parent = self.left = self.right = None
        # Set to True by the tree builder when the node holds a word.
        self.isLeaf = False
        # Level of the node within its tree; defaults to 0.
        self.level = 0
        # Whether `label` holds a real annotation.
        self.has_label = False
           
class Tree:
    """Binary parse tree built from a bracketed tree string.

    The string is stripped of whitespace and parsed character by character.
    A node may start with `label_size` digit characters directly after its
    opening bracket; those digits become the node's label array. After
    parsing, unary nodes are fused with their only child so that every node
    has either zero or two children.

    Attributes:
      open, close: bracket characters used by the parser.
      root: root Node of the (binarized) tree.
      binary: result of check_for_binarization(root); asserted to be True.
      depth: get_depth(root).
      levels: max(floor(log2(depth)) - 1, 0), or 0 for a depth-0 tree.
      labels: get_labels(root).
    """

    def __init__(self, treeString, openChar='(', closeChar=')', label_size = 18):
        """Parse `treeString` and compute the derived tree attributes.

        Args:
          treeString: bracketed tree, e.g. '((the) (laptop))'.
          openChar: single character that opens a node.
          closeChar: single character that closes a node.
          label_size: number of digit characters that make up a node label.
        """
        # Honour the caller's delimiters instead of hard-coding '(' and ')'.
        self.open = openChar
        self.close = closeChar
        # Whitespace is dropped; the parser works on individual characters.
        tokens = []
        for toks in treeString.strip().split():
            tokens += list(toks)
        self.root = self.parse(tokens, label_size=label_size)

        # Ensure a binary parse tree: a node has either 0 or 2 children.
        self.self_binarize()
        self.binary = check_for_binarization(self.root)
        assert self.binary == True, "Tree is not binary"
        self.depth = get_depth(self.root)
        if self.depth > 0:
            self.levels = max(
                math.floor(math.log(float(self.depth)) / math.log(float(2))) - 1, 0)
        else:
            # A single-leaf tree has depth 0; log(0) is undefined.
            self.levels = 0
        self.labels = get_labels(self.root)

    def parse(self, tokens, parent=None, label_size = 18):
        """Recursively turn a list of characters into a Node subtree.

        Args:
          tokens: characters of one bracketed node, brackets included.
          parent: Node the new node hangs under, or None for the root.
          label_size: number of digit characters forming a label.

        Returns:
          The Node for `tokens`, with its children parsed and linked.

        Raises:
          AssertionError: if `tokens` is empty or not wrapped in brackets.
        """
        assert tokens, "Malformed tree"
        assert tokens[0] == self.open, "Malformed tree"
        assert tokens[-1] == self.close, "Malformed tree"

        split = 1   # scan position, starts right after the opening bracket
        marker = 1  # start of the node's content (after any label)
        countOpen = countClose = 0
        label = None
        if (split + label_size) < len(tokens):
            digits = tokens[split:split + label_size]
            if ''.join(digits).isdigit():
                label = np.asarray(digits).astype(int)
                split += label_size
                marker += label_size

        if tokens[split] == self.open:
            countOpen += 1
            split += 1
        # Advance to the end of the left child (brackets balanced again).
        while countOpen != countClose:
            if tokens[split] == self.open:
                countOpen += 1
            if tokens[split] == self.close:
                countClose += 1
            split += 1

        if isinstance(label, np.ndarray):
            node = Node(label)
            node.has_label = True
        else:
            node = Node()

        if parent is not None:
            node.parent = parent
            node.level = parent.level + 1

        # No nested bracket was found: this is a leaf holding a word.
        if countOpen == 0:
            # Case is preserved on purpose (e.g. "Apple" vs "apple").
            node.word = ''.join(tokens[marker:-1])
            node.isLeaf = True
            return node

        # Propagate label_size so nested nodes use the same label width.
        node.left = self.parse(tokens[marker:split], parent=node,
                               label_size=label_size)
        if tokens[split] == self.open:
            node.right = self.parse(tokens[split:-1], parent=node,
                                    label_size=label_size)

        return node

    def get_words(self):
        """Return the words of all leaves, left to right."""
        def get_leaves(node):
            if node is None:
                return []
            if node.isLeaf:
                return [node]
            return get_leaves(node.left) + get_leaves(node.right)

        return [leaf.word for leaf in get_leaves(self.root)]

    def self_binarize(self):
        """Fuse every unary node with its only (left) child, in place."""
        def binarize_tree(node):
            if node.isLeaf:
                return
            if node.left is not None and node.right is not None:
                binarize_tree(node.left)
                binarize_tree(node.right)
                return

            # Unary node: the child inherits the label and takes its place.
            child = node.left
            child.label = node.label
            # NOTE(review): only the promoted child's level is adjusted;
            # levels of nodes further down are left as parsed.
            child.level -= 1

            if node.level != 0:
                if node.parent.right is node:
                    node.parent.right = child
                else:
                    node.parent.left = child
                child.parent = node.parent
            else:
                # The unary node was the root, so the child becomes the root.
                self.root = child
                child.parent = None
                self.root.has_label = True

            binarize_tree(child)

        binarize_tree(self.root)


 
# Optional helper: pushes labels from parents down to their children.
# Not needed for the Tree-LSTM model.
def propagate_label(node, levels, depth):
    """Copy each parent's label onto its children, down to level `levels`.

    Args:
      node: subtree root to start from (None is accepted and ignored).
      levels: deepest node level that still receives a label.
      depth: unused; only passed along to the recursive calls.
    """
    if node is None or node.level > levels:
        return

    if node.parent:
        node.label = node.parent.label
        node.has_label = True

    for child in (node.left, node.right):
        propagate_label(child, levels, depth)

   
def get_depth(node):
    """Return the number of edges on the longest path from `node` to a leaf.

    Args:
      node: subtree root, or None.

    Returns:
      None when `node` is None, 0 for a leaf, otherwise 1 plus the depth of
      the deeper child. A missing child is ignored rather than compared as
      None, which raised TypeError on Python 3.
    """
    if node is None:
        return None
    if node.isLeaf:
        return 0

    def child_depth(child):
        # A missing child contributes nothing below the current node.
        found = get_depth(child)
        return -1 if found is None else found

    return 1 + max(child_depth(node.left), child_depth(node.right))


def get_labels(node):
    """Collect labels in post-order (left subtree, right subtree, node).

    The walk stops at the first unlabeled node on each path: an unlabeled
    node contributes nothing and its children are not visited.
    """
    if node is None or node.has_label == False:
        return []
    collected = get_labels(node.left)
    collected.extend(get_labels(node.right))
    collected.append(node.label)
    return collected



def check_for_binarization(node):
    """Return True when every non-leaf node under `node` has a right child.

    Leaves count as binary. Both subtrees are always checked before the
    results are combined.
    """
    if node.isLeaf:
        return True
    if node.right is None:
        return False
    left_ok = check_for_binarization(node.left)
    right_ok = check_for_binarization(node.right)
    return left_ok & right_ok
    

We load the strings and convert them into a list of tree objects.

In [50]:
def loadTrees(dataSet='train'):
    """Read 'trees/<dataSet>.txt' and parse every non-blank line into a Tree.

    Args:
      dataSet: name of the split; selects the file 'trees/<dataSet>.txt'.

    Returns:
      A list of Tree objects, one per non-blank line, in file order.
    """
    path = 'trees/%s.txt' % dataSet
    print ("Loading %s trees.." % dataSet)
    with open(path, 'r') as fid:
        # Blank lines (e.g. a trailing newline at end of file) carry no tree
        # and would otherwise crash the parser on an empty token list.
        trees = [Tree(line) for line in fid if line.strip()]

    return trees

In [51]:
# Parse each split's tree file into a list of Tree objects.
train_trees = loadTrees('train')
dev_trees = loadTrees('dev')
test_trees = loadTrees('test')

Loading train trees..
Loading dev trees..
Loading test trees..


Create a list of root nodes for each of the tree objects.

In [52]:
# The model consumes root nodes directly; the Tree wrappers are not needed downstream.
train_nodes = [t.root for t in train_trees]
dev_nodes = [t.root for t in dev_trees]
test_nodes = [t.root for t in test_trees]

Load the entire Google Word2vec corpus into memory. This will take a few minutes.

In [19]:
def loadmodel(path='./GoogleNews-vectors-negative300.bin.gz'):
    """Load binary word2vec vectors with gensim.

    Args:
      path: location of the binary word2vec file. Defaults to the Google
        News vectors in the working directory.

    Returns:
      The object returned by gensim's KeyedVectors.load_word2vec_format.
    """
    print("Loading Google Word2vecs....")
    return gensim.models.KeyedVectors.load_word2vec_format(path, binary=True)


In [22]:
# NOTE(review): the name `model` is rebound further down to the Fold block
# returned by embed_tree, so the word2vec vectors are only reachable until then.
model = loadmodel()

Loading Google Word2vecs....


Create a dictionary that maps each word to its word2vec vector, keeping only words that occur in the training, dev and test sets.

In [53]:
# Only retain words that occur in the train, dev and test sets.
def filter_model(model):
    """Restrict pretrained vectors to the vocabulary of the tree files.

    The train, dev and test trees are re-read from disk via loadTrees.

    Args:
      model: word-vector lookup supporting `word in model` and `model[word]`
        (e.g. gensim KeyedVectors).

    Returns:
      A dict mapping every dataset word found in `model` to its vector.
    """
    trees = loadTrees('train') + loadTrees('dev') + loadTrees('test')
    vocab = set()
    for tree in trees:
        vocab.update(tree.get_words())
    # Membership is tested on the model itself: the `vocab` attribute used
    # previously is not available in newer gensim releases.
    return {word: model[word] for word in vocab if word in model}

In [54]:
# Keep only the vectors for words that appear in the tree files.
filtered_model = filter_model(model)

Loading train trees..
Loading dev trees..
Loading test trees..


Load the embeddings; return the weight matrix and a dict mapping words to row indices.

In [55]:
def load_embeddings(filtered_model):
  """Stack word vectors into a matrix and index the words by row.

  Args:
    filtered_model: dict mapping word -> vector.

  Returns:
    (matrix, word_idx): a float32 matrix with one row per word plus one
    final random row for unknown words, and a dict mapping word -> row.
  """
  print('loading word embeddings')
  word_idx = {}
  rows = []
  for position, (word, vector) in enumerate(filtered_model.items()):
    word_idx[word] = position
    rows.append(np.array(vector, dtype=np.float32))
  # The last row is a random embedding shared by all unknown words.
  unknown_row = np.random.uniform(-0.05, 0.05, rows[0].shape).astype(np.float32)
  rows.append(unknown_row)
  return np.stack(rows), word_idx

In [56]:
# weight_matrix has one row per known word plus a final row for unknown words.
weight_matrix, word_idx = load_embeddings(filtered_model)

loading word embeddings


In [57]:
class BinaryTreeLSTMCell(tf.contrib.rnn.BasicLSTMCell):
  """LSTM cell that merges the states of two children.

  Implements the binary Tree-LSTM of section 3.2 in 'Improved Semantic
  Representations From Tree-Structured Long Short-Term Memory Networks'
  <http://arxiv.org/pdf/1503.00075.pdf>, with the recurrent dropout of
  'Recurrent Dropout without Memory Loss'
  <http://arxiv.org/pdf/1603.05118.pdf> applied to the candidate update.
  """

  def __init__(self, num_units, keep_prob=1.0):
    """Create the cell.

    Args:
      num_units: int, number of units in the LSTM cell.
      keep_prob: keep probability for the recurrent dropout; a Python float
        or a tensor.
    """
    super(BinaryTreeLSTMCell, self).__init__(num_units)
    self._keep_prob = keep_prob

  def __call__(self, inputs, state, scope=None):
    """Run one step: combine `inputs` with the (c, h) states of two children.

    Args:
      inputs: input tensor for this node.
      state: pair ((c0, h0), (c1, h1)) of left and right child states.
      scope: optional variable scope; defaults to the class name.

    Returns:
      (new_h, LSTMStateTuple(new_c, new_h)).
    """
    with tf.variable_scope(scope or type(self).__name__):
      (c0, h0), (c1, h1) = state
      stacked_inputs = tf.concat([inputs, h0, h1], 1)
      gates = tf.contrib.layers.linear(stacked_inputs, 5 * self._num_units)

      # One input gate, one candidate, one forget gate per child, one output gate.
      in_gate, candidate, forget0, forget1, out_gate = tf.split(
          value=gates, num_or_size_splits=5, axis=1)

      candidate = self._activation(candidate)
      keep_prob = self._keep_prob
      # Dropout is skipped only when keep_prob is a Python float >= 1.
      dropout_statically_off = isinstance(keep_prob, float) and not (keep_prob < 1)
      if not dropout_statically_off:
        candidate = tf.nn.dropout(candidate, keep_prob)

      kept_left = c0 * tf.sigmoid(forget0 + self._forget_bias)
      kept_right = c1 * tf.sigmoid(forget1 + self._forget_bias)
      written = tf.sigmoid(in_gate) * candidate
      new_c = kept_left + kept_right + written
      new_h = self._activation(new_c) * tf.sigmoid(out_gate)

      return new_h, tf.contrib.rnn.LSTMStateTuple(new_c, new_h)

In [58]:
# Scalar dropout keep-probability; evaluates to 1.0 (no dropout) unless a value is fed.
keep_prob_ph = tf.placeholder_with_default(1.0, [])

In [95]:
lstm_num_units = 300  # Tai et al. used 150, but our regularization strategy is more effective
# The same keep-probability placeholder drives the cell's recurrent dropout
# and the wrapper's input and output dropout.
tree_lstm = td.ScopedLayer(
      tf.contrib.rnn.DropoutWrapper(
          BinaryTreeLSTMCell(lstm_num_units, keep_prob=keep_prob_ph),
          input_keep_prob=keep_prob_ph, output_keep_prob=keep_prob_ph),
      name_or_scope='tree_lstm')

In [206]:
NUM_ASPECTS = 18  # number of aspects
NUM_POLARITY = 3 # number of polarity classes associated with an aspect (1 = mildly +ve or -ve, 2 = -ve, 3 = +ve)
# Per aspect the layer emits NUM_POLARITY + 2 logits: the loss reads the first
# two as the aspect-detection pair and the remaining ones as polarity scores.
output_layer = td.FC(NUM_ASPECTS*(NUM_POLARITY+2), activation=None,  name='output_layer')

In [235]:
# Embedding table initialised from the pretrained matrix and kept frozen (trainable = False).
word_embedding = td.Embedding(
    *weight_matrix.shape, initializer=weight_matrix, name='word_embedding', trainable = False)

In [236]:
# Placeholder for the recursive subtree block; it is resolved later via resolve_to.
embed_subtree = td.ForwardDeclaration(name='embed_subtree')

In [237]:
def logits_and_state():
  """Creates a block that goes from tokens to (logits, state) tuples.

  The block dispatches on the length of its input: a one-element input is
  treated as a word, a two-element input as a pair of subtrees. Either way
  the result is fed through tree_lstm, and the LSTM output is mapped to
  logits by output_layer while the LSTM state is passed through unchanged.
  """
  # Words missing from word_idx map to the index just past the known words,
  # i.e. the extra random row that load_embeddings appends last.
  unknown_idx = len(word_idx)
  lookup_word = lambda word: word_idx.get(word, unknown_idx)
  
  # Leaf path: take the single word, map it to its index, then embed it.
  word2vec = (td.GetItem(0) >> td.InputTransform(lookup_word) >>
              td.Scalar('int32') >> word_embedding)

  # Internal-node path: both children are embedded recursively.
  pair2vec = (embed_subtree(), embed_subtree())

  # Trees are binary, so the tree layer takes two states as its input_state.
  zero_state = td.Zeros((tree_lstm.state_size,) * 2)
  # Input is a word vector.
  zero_inp = td.Zeros(word_embedding.output_type.shape[0])

  # A word has no child states; a pair has no word input. Zeros fill the gap.
  word_case = td.AllOf(word2vec, zero_state)
  pair_case = td.AllOf(zero_inp, pair2vec)

  tree2vec = td.OneOf(len, [(1, word_case), (2, pair_case)])

  return tree2vec >> tree_lstm >> (output_layer, td.Identity())

In [238]:
def tf_node_loss(logits, labels):
  """Per-example loss combining aspect detection and polarity prediction.

  Args:
    logits: float tensor reshaped here to [-1, NUM_ASPECTS, NUM_POLARITY + 2];
      per aspect, channels 0-1 are the detection logits and the remaining
      channels the polarity logits.
    labels: integer tensor with one entry per aspect; 0 means the aspect is
      absent, k > 0 selects polarity class k.

  Returns:
    A tensor with one loss value per example: the detection cross-entropy
    summed over aspects, plus 1.05 times the polarity negative
    log-likelihood summed over the aspects that are present.
  """
  logits_ = tf.reshape(logits, [-1, NUM_ASPECTS,  NUM_POLARITY + 2])

  # Task 1: aspect detection (is the aspect present or not).
  is_present = tf.cast(labels > 0, tf.int32)
  detection_logits = logits_[:, :, :2]
  detection_loss = tf.reduce_sum(
      tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=detection_logits, labels=is_present),
      axis=1)

  # Task 2: polarity prediction. log_softmax is used instead of
  # log(softmax(x)): a saturated softmax yields log(0) = -inf, and
  # 0 * -inf in the masked sum below would turn the loss into NaN.
  polarity_log_probs = tf.nn.log_softmax(logits_[:, :, 2:])
  # Dropping the first one-hot column masks out absent aspects (label 0).
  polarity_targets = tf.one_hot(labels, depth=NUM_POLARITY + 1, axis=-1)[:, :, 1:]
  polarity_log_likelihood = tf.reduce_sum(
      tf.multiply(polarity_targets, polarity_log_probs), [1, 2])

  return detection_loss + tf.scalar_mul(-1.05, polarity_log_likelihood)

In [239]:
#Task 2: count aspects whose polarity class is predicted correctly
def task2_truepositives(logits, labels):
    """Count, per example, the aspects whose polarity is predicted correctly.

    A polarity class counts as predicted when its logit is strictly greater
    than the logits of the two other polarity classes. Label value k
    (1, 2, 3) corresponds to logit channel k + 1 (2, 3, 4).

    Args:
      logits: tensor reshaped here to [-1, NUM_ASPECTS, NUM_POLARITY + 2].
      labels: per-aspect label tensor.

    Returns:
      A float64 tensor with one count per example.
    """
    logits_ = tf.reshape(logits, [-1, NUM_ASPECTS,  NUM_POLARITY + 2])
    polarity_channels = (2, 3, 4)

    total = None
    for label_value, channel in enumerate(polarity_channels, 1):
        rival_a, rival_b = [c for c in polarity_channels if c != channel]
        score = logits_[:, :, channel]
        predicted = (score > logits_[:, :, rival_a]) & (score > logits_[:, :, rival_b])
        actual = (labels > label_value - 1) & (labels < label_value + 1)
        hits = tf.reduce_sum(
            tf.cast(tf.logical_and(actual, predicted), tf.float64), axis = 1)
        total = hits if total is None else total + hits

    return total
    


In [240]:
#Task 2: count the aspects that are present (denominator of task 2 accuracy)
def task2_dem(logits, labels):
    """Count, per example, the aspects with a label greater than 0.

    Args:
      logits: unused; kept so the signature matches the other metrics.
      labels: per-aspect label tensor.

    Returns:
      A float64 tensor with one count per example.
    """
    aspect_present = labels > 0
    return tf.reduce_sum(tf.cast(aspect_present, tf.float64), axis = 1)
    

In [241]:
#Task 1: count true positives (aspect present and predicted present)
def tf_tpr(logits, labels):
  """Count, per example, aspects that are present and predicted present.

  Despite the name this returns a count, not a rate. An aspect is predicted
  present when channel 1 exceeds channel 0 after a softmax over all
  NUM_POLARITY + 2 channels.

  Returns:
    A float64 tensor with one count per example.
  """
  probs = tf.nn.softmax(tf.reshape(logits, [-1, NUM_ASPECTS, NUM_POLARITY + 2]))
  predicted_present = probs[:, :, 1] > probs[:, :, 0]
  actually_present = labels > 0

  hits = tf.logical_and(actually_present, predicted_present)
  return tf.reduce_sum(tf.cast(hits, tf.float64), axis = 1)

In [242]:
#Task 1: count true negatives (aspect absent and predicted absent)
def tf_tnr(logits, labels):
  """Count, per example, aspects that are absent and predicted absent.

  Despite the name this returns a count, not a rate. An aspect is predicted
  present when channel 1 exceeds channel 0 after a softmax over all
  NUM_POLARITY + 2 channels.

  Returns:
    A float64 tensor with one count per example.
  """
  probs = tf.nn.softmax(tf.reshape(logits, [-1, NUM_ASPECTS, NUM_POLARITY + 2]))
  predicted_present = probs[:, :, 1] > probs[:, :, 0]
  actually_present = labels > 0

  hits = tf.logical_and(tf.logical_not(actually_present),
                        tf.logical_not(predicted_present))
  return tf.reduce_sum(tf.cast(hits, tf.float64), axis = 1)

In [243]:
#Task 1: count false positives (aspect absent but predicted present)
def tf_fpr(logits, labels):
  """Count, per example, aspects that are absent but predicted present.

  Despite the name this returns a count, not a rate. An aspect is predicted
  present when channel 1 exceeds channel 0 after a softmax over all
  NUM_POLARITY + 2 channels.

  Returns:
    A float64 tensor with one count per example.
  """
  probs = tf.nn.softmax(tf.reshape(logits, [-1, NUM_ASPECTS,  NUM_POLARITY + 2]))
  predicted_present = probs[:, :, 1] > probs[:, :, 0]
  actually_present = labels > 0

  hits = tf.logical_and(tf.logical_not(actually_present), predicted_present)
  return tf.reduce_sum(tf.cast(hits, tf.float64), axis = 1)

In [244]:
#Task 1: count false negatives (aspect present but predicted absent)
def tf_fnr(logits, labels):
  """Count, per example, aspects that are present but predicted absent.

  Despite the name this returns a count, not a rate. An aspect is predicted
  present when channel 1 exceeds channel 0 after a softmax over all
  NUM_POLARITY + 2 channels.

  Returns:
    A float64 tensor with one count per example.
  """
  probs = tf.nn.softmax(tf.reshape(logits, [-1, NUM_ASPECTS, NUM_POLARITY + 2]))
  predicted_present = probs[:, :, 1] > probs[:, :, 0]
  actually_present = labels > 0

  hits = tf.logical_and(actually_present, tf.logical_not(predicted_present))
  return tf.reduce_sum(tf.cast(hits, tf.float64), axis = 1)

In [245]:
def add_metrics(is_root, is_neutral):
  """A block that records loss and count metrics; output is the LSTM state.

  Args:
    is_root: when true, the root-only metrics ('root_loss', 'tpr', 'tnr',
      'fpr', 'fnr', 'task2', 'task2dem') are recorded in addition to the
      'all_*' metrics.
    is_neutral: only used in the name of the composition.

  Returns:
    A td.Composition taking (labels, neutral, (logits, state)) and
    outputting state.
  """
  c = td.Composition(
      name='predict(is_root=%s, is_neutral=%s)' % (is_root, is_neutral))
  with c.scope():
    # destructure the input; (labels, neutral, (logits, state))
    labels = c.input[0]
    logits = td.GetItem(0).reads(c.input[2])
    state = td.GetItem(1).reads(c.input[2])

    # Loss is recorded for every node; the root additionally feeds 'root_loss'.
    loss = td.Function(tf_node_loss)
    td.Metric('all_loss').reads(loss.reads(logits, labels))
    if is_root: td.Metric('root_loss').reads(loss)
   
    # Task 1 confusion counts and task 2 numerator/denominator.
    tpr = td.Function(tf_tpr)
    tnr = td.Function(tf_tnr)
    fpr = td.Function(tf_fpr)
    fnr = td.Function(tf_fnr)
    t2_acc = td.Function(task2_truepositives)
    t2_dem = td.Function(task2_dem)
    td.Metric('all_tpr').reads(tpr.reads(logits, labels))
    td.Metric('all_tnr').reads(tnr.reads(logits, labels))
    td.Metric('all_fpr').reads(fpr.reads(logits, labels))
    td.Metric('all_fnr').reads(fnr.reads(logits, labels)) 
    td.Metric('all_task2').reads(t2_acc.reads(logits, labels)) 
    td.Metric('all_task2dem').reads(t2_dem.reads(logits, labels)) 
    # Root-only copies of the same metrics.
    if is_root: 
        td.Metric('tpr').reads(tpr)
        td.Metric('tnr').reads(tnr)
        td.Metric('fpr').reads(fpr)
        td.Metric('fnr').reads(fnr)
        td.Metric('task2').reads(t2_acc)
        td.Metric('task2dem').reads(t2_dem)
   
    # output the state, which will be read by our by parent's LSTM cell
    c.output.reads(state)
  return c

In [246]:
def tokenize(node):
  """Split a tree node into (label, neutral, group) for the Fold pipeline.

  Args:
    node: tree node exposing has_label, label, isLeaf, word, left and right.

  Returns:
    label: the node's label, or a zero vector of length NUM_ASPECTS when the
      node has no label.
    neutral: '1' when the node has a label, '2' otherwise.
    group: [word] for a leaf, [left, right] for an internal node.
  """
  if node.has_label:
    label = node.label
    neutral = '1'
  else:
    # Builtin int replaces the np.int alias, which newer NumPy no longer has.
    label = np.zeros((NUM_ASPECTS,), dtype=int)
    neutral = '2'

  if node.isLeaf:
    group = [node.word]
  else:
    group = [node.left, node.right]
  return label, neutral, group

In [247]:
# Sanity check on the first training tree: show the group size and the label shape.
node = train_nodes[0]
label, neutral, group = tokenize(node)
print (len(group))
print (label.shape)

2
(18,)


In [248]:
def embed_tree(logits_and_state, is_root):
  """Creates a block that embeds trees; output is tree LSTM state.

  Args:
    logits_and_state: block applied to the node's group (third element of
      the tuple produced by tokenize).
    is_root: forwarded to add_metrics to enable the root-only metrics.
  """
  return td.InputTransform(tokenize) >> td.OneOf(
      key_fn=lambda pair: pair[1] == '2',  # '2' is the flag tokenize gives to nodes without a label
      case_blocks=(add_metrics(is_root, is_neutral=False),
                   add_metrics(is_root, is_neutral=True)),
      pre_block=(td.Vector(NUM_ASPECTS, dtype = 'int32'), td.Scalar('int32'), logits_and_state))

In [249]:
# Root-level block. NOTE(review): this rebinds `model`, which previously held the word2vec vectors.
model = embed_tree(logits_and_state(), is_root=True)

In [250]:
# Close the recursion: subtrees use the same block, built with is_root=False.
embed_subtree.resolve_to(embed_tree(logits_and_state(), is_root=False))

In [251]:
# Compile the Fold block and show its input and output types.
compiler = td.Compiler.create(model)
print('input type: %s' % model.input_type)
print('output type: %s' % model.output_type)

input type: PyObjectType()
output type: TupleType(TensorType((300,), 'float32'), TensorType((300,), 'float32'))


In [252]:
# Every metric is reduced to its mean over the recorded values.
metrics = {k: tf.reduce_mean(v) for k, v in compiler.metric_tensors.items()}

In [254]:
# Training hyperparameters.
LEARNING_RATE = 0.05  # initial learning rate
KEEP_PROB = 0.60  # dropout keep-probability fed during training
BATCH_SIZE = 32
EPOCHS = 80

In [255]:
# Staircase decay: the learning rate is multiplied by 0.90 every 600 global steps.
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = LEARNING_RATE
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           600, 0.90, staircase=True)

In [256]:
# Dropout is active only in training, where KEEP_PROB is fed explicitly.
train_feed_dict = {keep_prob_ph: KEEP_PROB}
# Only the loss at the root nodes is optimised.
loss = tf.reduce_mean(compiler.metric_tensors['root_loss'])
# NOTE(review): `opt` is not used in the visible code; the training op below
# builds its own optimizer with the decayed learning rate.
opt = tf.train.AdagradOptimizer(LEARNING_RATE)
learning_step = (
    tf.train.AdagradOptimizer(learning_rate)
    .minimize(loss, global_step=global_step)
)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [257]:
# Initialise all graph variables in the interactive session.
sess.run(tf.global_variables_initializer())

In [258]:
def train_step(batch):
  """Run one optimisation step on `batch` and return the batch loss.

  The batch is stored in the shared train_feed_dict under the compiler's
  loom input tensor before the session is run.
  """
  train_feed_dict[compiler.loom_input_tensor] = batch
  fetched = sess.run([learning_step, loss], train_feed_dict)
  _, batch_loss = fetched
  return batch_loss

In [259]:
def train_epoch(train_set):
  """Train on every batch of `train_set` and return the mean batch loss.

  Batches hold BATCH_SIZE examples each. An empty training set yields 0.
  """
  batch_losses = []
  for batch in td.group_by_batches(train_set, BATCH_SIZE):
    batch_losses.append(train_step(batch))
  batch_count = max(len(batch_losses), 1)
  return sum(batch_losses) / batch_count

In [260]:
# Convert the training trees into loom inputs for batching.
train_set = compiler.build_loom_inputs(train_nodes)

In [261]:
# The whole dev set is evaluated with a single feed dict.
dev_feed_dict = compiler.build_feed_dict(dev_nodes)

In [262]:
def dev_eval(epoch, train_loss):
  """Evaluate on the dev set, print a summary line and return the F1 score.

  The root-level metrics are means over the dev examples, so their ratios
  equal the ratios of the underlying totals. Every ratio falls back to 0
  when its denominator is 0 (e.g. no positive aspects in the dev set)
  instead of raising ZeroDivisionError.

  Args:
    epoch: epoch number, used only in the printed line.
    train_loss: training loss of the epoch, used only in the printed line.

  Returns:
    The task 1 (aspect detection) F1 score on the dev set.
  """
  dev_metrics = sess.run(metrics, dev_feed_dict)
  dev_loss = dev_metrics['root_loss']

  tp = float(dev_metrics['tpr'])
  fp = float(dev_metrics['fpr'])
  fn = float(dev_metrics['fnr'])
  task2_hits = float(dev_metrics['task2'])
  task2_total = float(dev_metrics['task2dem'])

  def safe_ratio(numerator, denominator):
    return numerator / denominator if denominator > 0 else 0.

  recall = safe_ratio(tp, tp + fn)
  precision = safe_ratio(tp, tp + fp)
  f1_score = safe_ratio(2 * (precision * recall), precision + recall)
  t2_acc = safe_ratio(task2_hits, task2_total)

  print('epoch:%4d, train_loss: %.3e, dev_loss: %.3e,Task1 Precision: %.3e, Task1 Recall: %.3e, Task1 F1 score: %2.3e, Task2 Acc: %2.3e'
        % (epoch, train_loss, dev_loss, precision, recall, f1_score, t2_acc))
  return f1_score

In [263]:
# NOTE(review): best_accuracy and save_path are assigned but not used in the
# visible code, and the returned f1_score is not acted on - no checkpoint is
# written by this loop.
best_accuracy = 0.0
save_path = 'weights/sentiment_model'
# Epochs are numbered from 1; each pass trains on the set yielded by td.epochs.
for epoch, shuffled in enumerate(td.epochs(train_set, EPOCHS), 1):
  train_loss = train_epoch(shuffled)
  f1_score = dev_eval(epoch, train_loss)
 

epoch:   1, train_loss: 5.386e+00, dev_loss: 5.296e+00,Task1 Precision: 7.143e-01, Task1 Recall: 4.274e-02, Task1 F1 score: 8.065e-02, Task2 Acc: 5.769e-01
epoch:   2, train_loss: 4.789e+00, dev_loss: 5.213e+00,Task1 Precision: 5.909e-01, Task1 Recall: 5.556e-02, Task1 F1 score: 1.016e-01, Task2 Acc: 5.812e-01
epoch:   3, train_loss: 4.687e+00, dev_loss: 4.907e+00,Task1 Precision: 0.000e+00, Task1 Recall: 0.000e+00, Task1 F1 score: 0.000e+00, Task2 Acc: 6.197e-01
epoch:   4, train_loss: 4.580e+00, dev_loss: 4.953e+00,Task1 Precision: 1.000e+00, Task1 Recall: 3.419e-02, Task1 F1 score: 6.612e-02, Task2 Acc: 6.026e-01
epoch:   5, train_loss: 4.541e+00, dev_loss: 4.887e+00,Task1 Precision: 6.522e-01, Task1 Recall: 6.410e-02, Task1 F1 score: 1.167e-01, Task2 Acc: 5.598e-01
epoch:   6, train_loss: 4.453e+00, dev_loss: 5.142e+00,Task1 Precision: 3.711e-01, Task1 Recall: 1.538e-01, Task1 F1 score: 2.175e-01, Task2 Acc: 5.983e-01
epoch:   7, train_loss: 4.342e+00, dev_loss: 4.768e+00,Task1 Pre

epoch:  54, train_loss: 2.493e+00, dev_loss: 3.779e+00,Task1 Precision: 6.108e-01, Task1 Recall: 4.359e-01, Task1 F1 score: 5.087e-01, Task2 Acc: 6.795e-01
epoch:  55, train_loss: 2.429e+00, dev_loss: 3.688e+00,Task1 Precision: 6.241e-01, Task1 Recall: 3.761e-01, Task1 F1 score: 4.693e-01, Task2 Acc: 7.051e-01
epoch:  56, train_loss: 2.431e+00, dev_loss: 3.747e+00,Task1 Precision: 5.445e-01, Task1 Recall: 4.444e-01, Task1 F1 score: 4.894e-01, Task2 Acc: 7.436e-01
epoch:  57, train_loss: 2.419e+00, dev_loss: 3.655e+00,Task1 Precision: 6.357e-01, Task1 Recall: 3.803e-01, Task1 F1 score: 4.759e-01, Task2 Acc: 6.923e-01
epoch:  58, train_loss: 2.399e+00, dev_loss: 3.595e+00,Task1 Precision: 6.026e-01, Task1 Recall: 4.017e-01, Task1 F1 score: 4.821e-01, Task2 Acc: 7.393e-01
epoch:  59, train_loss: 2.426e+00, dev_loss: 3.710e+00,Task1 Precision: 6.119e-01, Task1 Recall: 3.504e-01, Task1 F1 score: 4.457e-01, Task2 Acc: 7.222e-01
epoch:  60, train_loss: 2.372e+00, dev_loss: 3.781e+00,Task1 Pre