### Deep Sentiment Analysis

In [0]:
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


### Download Data

In [0]:
![ ! -d data ] && mkdir data/
![ -f data/aclImdb_v1.tar.gz ] && echo "Skip Download"
![ ! -f data/aclImdb_v1.tar.gz ] && wget -N https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz -P data/

Skip Download


In [0]:
%%time
![ -d data/aclImdb/ ] && echo "Data already extracted"
![ ! -d data/aclImdb/ ] && tar -xzf data/aclImdb_v1.tar.gz -C data/

Data already extracted
CPU times: user 6.34 ms, sys: 4.91 ms, total: 11.2 ms
Wall time: 224 ms


### Imports

In [0]:
import os
import re
import nltk
from collections import Counter
from tqdm import tqdm_notebook
import numpy as np
import tensorflow as tf
from tensorflow.contrib import seq2seq
from tensorflow.contrib.rnn import DropoutWrapper
import random

In [0]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [0]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/bishal/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [0]:
MAX_SEQ_LEN = 50
BATCH_SIZE = 64

In [0]:
class Lang:
    def __init__(self, counter, vocab_size):
        self.word2id = {}
        self.id2word = {}
        self.pad = "<PAD>"
        self.sos = "<SOS>"
        self.eos = "<EOS>"
        self.unk = "<UNK>"
        
        self.ipad = 0
        self.isos = 1
        self.ieos = 2
        self.iunk = 3
        
        self.word2id[self.pad] = 0
        self.word2id[self.sos] = 1
        self.word2id[self.eos] = 2
        self.word2id[self.unk] = 3
        
        self.id2word[0] = self.pad
        self.id2word[1] = self.sos
        self.id2word[2] = self.eos
        self.id2word[3] = self.unk
        
        curr_id = 4
        for w, c in counter.most_common(vocab_size-curr_id):
            self.word2id[w] = curr_id
            self.id2word[curr_id] = w
            curr_id += 1
    
    def encodeSentence(self, wseq, max_len=-1):
        # wseq = nltk.tokenize.word_tokenize(s.lower().strip())
        if max_len == -1:
            return [self.word2id[w] if w in self.word2id else self.iunk for w in wseq]
        else:
            return ([self.word2id[w] if w in self.word2id else self.iunk for w in wseq] + [self.ieos] + [self.ipad]*max_len)[:max_len]
        
    def encodeSentence2(self, wseq, max_len=-1):
        # wseq = nltk.tokenize.word_tokenize(s.lower().strip()) 
        return min(max_len, len(wseq)+1), \
            ([self.word2id[w] if w in self.word2id else self.iunk for w in wseq] + \
                [self.ieos] + [self.ipad]*max_len)[:max_len]
    
    def decodeSentence(self, id_seq):
        id_seq = np.array(id_seq + [self.ieos])
        j = np.argmax(id_seq==self.ieos)
        s = ' '.join([self.id2word[x] for x in id_seq[:j]])
        s = s.replace(self.unk, "UNK")
        return s

### Let's read in the data

In [0]:
data_folder = 'data/aclImdb/'

In [0]:
rp = os.path.join(data_folder, 'train/pos')
train_positive = [os.path.join(rp, f) for f in os.listdir(rp)]
rp = os.path.join(data_folder, 'train/neg')
train_negative = [os.path.join(rp, f) for f in os.listdir(rp)]

rp = os.path.join(data_folder, 'test/pos')
test_positive = [os.path.join(rp, f) for f in os.listdir(rp)]
rp = os.path.join(data_folder, 'test/neg')
test_negative = [os.path.join(rp, f) for f in os.listdir(rp)]

#### Limit number of samples
To quickly train a small model, consider setting n_train and n_test to some relatively small numbers e.g. `1000`. Set, 
`n_train = n_test = -1` to use all the samples available.

In [0]:
n_train = 100000
n_test = 2500

In [0]:
re_html_cleaner = re.compile(r"<.*?>")

In [0]:
en_counter = Counter()
train_data = []
for _fname in tqdm_notebook(train_positive[:n_train], desc="Crunching +ve samples: "):
    with open(_fname) as f:
        text = f.read().strip()
        text = re_html_cleaner.sub(" ", text)
        wseq = nltk.tokenize.word_tokenize(text.lower())
        en_counter += Counter(wseq)
        train_data.append((wseq, 1))
        
for _fname in tqdm_notebook(train_negative[:n_train], desc="Crunching -ve samples: "):
    with open(_fname) as f:
        text = f.read().strip()
        text = re_html_cleaner.sub(" ", text)
        wseq = nltk.tokenize.word_tokenize(text.lower())
        en_counter += Counter(wseq)
        train_data.append((wseq, 0))

HBox(children=(IntProgress(value=0, description='Crunching +ve samples: ', max=12500, style=ProgressStyle(desc…




HBox(children=(IntProgress(value=0, description='Crunching -ve samples: ', max=12500, style=ProgressStyle(desc…




In [0]:
test_data = []
for _fname in tqdm_notebook(test_positive[:n_test], desc="Crunching +ve samples: "):
    with open(_fname) as f:
        text = f.read().strip()
        text = re_html_cleaner.sub(" ", text)
        wseq = nltk.tokenize.word_tokenize(text.lower())
        test_data.append((wseq, 1))
        
for _fname in tqdm_notebook(test_negative[:n_test], desc="Crunching -ve samples: "):
    with open(_fname) as f:
        text = f.read().strip()
        text = re_html_cleaner.sub(" ", text)
        wseq = nltk.tokenize.word_tokenize(text.lower())
        test_data.append((wseq, 0))

HBox(children=(IntProgress(value=0, description='Crunching +ve samples: ', max=2500, style=ProgressStyle(descr…




HBox(children=(IntProgress(value=0, description='Crunching -ve samples: ', max=2500, style=ProgressStyle(descr…




In [0]:
# A few sample english words
print("\nMost common en words in dataset:\n", en_counter.most_common(10))

print("\nTotal (en)words gathered from dataset:", len(en_counter))


Most common en words in dataset:
 [('the', 334752), (',', 275881), ('.', 271448), ('and', 163327), ('a', 162162), ('of', 145428), ('to', 135195), ('is', 110396), ('it', 95772), ('in', 93249)]

Total (en)words gathered from dataset: 105920


In [0]:
V = 10000

In [0]:
en_lang = Lang(en_counter, V)

In [0]:
wseq = nltk.tokenize.word_tokenize("Where are you going?".lower())
print("Test en encoding:", en_lang.encodeSentence(wseq))
print("Test en decoding:", en_lang.decodeSentence(en_lang.encodeSentence(wseq, 10)))

Test en encoding: [131, 33, 27, 182, 58]
Test en decoding: where are you going ?


#### Word Embedding Matrix

In [0]:
en_word_emb_matrix = tf.get_variable("en_word_emb_matrix", (V, 300), dtype=tf.float32)

Instructions for updating:
Colocations handled automatically by placer.


#### Placeholders

In [0]:
keep_prob = tf.placeholder(tf.float32)

In [0]:
input_ids = tf.placeholder(tf.int32, (None, MAX_SEQ_LEN))
input_lens = tf.placeholder(tf.int32, (None, ))

In [0]:
y_placeholder = tf.placeholder(tf.int32, (None,))

#### Tensorflow Graphs

In [0]:
input_emb = tf.nn.embedding_lookup(en_word_emb_matrix, input_ids)

In [0]:
input_emb.shape

TensorShape([Dimension(None), Dimension(50), Dimension(300)])

#### Encoder

##### RNN Units

In [0]:
# Create a single GRU cell
encoder_cell = tf.nn.rnn_cell.GRUCell(128)
# Add dropout : Dropout is applied to the hidden state output at every time step
encoder_cell = DropoutWrapper(encoder_cell, output_keep_prob=keep_prob)

Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.


In [0]:
# Unrolling of time-sequence
# Apply the encoder cell on input sequence and unroll computation upto
# max sequence length
enc_outputs, enc_state = tf.nn.dynamic_rnn(
    encoder_cell, input_emb, sequence_length=input_lens, initial_state=encoder_cell.zero_state(BATCH_SIZE, dtype=tf.float32)
)

Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [0]:
enc_outputs.shape

TensorShape([Dimension(64), Dimension(50), Dimension(128)])

In [0]:
enc_state.shape

TensorShape([Dimension(64), Dimension(128)])

### Classifier Layer

In [0]:
# A simple fully connected linear layer
# W^T*X + b
dense_layer = tf.layers.Dense(1)

#### Approaches:
As input to the final linear layers use mean of the hidden states?

or

As input to the final linear layers use the last hidden state?

##### Approch 1: Take mean of enc_outputs across dimension 1
- **IMPORTANT:** Need to **mask** the positions in input sentence that doesn't contain any inputs

In [0]:
# masks = tf.sequence_mask(input_lens, MAX_SEQ_LEN, dtype=tf.float32, name='masks')
# class_prob = tf.nn.sigmoid(
#                 dense_layer(
#                     tf.reduce_mean(
#                         enc_outputs*masks[:, :, None], 1)
#                 )
# ) 

# print(class_prob.shape)

##### Approch 2: Use enc_state (final hidden state)

In [0]:
class_prob = tf.nn.sigmoid(dense_layer(enc_state))
print(class_prob.shape)

(64, 1)


#### Loss and Optimizers [softmax_cross_entropy]
Note that `onehot_labels` and `logits` must have the same shape, e.g. `[batch_size, num_classes]`

In [0]:
print(y_placeholder.shape)
print(class_prob.shape)

(?,)
(64, 1)


In [0]:
# Loss function - softmax cross entropy
y_ = tf.cast(y_placeholder[:, None], dtype=tf.float32)
cost = -y_*tf.log(class_prob + 1e-12) - (1-y_)*tf.log(1-class_prob + 1e-12)
cost = tf.reduce_mean(cost)

# Optimizer
optimizer = tf.train.AdamOptimizer(0.001)

In [0]:
train_op = optimizer.minimize(cost)

In [0]:
init = tf.global_variables_initializer()

#### Tensorflow Sessions

In [0]:
sess_config = tf.ConfigProto()
sess_config.gpu_options.allow_growth = True

In [0]:
sess = tf.InteractiveSession(config=sess_config)
sess.run(init)

#### Minibatch Training

In [0]:
random.seed(41)

In [0]:
random.shuffle(train_data)

In [0]:
train_n = len(train_data)

In [0]:
test_n = len(test_data)

In [0]:
def small_test():
    all_true = []
    all_preds = []
    for m in range(0, test_n, BATCH_SIZE):
        n = m + BATCH_SIZE
        if n > test_n:
            break

        input_batch = np.zeros((BATCH_SIZE, MAX_SEQ_LEN), dtype=np.int32)
        input_lens_batch = np.zeros((BATCH_SIZE,), dtype=np.int32)
        true_class_batch = np.zeros((BATCH_SIZE))
        for i in range(m, n):
            b,a = en_lang.encodeSentence2(test_data[i][0], MAX_SEQ_LEN)
            input_batch[i-m,:] = a
            input_lens_batch[i-m] = b
            true_class_batch[i-m] = test_data[i][1]

        feed_dict={
            input_ids: input_batch,
            input_lens: input_lens_batch,
            keep_prob: 1.0
        }
        pred_batch = sess.run(class_prob, feed_dict=feed_dict)
        # acc = accuracy_score(true_class_batch, pred_batch > 0.5)
        all_true.extend(list(true_class_batch))
        all_preds.extend(list(pred_batch[:,0]))
    
    all_true = np.array(all_true)
    all_preds = np.array(all_preds)
    prec = precision_score(all_true, all_preds > 0.5)*100
    rec = recall_score(all_true, all_preds > 0.5)*100
    f1 = f1_score(all_true, all_preds > 0.5)*100
    print(f"Precision: {prec:2.2F}, Recall: {rec:2.2F}, F1-Score: {f1:2.2F}")

In [0]:
for _e in range(5):
    # Mix things up a bit.
    random.shuffle(train_data)
    pbar = tqdm_notebook(range(0, train_n, BATCH_SIZE))
    batch_loss = 0
    bxi = 0
    for m in pbar:
        n = m + BATCH_SIZE
        if n <= train_n:
            # print("Epoch Complete... \n")

            input_batch = np.zeros((BATCH_SIZE, MAX_SEQ_LEN), dtype=np.int32)
            input_lens_batch = np.zeros((BATCH_SIZE,), dtype=np.int32)
            true_class_batch = np.zeros((BATCH_SIZE))
            for i in range(m, n):
                b,a = en_lang.encodeSentence2(train_data[i][0], MAX_SEQ_LEN)
                input_batch[i-m,:] = a
                input_lens_batch[i-m] = b
                true_class_batch[i-m] = train_data[i][1]

            feed_dict={
                input_ids: input_batch,
                input_lens: input_lens_batch,
                y_placeholder: true_class_batch,
                keep_prob: 0.6
            }
            sess.run(train_op, feed_dict=feed_dict)
            batch_loss += sess.run(cost, feed_dict=feed_dict)
            pbar.set_description(f"Epoch: {_e} >> Loss: {batch_loss/(bxi+1):2.2F}:")
            bxi += 1
            if (1 + n//BATCH_SIZE) % 10 == 0:
                small_test()

HBox(children=(IntProgress(value=0, max=391), HTML(value='')))

Precision: 50.30, Recall: 88.64, F1-Score: 64.18
Precision: 50.77, Recall: 91.28, F1-Score: 65.25
Precision: 54.95, Recall: 83.52, F1-Score: 66.29
Precision: 57.33, Recall: 91.72, F1-Score: 70.55
Precision: 77.05, Recall: 37.20, F1-Score: 50.18
Precision: 65.37, Recall: 82.24, F1-Score: 72.84
Precision: 69.77, Recall: 73.20, F1-Score: 71.44
Precision: 65.84, Recall: 84.48, F1-Score: 74.00
Precision: 73.20, Recall: 69.36, F1-Score: 71.23
Precision: 69.46, Recall: 79.16, F1-Score: 74.00
Precision: 75.91, Recall: 68.44, F1-Score: 71.98
Precision: 76.59, Recall: 66.76, F1-Score: 71.34
Precision: 71.31, Recall: 79.24, F1-Score: 75.07
Precision: 76.40, Recall: 68.64, F1-Score: 72.31
Precision: 73.93, Recall: 74.96, F1-Score: 74.44
Precision: 81.44, Recall: 50.56, F1-Score: 62.39
Precision: 72.21, Recall: 78.68, F1-Score: 75.31
Precision: 78.34, Recall: 66.56, F1-Score: 71.97
Precision: 70.41, Recall: 83.20, F1-Score: 76.27
Precision: 73.46, Recall: 75.60, F1-Score: 74.51
Precision: 71.12, Re

HBox(children=(IntProgress(value=0, max=391), HTML(value='')))

Precision: 73.85, Recall: 80.08, F1-Score: 76.84
Precision: 76.18, Recall: 74.32, F1-Score: 75.24
Precision: 77.32, Recall: 70.76, F1-Score: 73.89
Precision: 76.12, Recall: 75.12, F1-Score: 75.62
Precision: 74.19, Recall: 78.76, F1-Score: 76.41
Precision: 76.54, Recall: 72.04, F1-Score: 74.22
Precision: 78.19, Recall: 67.40, F1-Score: 72.40
Precision: 71.75, Recall: 80.68, F1-Score: 75.96
Precision: 75.86, Recall: 73.28, F1-Score: 74.55
Precision: 71.11, Recall: 81.64, F1-Score: 76.01
Precision: 77.24, Recall: 68.28, F1-Score: 72.48
Precision: 72.17, Recall: 79.16, F1-Score: 75.51
Precision: 73.09, Recall: 77.68, F1-Score: 75.32
Precision: 75.79, Recall: 72.52, F1-Score: 74.12
Precision: 73.05, Recall: 78.92, F1-Score: 75.87
Precision: 77.82, Recall: 69.88, F1-Score: 73.64
Precision: 77.57, Recall: 69.32, F1-Score: 73.22
Precision: 75.01, Recall: 75.64, F1-Score: 75.32
Precision: 73.56, Recall: 79.00, F1-Score: 76.18
Precision: 79.96, Recall: 65.92, F1-Score: 72.26
Precision: 75.16, Re

HBox(children=(IntProgress(value=0, max=391), HTML(value='')))

Precision: 78.82, Recall: 73.52, F1-Score: 76.08
Precision: 78.27, Recall: 74.04, F1-Score: 76.09
Precision: 76.93, Recall: 75.08, F1-Score: 75.99
Precision: 75.60, Recall: 75.84, F1-Score: 75.72
Precision: 77.53, Recall: 72.60, F1-Score: 74.98
Precision: 75.00, Recall: 77.76, F1-Score: 76.36
Precision: 75.66, Recall: 75.48, F1-Score: 75.57
Precision: 75.95, Recall: 73.52, F1-Score: 74.72
Precision: 75.36, Recall: 76.32, F1-Score: 75.83
Precision: 74.70, Recall: 76.64, F1-Score: 75.66
Precision: 73.71, Recall: 78.40, F1-Score: 75.98
Precision: 77.25, Recall: 71.72, F1-Score: 74.38
Precision: 74.83, Recall: 76.96, F1-Score: 75.88
Precision: 78.61, Recall: 63.52, F1-Score: 70.27
Precision: 73.21, Recall: 79.04, F1-Score: 76.01
Precision: 77.04, Recall: 69.92, F1-Score: 73.31
Precision: 76.99, Recall: 69.48, F1-Score: 73.04
Precision: 70.86, Recall: 81.12, F1-Score: 75.64
Precision: 77.74, Recall: 68.04, F1-Score: 72.57
Precision: 75.74, Recall: 74.44, F1-Score: 75.09
Precision: 77.92, Re

HBox(children=(IntProgress(value=0, max=391), HTML(value='')))

Precision: 76.94, Recall: 73.40, F1-Score: 75.13
Precision: 75.76, Recall: 75.88, F1-Score: 75.82
Precision: 77.94, Recall: 68.96, F1-Score: 73.17
Precision: 75.16, Recall: 73.60, F1-Score: 74.37
Precision: 76.28, Recall: 70.48, F1-Score: 73.26
Precision: 74.08, Recall: 76.04, F1-Score: 75.05
Precision: 74.90, Recall: 73.52, F1-Score: 74.20
Precision: 75.78, Recall: 72.48, F1-Score: 74.10
Precision: 76.13, Recall: 71.20, F1-Score: 73.58
Precision: 77.20, Recall: 69.76, F1-Score: 73.29
Precision: 72.74, Recall: 76.84, F1-Score: 74.73
Precision: 77.73, Recall: 68.12, F1-Score: 72.61
Precision: 74.61, Recall: 73.68, F1-Score: 74.14
Precision: 74.86, Recall: 73.00, F1-Score: 73.92
Precision: 76.04, Recall: 71.08, F1-Score: 73.48
Precision: 71.58, Recall: 76.08, F1-Score: 73.76
Precision: 76.07, Recall: 67.76, F1-Score: 71.67
Precision: 75.19, Recall: 68.60, F1-Score: 71.74
Precision: 72.29, Recall: 76.48, F1-Score: 74.32
Precision: 75.02, Recall: 72.20, F1-Score: 73.58
Precision: 75.19, Re

HBox(children=(IntProgress(value=0, max=391), HTML(value='')))

Precision: 74.07, Recall: 73.68, F1-Score: 73.87
Precision: 76.00, Recall: 70.04, F1-Score: 72.90
Precision: 74.89, Recall: 71.00, F1-Score: 72.90
Precision: 74.88, Recall: 70.00, F1-Score: 72.36
Precision: 73.78, Recall: 73.16, F1-Score: 73.47
Precision: 74.02, Recall: 72.60, F1-Score: 73.30
Precision: 74.49, Recall: 71.96, F1-Score: 73.20
Precision: 76.06, Recall: 69.52, F1-Score: 72.64
Precision: 75.21, Recall: 71.96, F1-Score: 73.55
Precision: 74.38, Recall: 73.64, F1-Score: 74.01
Precision: 75.12, Recall: 70.16, F1-Score: 72.55
Precision: 73.68, Recall: 73.44, F1-Score: 73.56
Precision: 75.44, Recall: 72.00, F1-Score: 73.68
Precision: 75.23, Recall: 71.56, F1-Score: 73.35
Precision: 74.17, Recall: 71.08, F1-Score: 72.59
Precision: 73.92, Recall: 70.64, F1-Score: 72.24
Precision: 72.26, Recall: 75.96, F1-Score: 74.06
Precision: 75.45, Recall: 66.52, F1-Score: 70.71
Precision: 73.34, Recall: 74.92, F1-Score: 74.12
Precision: 74.24, Recall: 72.76, F1-Score: 73.49
Precision: 72.34, Re

HBox(children=(IntProgress(value=0, max=391), HTML(value='')))

Precision: 75.27, Recall: 68.80, F1-Score: 71.89
Precision: 73.59, Recall: 72.68, F1-Score: 73.13
Precision: 71.99, Recall: 76.28, F1-Score: 74.07
Precision: 76.03, Recall: 67.12, F1-Score: 71.30
Precision: 73.98, Recall: 73.12, F1-Score: 73.55
Precision: 75.33, Recall: 69.24, F1-Score: 72.16
Precision: 72.51, Recall: 74.92, F1-Score: 73.70
Precision: 74.05, Recall: 70.76, F1-Score: 72.37
Precision: 74.12, Recall: 70.56, F1-Score: 72.30
Precision: 73.52, Recall: 73.28, F1-Score: 73.40
Precision: 72.05, Recall: 73.92, F1-Score: 72.97
Precision: 73.67, Recall: 69.60, F1-Score: 71.58
Precision: 73.33, Recall: 71.48, F1-Score: 72.39
Precision: 72.89, Recall: 74.96, F1-Score: 73.91
Precision: 74.76, Recall: 67.40, F1-Score: 70.89
Precision: 74.44, Recall: 68.96, F1-Score: 71.59
Precision: 72.62, Recall: 73.72, F1-Score: 73.16
Precision: 75.66, Recall: 65.04, F1-Score: 69.95
Precision: 71.67, Recall: 74.88, F1-Score: 73.24
Precision: 73.46, Recall: 70.40, F1-Score: 71.90
Precision: 74.49, Re

HBox(children=(IntProgress(value=0, max=391), HTML(value='')))

Precision: 75.17, Recall: 70.72, F1-Score: 72.88
Precision: 75.11, Recall: 69.88, F1-Score: 72.40
Precision: 73.39, Recall: 73.80, F1-Score: 73.59
Precision: 75.49, Recall: 67.88, F1-Score: 71.48
Precision: 75.10, Recall: 69.48, F1-Score: 72.18
Precision: 74.35, Recall: 68.76, F1-Score: 71.45
Precision: 74.46, Recall: 69.28, F1-Score: 71.78
Precision: 74.86, Recall: 69.20, F1-Score: 71.92
Precision: 74.05, Recall: 71.12, F1-Score: 72.56
Precision: 73.49, Recall: 71.52, F1-Score: 72.49
Precision: 76.28, Recall: 64.96, F1-Score: 70.17
Precision: 74.38, Recall: 68.76, F1-Score: 71.46
Precision: 73.44, Recall: 72.44, F1-Score: 72.94
Precision: 74.82, Recall: 67.76, F1-Score: 71.12
Precision: 73.69, Recall: 69.92, F1-Score: 71.76
Precision: 74.03, Recall: 70.92, F1-Score: 72.44
Precision: 73.96, Recall: 69.52, F1-Score: 71.67
Precision: 73.51, Recall: 69.92, F1-Score: 71.67
Precision: 76.27, Recall: 66.08, F1-Score: 70.81
Precision: 74.91, Recall: 69.88, F1-Score: 72.31
Precision: 74.54, Re

HBox(children=(IntProgress(value=0, max=391), HTML(value='')))

Precision: 72.90, Recall: 71.44, F1-Score: 72.16
Precision: 74.49, Recall: 66.92, F1-Score: 70.50
Precision: 73.27, Recall: 71.80, F1-Score: 72.53
Precision: 72.73, Recall: 70.40, F1-Score: 71.54
Precision: 72.13, Recall: 64.80, F1-Score: 68.27
Precision: 73.15, Recall: 66.48, F1-Score: 69.66
Precision: 71.95, Recall: 71.52, F1-Score: 71.74
Precision: 73.68, Recall: 69.76, F1-Score: 71.67
Precision: 74.36, Recall: 68.68, F1-Score: 71.41
Precision: 73.17, Recall: 71.88, F1-Score: 72.52
Precision: 73.57, Recall: 70.48, F1-Score: 71.99
Precision: 73.46, Recall: 69.08, F1-Score: 71.20
Precision: 73.38, Recall: 70.12, F1-Score: 71.71
Precision: 72.91, Recall: 71.28, F1-Score: 72.09
Precision: 72.20, Recall: 71.28, F1-Score: 71.74
Precision: 71.33, Recall: 73.76, F1-Score: 72.53
Precision: 74.00, Recall: 69.00, F1-Score: 71.41
Precision: 74.83, Recall: 65.88, F1-Score: 70.07
Precision: 73.84, Recall: 69.68, F1-Score: 71.70
Precision: 74.70, Recall: 67.44, F1-Score: 70.89
Precision: 74.22, Re

HBox(children=(IntProgress(value=0, max=391), HTML(value='')))

Precision: 72.21, Recall: 71.60, F1-Score: 71.90
Precision: 74.76, Recall: 65.52, F1-Score: 69.84
Precision: 74.14, Recall: 68.00, F1-Score: 70.94
Precision: 72.53, Recall: 72.56, F1-Score: 72.55
Precision: 73.70, Recall: 69.04, F1-Score: 71.29
Precision: 75.89, Recall: 64.60, F1-Score: 69.79
Precision: 72.12, Recall: 71.60, F1-Score: 71.86
Precision: 72.12, Recall: 70.88, F1-Score: 71.49
Precision: 73.51, Recall: 69.16, F1-Score: 71.27
Precision: 74.82, Recall: 67.28, F1-Score: 70.85
Precision: 71.88, Recall: 74.76, F1-Score: 73.29
Precision: 73.70, Recall: 69.72, F1-Score: 71.65
Precision: 72.82, Recall: 68.92, F1-Score: 70.82
Precision: 71.65, Recall: 71.48, F1-Score: 71.57
Precision: 72.22, Recall: 70.92, F1-Score: 71.56
Precision: 73.43, Recall: 69.96, F1-Score: 71.65
Precision: 73.27, Recall: 68.32, F1-Score: 70.71
Precision: 69.70, Recall: 73.72, F1-Score: 71.66
Precision: 74.37, Recall: 62.44, F1-Score: 67.88
Precision: 72.62, Recall: 68.52, F1-Score: 70.51
Precision: 72.08, Re

HBox(children=(IntProgress(value=0, max=391), HTML(value='')))

Precision: 74.58, Recall: 65.36, F1-Score: 69.67
Precision: 71.66, Recall: 71.32, F1-Score: 71.49
Precision: 71.91, Recall: 70.44, F1-Score: 71.17
Precision: 74.42, Recall: 64.60, F1-Score: 69.16
Precision: 74.16, Recall: 66.92, F1-Score: 70.35
Precision: 72.85, Recall: 70.20, F1-Score: 71.50
Precision: 71.93, Recall: 72.16, F1-Score: 72.04
Precision: 73.02, Recall: 68.08, F1-Score: 70.46
Precision: 71.28, Recall: 72.28, F1-Score: 71.78
Precision: 72.09, Recall: 69.52, F1-Score: 70.78
Precision: 73.93, Recall: 67.28, F1-Score: 70.45
Precision: 73.56, Recall: 67.88, F1-Score: 70.61
Precision: 72.76, Recall: 68.92, F1-Score: 70.79
Precision: 71.49, Recall: 71.00, F1-Score: 71.24
Precision: 72.00, Recall: 68.92, F1-Score: 70.43
Precision: 72.26, Recall: 70.64, F1-Score: 71.44
Precision: 72.83, Recall: 70.24, F1-Score: 71.51
Precision: 72.42, Recall: 68.28, F1-Score: 70.29
Precision: 71.27, Recall: 70.84, F1-Score: 71.05
Precision: 71.23, Recall: 71.20, F1-Score: 71.21
Precision: 72.73, Re

### Improving Further
- This was a very simple RNN based model for the task.
- It can be improved a lot by tweaking hyperparameters e.g.
 - lstm size 
 - dropout
 - learning rate 
- or modifying the architecture e.g.
 - Add bidirectional RNNs
 - Use multiple layers of RNN cells
 - Add more hidden layers to the classifier