In [1]:
CUSTOMISED_STOP_WORDS = set(
    """
    a about above across after afterwards again all almost alone along
    already also although always am among amongst amount an and another any anyhow
    anyone anything anyway anywhere are around as at
    back be became because become becomes becoming been before beforehand behind
    being below beside besides between beyond both bottom but by
    call can cannot ca could
    did do does doing done down due during
    each eight either eleven else elsewhere empty enough even ever every
    everyone everything everywhere except
    few fifteen fifty first five for former formerly forty four from front full
    further
    get give go
    had has have he hence her here hereafter hereby herein hereupon hers herself
    him himself his how however hundred
    i if in indeed into is it its itself
    keep
    last latter latterly least less
    just
    made make many may me meanwhile might mine more moreover most mostly move much
    must my myself
    name namely neither never nevertheless next nine now
    of off often on once one only onto or other others otherwise our ours ourselves
    out over own
    part per perhaps please put
    quite
    rather re really regarding
    same say see seem seemed seeming seems serious several she should show side
    since six sixty so some somehow someone something sometime sometimes somewhere
    still such
    take ten than that the their them themselves then thence there thereafter
    thereby therefore therein thereupon these they third this those though three
    through throughout thru thus to together too top toward towards twelve twenty
    two
    under until up unless upon us used using
    various very very via was we well were what whatever when whence whenever where
    whereafter whereas whereby wherein whereupon wherever whether which while
    whither who whoever whole whom whose why will with within without would
    yet you your yours yourself yourselves
    """.split()
)

customised_contractions = ["'d", "'ll", "'m", "'re", "'s", "'ve"]
CUSTOMISED_STOP_WORDS.update(customised_contractions)

In [2]:
# run the commands below in terminal to install dependencies
# pip install spacy
# python -m spacy download en
import spacy
from spacy.lang.en import English
from spacy.lang.en.stop_words import STOP_WORDS
import string

nlp_sw = English()
nlp_n = spacy.load("en_core_web_sm")

removable_char = list(string.punctuation) + ["\n", "\r"]

# text arguement in string
# remove stop words of the provided text
# return a list of words in small letters in the text without stop words and punctuations
def remove_stop_words(text):
    text = text.lower()
    token_doc = nlp_sw(text)
    clean_text = []
    for token in token_doc:
        token_text = token.text
        token_text = token_text.replace(" ", "")
        token_text = "not" if token_text == "n't" else token_text
        if not(token_text in CUSTOMISED_STOP_WORDS or token_text in removable_char):
            if token_text and not(token_text.isspace()):
                tmp_text = token_text[1:] if token_text[0] in removable_char else token_text
                if tmp_text and not(tmp_text.isspace()):
                    clean_text.append(tmp_text)
    return clean_text

# text arguement in string
# normalise the text
# return a list of words in small letters in the normalised text
def normalise_text(text):
    text = text.lower()
    token_doc = nlp_n(text)
    normalised_text = []
    for token in token_doc:
        lemma = token.lemma_
        if not(lemma == "-PRON-"):
            normalised_text.append(lemma)
    return normalised_text

In [3]:
# reading the csv file
import pandas as pd
df = pd.read_csv('training.1600000.processed.noemoticon.csv',encoding = "ISO-8859-1")
df

Unnamed: 0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, that's a bummer. You shoulda got David Carr of Third Day to do it. ;D"
0,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...
1,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...
2,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire
3,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all...."
4,0,1467811372,Mon Apr 06 22:20:00 PDT 2009,NO_QUERY,joy_wolf,@Kwesidei not the whole crew
...,...,...,...,...,...,...
1599994,4,2193601966,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,AmandaMarie1028,Just woke up. Having no school is the best fee...
1599995,4,2193601969,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,TheWDBoards,TheWDB.com - Very cool to hear old Walt interv...
1599996,4,2193601991,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,bpbabe,Are you ready for your MoJo Makeover? Ask me f...
1599997,4,2193602064,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,tinydiamondz,Happy 38th Birthday to my boo of alll time!!! ...


In [4]:
df = pd.read_csv('training_data.csv')
df

Unnamed: 0,Label,Sentences
0,0,is upset that he can't update his Facebook by ...
1,0,@Kenichan I dived many times for the ball. Man...
2,0,my whole body feels itchy and like its on fire
3,0,"@nationwideclass no, it's not behaving at all...."
4,0,@Kwesidei not the whole crew
...,...,...
1599994,4,Just woke up. Having no school is the best fee...
1599995,4,TheWDB.com - Very cool to hear old Walt interv...
1599996,4,Are you ready for your MoJo Makeover? Ask me f...
1599997,4,Happy 38th Birthday to my boo of alll time!!! ...


In [5]:
# 0 = negative, 2 = neutral, 4 = positive
number_of_data = 500000
data_negative = df[df.Label == 0][:int(number_of_data/2)]
data_positive = df[df.Label == 4][:int(number_of_data/2)]
data_positive["Label"] = 1
df_row_merged = pd.concat([data_negative, data_positive], ignore_index=True)
df_row_merged

Unnamed: 0,Label,Sentences
0,0,is upset that he can't update his Facebook by ...
1,0,@Kenichan I dived many times for the ball. Man...
2,0,my whole body feels itchy and like its on fire
3,0,"@nationwideclass no, it's not behaving at all...."
4,0,@Kwesidei not the whole crew
...,...,...
499995,1,@PaulBarksdale thanx for the ##followfriday re...
499996,1,@markhoppus I want to go to that Irvine show s...
499997,1,Wedding today
499998,1,no school today! happy shavuot


In [6]:
df_row_merged = df_row_merged.sample(frac=1).reset_index(drop=True)
df_row_merged

Unnamed: 0,Label,Sentences
0,1,@ginoboi @monicafrancesca @ginoandfran Request...
1,1,Received our first bit of spam from Twitter - ...
2,1,"@benzo8 Aahh, brings back memories from my yea..."
3,0,@615Redbone I know right!! Im too pissed!!! He...
4,1,Ew wtf am I listening to
...,...,...
499995,0,@FuriousPurpose thanks for headsup on spam att...
499996,1,@jinxremoving http://bit.ly/qwzEE Dan talks a...
499997,1,"@JoeCascio cool, thanks for the info."
499998,0,I don't know that I have anything to write abo...


In [8]:
training_data = df_row_merged[:int(len(df_row_merged)*0.75)]
test_data = df_row_merged[int(len(df_row_merged)*0.25):]
training_data

Unnamed: 0,Label,Sentences
0,1,@ginoboi @monicafrancesca @ginoandfran Request...
1,1,Received our first bit of spam from Twitter - ...
2,1,"@benzo8 Aahh, brings back memories from my yea..."
3,0,@615Redbone I know right!! Im too pissed!!! He...
4,1,Ew wtf am I listening to
...,...,...
374995,1,Helt okej hos mormor och morfar Both angry s...
374996,1,3 magento installs updated on customer sites
374997,1,@CHouttave requests?
374998,0,No cell phone.... I hate you HTC!!


In [9]:
# remove url in tweets
# remove tag (@) in tweets
# remove hashtags (#) in tweets
# remove stopwords
# normalise text
# this cell might take some time to run, be patient
import re
import string

url_pattern = re.compile(".*https?:\/\/")
hashtag_pattern = re.compile("#[a-z\d-]+")
tag_pattern = re.compile("^@")
punc = string.punctuation
punc = punc.replace("-", "")
punc += "“”"
punc = punc.replace ("'","")

pd.options.mode.chained_assignment = None

def process_text(text):
    tmp_no_url_hashtag = []
    for word in re.split(' |\r|\n', text):
        if url_pattern.match(word) or tag_pattern.match(word) or hashtag_pattern.match(word):
            continue
        else:
            clean_word = word.translate(str.maketrans("","", punc))
            clean_word = clean_word.replace("’","'")
            tmp_no_url_hashtag.append(clean_word)
    tmp = " ".join(tmp_no_url_hashtag)
    without_stopwords = " ".join(remove_stop_words(tmp))
    processed = " ".join(normalise_text(without_stopwords))
    return processed

for i in range (len(training_data["Sentences"])):
    training_data["Sentences"][i] = process_text(training_data["Sentences"][i])
    if i % 1000 == 0:
        print ("Processing", i, "unit")

Processing 0 unit
Processing 1000 unit
Processing 2000 unit
Processing 3000 unit
Processing 4000 unit
Processing 5000 unit
Processing 6000 unit
Processing 7000 unit
Processing 8000 unit
Processing 9000 unit
Processing 10000 unit
Processing 11000 unit
Processing 12000 unit
Processing 13000 unit
Processing 14000 unit
Processing 15000 unit
Processing 16000 unit
Processing 17000 unit
Processing 18000 unit
Processing 19000 unit
Processing 20000 unit
Processing 21000 unit
Processing 22000 unit
Processing 23000 unit
Processing 24000 unit
Processing 25000 unit
Processing 26000 unit
Processing 27000 unit
Processing 28000 unit
Processing 29000 unit
Processing 30000 unit
Processing 31000 unit
Processing 32000 unit
Processing 33000 unit
Processing 34000 unit
Processing 35000 unit
Processing 36000 unit
Processing 37000 unit
Processing 38000 unit
Processing 39000 unit
Processing 40000 unit
Processing 41000 unit
Processing 42000 unit
Processing 43000 unit
Processing 44000 unit
Processing 45000 unit
P

In [1]:
training_data

NameError: name 'training_data' is not defined

In [11]:
validation_data = training_data[:int(len(training_data)*0.1)]
training_data = training_data[int(len(training_data)*0.1):]

In [12]:
import numpy as np
train_x_val = np.asarray(training_data["Sentences"])
train_y_val = np.asarray(training_data["Label"])
valid_x_val = np.asarray(validation_data["Sentences"])
valid_y_val = np.asarray(validation_data["Label"])

In [13]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
# embedding = "https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1"
# embedding = "https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1"
hub_layer = hub.KerasLayer(embedding, input_shape=[], 
                           dtype=tf.string, trainable=True)
# hub_layer(training_data["Sentences"][:3])

In [14]:
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(50, activation='relu'))
# model.add(tf.keras.layers.Dense(10, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 20)                400020    
_________________________________________________________________
dense (Dense)                (None, 50)                1050      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 51        
Total params: 401,121
Trainable params: 401,121
Non-trainable params: 0
_________________________________________________________________


In [15]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [16]:
history = model.fit(train_x_val,
                    train_y_val,
                    epochs=20,
                    validation_data=(valid_x_val, valid_y_val),
                    verbose=1)

Train on 337500 samples, validate on 37500 samples
Epoch 1/20


 32544/337500 [=>............................] - ETA: 10:37:34 - loss: 0.9035 - accuracy: 0.43 - ETA: 2:09:14 - loss: 0.8349 - accuracy: 0.4750 - ETA: 59:33 - loss: 0.7798 - accuracy: 0.4688  - ETA: 51:05 - loss: 0.7696 - accuracy: 0.468 - ETA: 35:25 - loss: 0.7540 - accuracy: 0.490 - ETA: 28:24 - loss: 0.7474 - accuracy: 0.496 - ETA: 22:17 - loss: 0.7342 - accuracy: 0.510 - ETA: 18:55 - loss: 0.7253 - accuracy: 0.521 - ETA: 16:07 - loss: 0.7200 - accuracy: 0.529 - ETA: 14:39 - loss: 0.7157 - accuracy: 0.537 - ETA: 13:12 - loss: 0.7122 - accuracy: 0.538 - ETA: 12:15 - loss: 0.7093 - accuracy: 0.542 - ETA: 11:27 - loss: 0.7062 - accuracy: 0.546 - ETA: 10:37 - loss: 0.7031 - accuracy: 0.548 - ETA: 10:02 - loss: 0.7015 - accuracy: 0.549 - ETA: 9:32 - loss: 0.6986 - accuracy: 0.556 - ETA: 8:58 - loss: 0.6944 - accuracy: 0.56 - ETA: 8:29 - loss: 0.6915 - accuracy: 0.56 - ETA: 8:14 - loss: 0.6892 - accuracy: 0.56 - ETA: 7:55 - loss: 0.6868 - accuracy: 0.56 - ETA: 7:34 - loss: 0.6864 - accura

















Epoch 2/20


 33792/337500 [==>...........................] - ETA: 2:37 - loss: 0.5909 - accuracy: 0.65 - ETA: 1:57 - loss: 0.5126 - accuracy: 0.77 - ETA: 1:54 - loss: 0.5136 - accuracy: 0.77 - ETA: 1:46 - loss: 0.5093 - accuracy: 0.76 - ETA: 1:40 - loss: 0.4891 - accuracy: 0.77 - ETA: 1:36 - loss: 0.4815 - accuracy: 0.78 - ETA: 1:37 - loss: 0.4790 - accuracy: 0.77 - ETA: 1:37 - loss: 0.4807 - accuracy: 0.77 - ETA: 1:35 - loss: 0.4769 - accuracy: 0.78 - ETA: 1:36 - loss: 0.4750 - accuracy: 0.78 - ETA: 1:37 - loss: 0.4717 - accuracy: 0.78 - ETA: 1:38 - loss: 0.4766 - accuracy: 0.77 - ETA: 1:39 - loss: 0.4748 - accuracy: 0.78 - ETA: 1:40 - loss: 0.4772 - accuracy: 0.77 - ETA: 1:39 - loss: 0.4774 - accuracy: 0.77 - ETA: 1:37 - loss: 0.4819 - accuracy: 0.77 - ETA: 1:35 - loss: 0.4796 - accuracy: 0.77 - ETA: 1:34 - loss: 0.4793 - accuracy: 0.77 - ETA: 1:33 - loss: 0.4821 - accuracy: 0.77 - ETA: 1:34 - loss: 0.4811 - accuracy: 0.77 - ETA: 1:34 - loss: 0.4832 - accuracy: 0.77 - ETA: 1:34 - loss: 0.4812 - 

















Epoch 3/20


 32320/337500 [=>............................] - ETA: 2:16 - loss: 0.4233 - accuracy: 0.75 - ETA: 1:43 - loss: 0.4448 - accuracy: 0.79 - ETA: 1:33 - loss: 0.4712 - accuracy: 0.77 - ETA: 1:29 - loss: 0.4862 - accuracy: 0.76 - ETA: 1:31 - loss: 0.4703 - accuracy: 0.78 - ETA: 1:37 - loss: 0.4701 - accuracy: 0.78 - ETA: 1:40 - loss: 0.4792 - accuracy: 0.77 - ETA: 1:38 - loss: 0.4880 - accuracy: 0.77 - ETA: 1:37 - loss: 0.4806 - accuracy: 0.77 - ETA: 1:39 - loss: 0.4788 - accuracy: 0.77 - ETA: 1:39 - loss: 0.4802 - accuracy: 0.77 - ETA: 1:38 - loss: 0.4856 - accuracy: 0.76 - ETA: 1:37 - loss: 0.4899 - accuracy: 0.76 - ETA: 1:37 - loss: 0.4853 - accuracy: 0.77 - ETA: 1:38 - loss: 0.4811 - accuracy: 0.77 - ETA: 1:39 - loss: 0.4821 - accuracy: 0.77 - ETA: 1:40 - loss: 0.4793 - accuracy: 0.77 - ETA: 1:40 - loss: 0.4827 - accuracy: 0.76 - ETA: 1:41 - loss: 0.4808 - accuracy: 0.77 - ETA: 1:42 - loss: 0.4815 - accuracy: 0.76 - ETA: 1:42 - loss: 0.4824 - accuracy: 0.76 - ETA: 1:42 - loss: 0.4854 - 

















Epoch 4/20


 31904/337500 [=>............................] - ETA: 3:51 - loss: 0.3952 - accuracy: 0.78 - ETA: 3:19 - loss: 0.4571 - accuracy: 0.77 - ETA: 3:01 - loss: 0.4694 - accuracy: 0.77 - ETA: 2:48 - loss: 0.4565 - accuracy: 0.77 - ETA: 2:33 - loss: 0.4506 - accuracy: 0.77 - ETA: 2:16 - loss: 0.4849 - accuracy: 0.76 - ETA: 2:11 - loss: 0.4877 - accuracy: 0.75 - ETA: 2:04 - loss: 0.4774 - accuracy: 0.76 - ETA: 2:03 - loss: 0.4759 - accuracy: 0.77 - ETA: 2:01 - loss: 0.4797 - accuracy: 0.76 - ETA: 1:57 - loss: 0.4768 - accuracy: 0.76 - ETA: 1:56 - loss: 0.4709 - accuracy: 0.77 - ETA: 1:54 - loss: 0.4707 - accuracy: 0.76 - ETA: 1:53 - loss: 0.4725 - accuracy: 0.76 - ETA: 1:51 - loss: 0.4776 - accuracy: 0.76 - ETA: 1:51 - loss: 0.4773 - accuracy: 0.76 - ETA: 1:53 - loss: 0.4748 - accuracy: 0.76 - ETA: 1:55 - loss: 0.4743 - accuracy: 0.77 - ETA: 1:57 - loss: 0.4710 - accuracy: 0.77 - ETA: 1:57 - loss: 0.4699 - accuracy: 0.77 - ETA: 1:58 - loss: 0.4701 - accuracy: 0.77 - ETA: 1:56 - loss: 0.4715 - 

















Epoch 5/20


 34144/337500 [==>...........................] - ETA: 1:34 - loss: 0.4821 - accuracy: 0.78 - ETA: 1:31 - loss: 0.5094 - accuracy: 0.72 - ETA: 1:37 - loss: 0.4944 - accuracy: 0.73 - ETA: 1:43 - loss: 0.4887 - accuracy: 0.73 - ETA: 1:42 - loss: 0.4840 - accuracy: 0.74 - ETA: 1:45 - loss: 0.4759 - accuracy: 0.75 - ETA: 1:45 - loss: 0.4733 - accuracy: 0.75 - ETA: 1:43 - loss: 0.4643 - accuracy: 0.76 - ETA: 1:44 - loss: 0.4682 - accuracy: 0.76 - ETA: 1:45 - loss: 0.4690 - accuracy: 0.76 - ETA: 1:46 - loss: 0.4713 - accuracy: 0.75 - ETA: 1:49 - loss: 0.4759 - accuracy: 0.75 - ETA: 1:48 - loss: 0.4775 - accuracy: 0.75 - ETA: 1:46 - loss: 0.4752 - accuracy: 0.75 - ETA: 1:45 - loss: 0.4693 - accuracy: 0.76 - ETA: 1:45 - loss: 0.4702 - accuracy: 0.76 - ETA: 1:44 - loss: 0.4698 - accuracy: 0.76 - ETA: 1:44 - loss: 0.4663 - accuracy: 0.76 - ETA: 1:43 - loss: 0.4654 - accuracy: 0.77 - ETA: 1:43 - loss: 0.4679 - accuracy: 0.76 - ETA: 1:43 - loss: 0.4647 - accuracy: 0.76 - ETA: 1:43 - loss: 0.4633 - 















Epoch 6/20


 34176/337500 [==>...........................] - ETA: 1:24 - loss: 0.5265 - accuracy: 0.71 - ETA: 1:31 - loss: 0.4846 - accuracy: 0.78 - ETA: 1:31 - loss: 0.4889 - accuracy: 0.77 - ETA: 1:32 - loss: 0.4748 - accuracy: 0.78 - ETA: 1:33 - loss: 0.4559 - accuracy: 0.79 - ETA: 1:36 - loss: 0.4561 - accuracy: 0.79 - ETA: 1:38 - loss: 0.4574 - accuracy: 0.79 - ETA: 1:38 - loss: 0.4614 - accuracy: 0.79 - ETA: 1:35 - loss: 0.4661 - accuracy: 0.78 - ETA: 1:34 - loss: 0.4607 - accuracy: 0.78 - ETA: 1:34 - loss: 0.4649 - accuracy: 0.78 - ETA: 1:34 - loss: 0.4710 - accuracy: 0.77 - ETA: 1:34 - loss: 0.4693 - accuracy: 0.77 - ETA: 1:33 - loss: 0.4640 - accuracy: 0.78 - ETA: 1:34 - loss: 0.4630 - accuracy: 0.77 - ETA: 1:33 - loss: 0.4663 - accuracy: 0.77 - ETA: 1:34 - loss: 0.4669 - accuracy: 0.78 - ETA: 1:34 - loss: 0.4670 - accuracy: 0.77 - ETA: 1:34 - loss: 0.4660 - accuracy: 0.77 - ETA: 1:34 - loss: 0.4625 - accuracy: 0.78 - ETA: 1:34 - loss: 0.4646 - accuracy: 0.78 - ETA: 1:34 - loss: 0.4637 - 















Epoch 7/20


 35552/337500 [==>...........................] - ETA: 1:34 - loss: 0.4735 - accuracy: 0.78 - ETA: 1:31 - loss: 0.4452 - accuracy: 0.80 - ETA: 1:32 - loss: 0.4744 - accuracy: 0.77 - ETA: 1:37 - loss: 0.4881 - accuracy: 0.76 - ETA: 1:39 - loss: 0.4827 - accuracy: 0.77 - ETA: 1:37 - loss: 0.4809 - accuracy: 0.76 - ETA: 1:34 - loss: 0.4719 - accuracy: 0.77 - ETA: 1:33 - loss: 0.4700 - accuracy: 0.77 - ETA: 1:34 - loss: 0.4659 - accuracy: 0.77 - ETA: 1:34 - loss: 0.4715 - accuracy: 0.76 - ETA: 1:36 - loss: 0.4688 - accuracy: 0.76 - ETA: 1:36 - loss: 0.4644 - accuracy: 0.77 - ETA: 1:35 - loss: 0.4662 - accuracy: 0.77 - ETA: 1:36 - loss: 0.4682 - accuracy: 0.76 - ETA: 1:36 - loss: 0.4711 - accuracy: 0.76 - ETA: 1:37 - loss: 0.4683 - accuracy: 0.77 - ETA: 1:38 - loss: 0.4664 - accuracy: 0.77 - ETA: 1:38 - loss: 0.4650 - accuracy: 0.77 - ETA: 1:39 - loss: 0.4620 - accuracy: 0.77 - ETA: 1:38 - loss: 0.4625 - accuracy: 0.77 - ETA: 1:38 - loss: 0.4595 - accuracy: 0.77 - ETA: 1:38 - loss: 0.4611 - 

















Epoch 8/20


 36352/337500 [==>...........................] - ETA: 1:34 - loss: 0.2590 - accuracy: 0.96 - ETA: 1:40 - loss: 0.3941 - accuracy: 0.83 - ETA: 1:40 - loss: 0.4205 - accuracy: 0.80 - ETA: 1:39 - loss: 0.4340 - accuracy: 0.79 - ETA: 1:35 - loss: 0.4302 - accuracy: 0.79 - ETA: 1:37 - loss: 0.4227 - accuracy: 0.80 - ETA: 1:38 - loss: 0.4311 - accuracy: 0.79 - ETA: 1:41 - loss: 0.4326 - accuracy: 0.79 - ETA: 1:40 - loss: 0.4387 - accuracy: 0.79 - ETA: 1:39 - loss: 0.4440 - accuracy: 0.78 - ETA: 1:41 - loss: 0.4438 - accuracy: 0.78 - ETA: 1:41 - loss: 0.4467 - accuracy: 0.78 - ETA: 1:41 - loss: 0.4482 - accuracy: 0.78 - ETA: 1:40 - loss: 0.4470 - accuracy: 0.78 - ETA: 1:40 - loss: 0.4439 - accuracy: 0.78 - ETA: 1:40 - loss: 0.4446 - accuracy: 0.78 - ETA: 1:38 - loss: 0.4396 - accuracy: 0.79 - ETA: 1:38 - loss: 0.4418 - accuracy: 0.78 - ETA: 1:38 - loss: 0.4393 - accuracy: 0.78 - ETA: 1:39 - loss: 0.4385 - accuracy: 0.78 - ETA: 1:39 - loss: 0.4378 - accuracy: 0.79 - ETA: 1:38 - loss: 0.4392 - 















Epoch 9/20


 36768/337500 [==>...........................] - ETA: 1:34 - loss: 0.5544 - accuracy: 0.78 - ETA: 1:36 - loss: 0.4570 - accuracy: 0.78 - ETA: 1:30 - loss: 0.4362 - accuracy: 0.79 - ETA: 1:35 - loss: 0.4392 - accuracy: 0.79 - ETA: 1:34 - loss: 0.4480 - accuracy: 0.78 - ETA: 1:33 - loss: 0.4507 - accuracy: 0.78 - ETA: 1:34 - loss: 0.4441 - accuracy: 0.79 - ETA: 1:34 - loss: 0.4453 - accuracy: 0.79 - ETA: 1:33 - loss: 0.4497 - accuracy: 0.78 - ETA: 1:34 - loss: 0.4471 - accuracy: 0.79 - ETA: 1:35 - loss: 0.4469 - accuracy: 0.78 - ETA: 1:35 - loss: 0.4424 - accuracy: 0.79 - ETA: 1:34 - loss: 0.4404 - accuracy: 0.79 - ETA: 1:34 - loss: 0.4407 - accuracy: 0.79 - ETA: 1:33 - loss: 0.4436 - accuracy: 0.78 - ETA: 1:34 - loss: 0.4452 - accuracy: 0.78 - ETA: 1:33 - loss: 0.4433 - accuracy: 0.79 - ETA: 1:33 - loss: 0.4416 - accuracy: 0.79 - ETA: 1:33 - loss: 0.4386 - accuracy: 0.79 - ETA: 1:33 - loss: 0.4398 - accuracy: 0.79 - ETA: 1:33 - loss: 0.4426 - accuracy: 0.79 - ETA: 1:33 - loss: 0.4431 - 













Epoch 10/20


















Epoch 11/20
















Epoch 12/20


 35072/337500 [==>...........................] - ETA: 1:45 - loss: 0.3353 - accuracy: 0.87 - ETA: 1:42 - loss: 0.4444 - accuracy: 0.78 - ETA: 1:36 - loss: 0.4352 - accuracy: 0.79 - ETA: 1:36 - loss: 0.4305 - accuracy: 0.79 - ETA: 1:34 - loss: 0.4075 - accuracy: 0.81 - ETA: 1:33 - loss: 0.4178 - accuracy: 0.80 - ETA: 1:32 - loss: 0.4173 - accuracy: 0.81 - ETA: 1:30 - loss: 0.4135 - accuracy: 0.81 - ETA: 1:30 - loss: 0.4190 - accuracy: 0.81 - ETA: 1:31 - loss: 0.4234 - accuracy: 0.80 - ETA: 1:30 - loss: 0.4181 - accuracy: 0.81 - ETA: 1:31 - loss: 0.4226 - accuracy: 0.81 - ETA: 1:32 - loss: 0.4179 - accuracy: 0.81 - ETA: 1:32 - loss: 0.4169 - accuracy: 0.81 - ETA: 1:32 - loss: 0.4161 - accuracy: 0.81 - ETA: 1:32 - loss: 0.4155 - accuracy: 0.81 - ETA: 1:32 - loss: 0.4148 - accuracy: 0.81 - ETA: 1:31 - loss: 0.4138 - accuracy: 0.81 - ETA: 1:31 - loss: 0.4152 - accuracy: 0.81 - ETA: 1:31 - loss: 0.4159 - accuracy: 0.81 - ETA: 1:30 - loss: 0.4177 - accuracy: 0.81 - ETA: 1:30 - loss: 0.4205 - 











Epoch 13/20
















Epoch 14/20
















Epoch 15/20


 39264/337500 [==>...........................] - ETA: 1:34 - loss: 0.3310 - accuracy: 0.90 - ETA: 1:34 - loss: 0.3976 - accuracy: 0.81 - ETA: 1:23 - loss: 0.4135 - accuracy: 0.80 - ETA: 1:26 - loss: 0.4033 - accuracy: 0.81 - ETA: 1:31 - loss: 0.4134 - accuracy: 0.80 - ETA: 1:31 - loss: 0.4071 - accuracy: 0.80 - ETA: 1:32 - loss: 0.4097 - accuracy: 0.80 - ETA: 1:30 - loss: 0.4035 - accuracy: 0.80 - ETA: 1:31 - loss: 0.4072 - accuracy: 0.80 - ETA: 1:31 - loss: 0.4045 - accuracy: 0.80 - ETA: 1:30 - loss: 0.4055 - accuracy: 0.80 - ETA: 1:30 - loss: 0.4042 - accuracy: 0.80 - ETA: 1:31 - loss: 0.3995 - accuracy: 0.80 - ETA: 1:32 - loss: 0.3974 - accuracy: 0.80 - ETA: 1:31 - loss: 0.3967 - accuracy: 0.81 - ETA: 1:31 - loss: 0.3996 - accuracy: 0.80 - ETA: 1:31 - loss: 0.3980 - accuracy: 0.81 - ETA: 1:31 - loss: 0.3967 - accuracy: 0.80 - ETA: 1:31 - loss: 0.3981 - accuracy: 0.80 - ETA: 1:32 - loss: 0.4003 - accuracy: 0.80 - ETA: 1:32 - loss: 0.3993 - accuracy: 0.80 - ETA: 1:31 - loss: 0.3999 - 















Epoch 16/20


 36960/337500 [==>...........................] - ETA: 1:34 - loss: 0.2859 - accuracy: 0.87 - ETA: 1:31 - loss: 0.4052 - accuracy: 0.82 - ETA: 1:19 - loss: 0.4137 - accuracy: 0.81 - ETA: 1:25 - loss: 0.4059 - accuracy: 0.81 - ETA: 1:27 - loss: 0.4014 - accuracy: 0.81 - ETA: 1:28 - loss: 0.3920 - accuracy: 0.82 - ETA: 1:30 - loss: 0.3935 - accuracy: 0.81 - ETA: 1:29 - loss: 0.3945 - accuracy: 0.81 - ETA: 1:27 - loss: 0.3931 - accuracy: 0.81 - ETA: 1:27 - loss: 0.4008 - accuracy: 0.81 - ETA: 1:27 - loss: 0.4049 - accuracy: 0.81 - ETA: 1:28 - loss: 0.4074 - accuracy: 0.81 - ETA: 1:27 - loss: 0.4063 - accuracy: 0.81 - ETA: 1:26 - loss: 0.4044 - accuracy: 0.81 - ETA: 1:27 - loss: 0.4017 - accuracy: 0.81 - ETA: 1:27 - loss: 0.4037 - accuracy: 0.81 - ETA: 1:28 - loss: 0.4041 - accuracy: 0.81 - ETA: 1:27 - loss: 0.4017 - accuracy: 0.81 - ETA: 1:27 - loss: 0.3974 - accuracy: 0.81 - ETA: 1:28 - loss: 0.3933 - accuracy: 0.82 - ETA: 1:28 - loss: 0.3943 - accuracy: 0.81 - ETA: 1:27 - loss: 0.3943 - 















Epoch 17/20


 38048/337500 [==>...........................] - ETA: 1:34 - loss: 0.4813 - accuracy: 0.84 - ETA: 1:36 - loss: 0.4114 - accuracy: 0.83 - ETA: 1:22 - loss: 0.3864 - accuracy: 0.83 - ETA: 1:25 - loss: 0.3721 - accuracy: 0.84 - ETA: 1:23 - loss: 0.3722 - accuracy: 0.83 - ETA: 1:26 - loss: 0.3750 - accuracy: 0.83 - ETA: 1:26 - loss: 0.3776 - accuracy: 0.82 - ETA: 1:26 - loss: 0.3719 - accuracy: 0.82 - ETA: 1:28 - loss: 0.3755 - accuracy: 0.82 - ETA: 1:28 - loss: 0.3804 - accuracy: 0.82 - ETA: 1:28 - loss: 0.3803 - accuracy: 0.82 - ETA: 1:28 - loss: 0.3813 - accuracy: 0.82 - ETA: 1:33 - loss: 0.3830 - accuracy: 0.82 - ETA: 1:34 - loss: 0.3841 - accuracy: 0.82 - ETA: 1:31 - loss: 0.3844 - accuracy: 0.82 - ETA: 1:29 - loss: 0.3855 - accuracy: 0.81 - ETA: 1:28 - loss: 0.3885 - accuracy: 0.81 - ETA: 1:28 - loss: 0.3850 - accuracy: 0.81 - ETA: 1:28 - loss: 0.3876 - accuracy: 0.81 - ETA: 1:29 - loss: 0.3872 - accuracy: 0.81 - ETA: 1:29 - loss: 0.3852 - accuracy: 0.81 - ETA: 1:29 - loss: 0.3884 - 















Epoch 18/20


 37760/337500 [==>...........................] - ETA: 1:34 - loss: 0.3014 - accuracy: 0.84 - ETA: 1:31 - loss: 0.3522 - accuracy: 0.84 - ETA: 1:36 - loss: 0.3819 - accuracy: 0.82 - ETA: 1:36 - loss: 0.3816 - accuracy: 0.82 - ETA: 1:36 - loss: 0.3719 - accuracy: 0.82 - ETA: 1:33 - loss: 0.3795 - accuracy: 0.82 - ETA: 1:34 - loss: 0.3774 - accuracy: 0.82 - ETA: 1:33 - loss: 0.3785 - accuracy: 0.82 - ETA: 1:34 - loss: 0.3759 - accuracy: 0.82 - ETA: 1:34 - loss: 0.3756 - accuracy: 0.82 - ETA: 1:34 - loss: 0.3746 - accuracy: 0.82 - ETA: 1:34 - loss: 0.3758 - accuracy: 0.83 - ETA: 1:34 - loss: 0.3760 - accuracy: 0.82 - ETA: 1:34 - loss: 0.3747 - accuracy: 0.83 - ETA: 1:34 - loss: 0.3724 - accuracy: 0.83 - ETA: 1:35 - loss: 0.3722 - accuracy: 0.83 - ETA: 1:35 - loss: 0.3703 - accuracy: 0.83 - ETA: 1:34 - loss: 0.3724 - accuracy: 0.82 - ETA: 1:34 - loss: 0.3715 - accuracy: 0.82 - ETA: 1:33 - loss: 0.3734 - accuracy: 0.82 - ETA: 1:34 - loss: 0.3733 - accuracy: 0.82 - ETA: 1:34 - loss: 0.3737 - 















Epoch 19/20


 34304/337500 [==>...........................] - ETA: 1:55 - loss: 0.3192 - accuracy: 0.87 - ETA: 1:36 - loss: 0.3192 - accuracy: 0.85 - ETA: 1:28 - loss: 0.3469 - accuracy: 0.84 - ETA: 1:33 - loss: 0.3480 - accuracy: 0.84 - ETA: 1:32 - loss: 0.3696 - accuracy: 0.83 - ETA: 1:32 - loss: 0.3692 - accuracy: 0.83 - ETA: 1:35 - loss: 0.3755 - accuracy: 0.83 - ETA: 1:34 - loss: 0.3751 - accuracy: 0.83 - ETA: 1:37 - loss: 0.3750 - accuracy: 0.83 - ETA: 1:36 - loss: 0.3775 - accuracy: 0.82 - ETA: 1:35 - loss: 0.3807 - accuracy: 0.82 - ETA: 1:36 - loss: 0.3821 - accuracy: 0.82 - ETA: 1:37 - loss: 0.3783 - accuracy: 0.82 - ETA: 1:38 - loss: 0.3867 - accuracy: 0.82 - ETA: 1:37 - loss: 0.3872 - accuracy: 0.81 - ETA: 1:37 - loss: 0.3865 - accuracy: 0.81 - ETA: 1:38 - loss: 0.3863 - accuracy: 0.81 - ETA: 1:39 - loss: 0.3860 - accuracy: 0.82 - ETA: 1:39 - loss: 0.3892 - accuracy: 0.81 - ETA: 1:41 - loss: 0.3871 - accuracy: 0.81 - ETA: 1:42 - loss: 0.3858 - accuracy: 0.81 - ETA: 1:43 - loss: 0.3857 - 











Epoch 20/20


















In [17]:
test_x_val = np.asarray(test_data["Sentences"])
test_y_val = np.asarray(test_data["Label"])

In [18]:
results = model.evaluate(test_x_val, test_y_val)



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 - 16s 42us/sample - loss: 0.9999 - accuracy: 0.7625


In [20]:
results = model.evaluate(test_x_val, test_y_val)



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 - 16s 43us/sample - loss: 0.9999 - accuracy: 0.7625


In [21]:
def a(text):
    predict = model.predict([process_text(text)])
    return predict[0][0]

In [23]:
a('My teacher praised me')

0.08865412

In [24]:
a("the teacher praised him for the work that i did for him")

0.10704447

In [25]:
a("This movie is not garbage just because you are beside me.")

0.4106733

In [26]:
a('''
      I'm walking through Union Square Park this morning. Headphones on. Donovan's "Where is She?" comes on. 
      It becomes this amazing soundtrack for walking lethargically through the park. Everthing's in slow motion. 
      And I'm looking at the people on the benches. It's all surreal. These faces. These blank, tragic faces. Stone faces. 
      Faces of madness, especially this cigarette smoke-wrinkled woman, heavy eyeliner, defeated and staring at dogs and smoking.
      The guy with palms pressed together chanting... And me still walking slow, because it's the way I feel. 
      And Donovan walks with me...
''')

0.96681535

In [27]:
a('''  
My dog died this morning. 
She has been rather strange for a few days, and had a massive stroke last night. 
Not a pretty sight, something twitching and frightened, unable to move, covered in it's own bodily fluids. 
We called the vet around and he gave her a massive dose of anaesthetic to put her out of her misery. 
He had trouble finding a vein though, and in the end injected her directly into her jugular and heart to do it quickly. 
She'd had a good life though, she was around 16, a fair age for a dog. 
We just have Bilbo left now, who although old and rather smelly is full of life and will hopefully stick around for a while yet.
Happy new year everyone, have a fantastic night tonight and wipe the slate clean for a new start. 
Remember, life is short and precious - make the most of every possible moment. 
''')

0.0004861073

In [28]:
a('''
      Where do i start??? 
      Yesterday was valentines day. 
      This usually was the one day out of the year where i knew that i'd be by myself. Well not this year. 
      It was the  BEST  day/date i've ever had. Its also the longest date i've every been on....about 12 hours long.   
      First of all it started off by me going to go and pick her up at her place. 
      She got me a card and fun dip. The card she gave me was perfect, and the fun dip, hey who doesnt like fun dip? 
      Well after that i took her to our first place we were going to go to. 
      I didnt tell her what we were going to do, but i did tell her where we were going. I wanted it to be a surprise. 
      We went to Kensington Metropark, to go and feed the birds from out of your hand....
      its very cool, we had a really good time doing that. 
      We decided then to go walk the trail a bit, and then something happened that maked the date even better....
      we came across about 9 deer, that were only about 10 feet away from us...it was really cool...
      we were holding hands by then and just watching the deer and what they were going to do. 
      The baby deers started to follow us, which was also cool...we had a really good time at kensington.  
      
      The second place we went to was to go and eat....
      i really didnt have an idea on where we were going to go, but there was a place in livonia that i wanted to go to and try,
      and she said she was up for anything. We went to Buca di Peppo. 
      Its an italian resturant where they serve family size portions. 
      We got this shrimp pasta stuff, it was good, but there was a ton of food....
      we could only eat one serving and we were both full...sooo we left.  
      
      The third place that we went to was just a place for us to talk...so we went to the Coffee Bean Coffee House in Plymouth. 
      I think we got there around 5 and we didnt leave till 9:30...we talked about everything. 
      It was great. Never did we have a pause in conversation....which when you usually talk to someone for 4.5 hours, 
      there usually are points when you cant think of anything to say...that just didnt happen. 
      But the day/date wasnt over...  
      
      The fourth and final thing we did was we went to go see the movie "How to lose a guy in 10 days". 
      It was a really good movie, its not quite a girl movie and its not quite a guy movie...
      I enjoyed it and i know she did too...
      we were the last two people to leave the inside of the theater, cause we really didnt feel like in any kind of a rush. 
      It was late by then so i decided to take her home. We took the long way back so we could still talk some more....
      i've never been this comfortable talking to anyone for this long....
      when we got back to her place i walked her to her door, gave her a hug goodnight...
      i wanted to give her a kiss goodnight, but i want things to go smoothly between us, so we'll know when it feels right 
      (although it did last night).   
      I cant say that i have have'd a better time than i did yesterday. 
      I know that i wont be able to spend the money always like that, 
      but it was definetly worth it too have that good of a time with her...
      wouldnt change anything that happened yesterday.  Stay tuned for more....  
''')

0.9116052

In [29]:
a("tears of joy")

0.002237093

In [30]:
a('''See, once I was sweet and innocent. Where did it all go wrong ?  ''')

0.4356663

In [31]:
a('''
Rainy!!! Cool and nice. Laze in bed, cuddle in blanket, relax at a corner... like a kitty!!!!!
Cough mixture dun reallly work that well on me. 
''')

0.28344664

In [32]:
a('''      
"It's not that I hate people, I just like it better when they're not around." - Bukowski 
My favorite line from the film "Barfly."   
And on nights like tonight that line resonates like cattlecars crossing sidecreeks...whatever that means. 
Let me see if I can explain it further...  So tonight I hit the Holiday... Lots of kids in the joint. 
Loud, obnoxious fuckers who play classic rock on the jukebox. 
Granted, I can appreciate the Stones. 
And I'd like to thank the individual who played "Let it Loose," which I think is almost as great as that Bukowski line. 
But a man can only take so much greatest hits of the Doors, CCR, Tom Petty and the like... 
And I'd also like to thank the individual who played Nick Cave's "Into My Arms." 
But unfortunately all these loud fuckers were ruining the song for me.  
All right, yeah, I know I sound I'm bitching and whatnot, 
but there are times where I don't mind people being loud and having a good time and all that shit. 
But I just wasn't digging it tonight.   
Anyway, then these fuckers come in and sit a seat down from me. And they're talking all sorts of bullshit... 
And that's when the next and shoulders started tightening up. The jaw starts tighening up. 
Suddenly all these voices start to get louder. 
And I'm trying to figure out which one of these guys should have his face smashed into the bar first. 
But I take a few deep breaths, finish my round and vacate.   
Then on the L train more madness. 
I've got the headphones cranked and I can still hear this chick talking to this guy right next to me. 
And I'm thinking, Jesus, she's six inches from the guy. Does she have to talk so loud? 
Hell, I just want to listen to Tom Waits in peace. 
And the train's packed. And the time it takes to get from 1st Ave. to Bedford seems like hours. 
But at Bedford a lot of people clear out. Another deep breath.   
So now I'm back at the crib. Listening to Erik Satie. Neck and shoulders still tight as fuck. 
I'm trying to decompress. And writing this is helping cut through all the madness. 
I just needed to get this shit out before I go to sleep...
''')

0.650144

In [33]:
a("sad leh")

0.0015641765

In [34]:
a("This movie is not garbage just because you are beside me.")

0.4106733

In [35]:
a("This movie is garbage")

0.96061116

In [36]:
a("gucci gang gucci gucci gang")

0.50526696

In [37]:
a('i am not happy')

0.16576913

In [38]:
a('my teacher congratulates me')

0.98947585

In [39]:
a('my teacher praised me')

0.08865412

In [40]:
a('This movie is trash')

0.85513216

In [41]:
a('I hate this movie')

0.12251593