In [1]:
# run the commands below in terminal to install dependencies
# pip install spacy
# python -m spacy download en
import spacy
from spacy.lang.en import English
from spacy.lang.en.stop_words import STOP_WORDS
import string

nlp_sw = English()
nlp_n = spacy.load("en_core_web_sm")

removable_char = list(string.punctuation) + ["\n", "\r"]

# text arguement in string
# remove stop words of the provided text
# return a list of words in small letters in the text without stop words and punctuations
def remove_stop_words(text):
    text = text.lower()
    token_doc = nlp_sw(text)
    clean_text = []
    for token in token_doc:
        token_text = token.text
        token_text = token_text.replace(" ", "")
        lexeme = nlp_sw.vocab[token_text]
        if not(lexeme.is_stop or token_text in removable_char):
            if token_text and not(token_text.isspace()):
                tmp_text = token_text[1:] if token_text[0] in removable_char else token_text
                if tmp_text and not(tmp_text.isspace()):
                    clean_text.append(tmp_text)
    return clean_text

# text arguement in string
# normalise the text
# return a list of words in small letters in the normalised text
def normalise_text(text):
    text = text.lower()
    token_doc = nlp_n(text)
    normalised_text = []
    for token in token_doc:
        lemma = token.lemma_
        if not(lemma == "-PRON-"):
            normalised_text.append(lemma)
    return normalised_text

In [2]:
import pandas as pd
df = pd.read_csv('training_data.csv')
df

Unnamed: 0,Label,Sentences
0,0,is upset that he can't update his Facebook by ...
1,0,@Kenichan I dived many times for the ball. Man...
2,0,my whole body feels itchy and like its on fire
3,0,"@nationwideclass no, it's not behaving at all...."
4,0,@Kwesidei not the whole crew
...,...,...
1599994,4,Just woke up. Having no school is the best fee...
1599995,4,TheWDB.com - Very cool to hear old Walt interv...
1599996,4,Are you ready for your MoJo Makeover? Ask me f...
1599997,4,Happy 38th Birthday to my boo of alll time!!! ...


In [3]:
# 0 = negative, 2 = neutral, 4 = positive
data_negative = df[df.Label == 0][:10000]
data_positive = df[df.Label == 4][:10000]
data_positive["Label"] = 1
df_row_merged = pd.concat([data_negative, data_positive], ignore_index=True)
df_row_merged

Unnamed: 0,Label,Sentences
0,0,is upset that he can't update his Facebook by ...
1,0,@Kenichan I dived many times for the ball. Man...
2,0,my whole body feels itchy and like its on fire
3,0,"@nationwideclass no, it's not behaving at all...."
4,0,@Kwesidei not the whole crew
...,...,...
19995,1,Morning! I have slacked for two days in twitte...
19996,1,@bensummers Isn't that sweet of them.... Altru...
19997,1,"@jakrose Um, milk *fathers* don't have udders...."
19998,1,@zenaweist They could also tweet @BeccaRoberts


In [4]:
df_row_merged = df_row_merged.sample(frac=1).reset_index(drop=True)
df_row_merged

Unnamed: 0,Label,Sentences
0,1,@benhafner i would have to agree that @jocam i...
1,1,@stbo I'm getting old 3: I talk a lot about th...
2,1,finally has a name for her penguin Bertie!
3,0,Hw cm MTV duznt show music anymore? all v c is...
4,0,I still have major belly itch. Bummer. Dagger ...
...,...,...
19995,0,Chapped lips...I wish I knew where my Burt's B...
19996,1,@NataliaAntonova Aw that would be nice x
19997,1,"@TrueGabe yep to the wine, and feijoa sorbet, ..."
19998,0,Have to update my picture cos I look old and f...


In [5]:
training_data = df_row_merged[:int(len(df_row_merged)*0.7)]
test_data = df_row_merged[int(len(df_row_merged)*0.7):]
training_data


Unnamed: 0,Label,Sentences
0,1,@benhafner i would have to agree that @jocam i...
1,1,@stbo I'm getting old 3: I talk a lot about th...
2,1,finally has a name for her penguin Bertie!
3,0,Hw cm MTV duznt show music anymore? all v c is...
4,0,I still have major belly itch. Bummer. Dagger ...
...,...,...
13995,1,I'm excited for rescue me to start tomorrow. O...
13996,0,hooray for the ever so reliable signalflare se...
13997,0,"Sitting in my car, waiting for my mum to come ..."
13998,0,My printer / scanner / copier in on the fritz


In [6]:
test_data

Unnamed: 0,Label,Sentences
14000,1,is at Moffitt Library with Ate Golda and Kuya ...
14001,0,"I wish I was black. Actually, I think I was me..."
14002,1,"@berrygurl919 hmmm, my Pearl had a fatal erro ..."
14003,0,"Train rammed, fellow commuters vile . Special ..."
14004,0,i just got so emotional at jeremy kyle
...,...,...
19995,0,Chapped lips...I wish I knew where my Burt's B...
19996,1,@NataliaAntonova Aw that would be nice x
19997,1,"@TrueGabe yep to the wine, and feijoa sorbet, ..."
19998,0,Have to update my picture cos I look old and f...


In [7]:
# remove url in tweets
# remove tag (@) in tweets
# remove hashtags (#) in tweets
# remove stopwords
# normalise text
# this cell might take some time to run, be patient
import re
import string

url_pattern = re.compile(".*https?:\/\/")
hashtag_pattern = re.compile("#[a-z\d-]+")
tag_pattern = re.compile("^@")
punc = string.punctuation
punc = punc.replace("-", "")
punc += "“”"
punc = punc.replace ("'","")

pd.options.mode.chained_assignment = None

def process_text(text):
    tmp_no_url_hashtag = []
    for word in re.split(' |\r|\n', text):
        if url_pattern.match(word) or tag_pattern.match(word) or hashtag_pattern.match(word):
            continue
        else:
            clean_word = word.translate(str.maketrans("","", punc))
            clean_word = clean_word.replace("’","'")
            tmp_no_url_hashtag.append(clean_word)
    tmp = " ".join(tmp_no_url_hashtag)
    without_stopwords = " ".join(remove_stop_words(tmp))
    processed = " ".join(normalise_text(without_stopwords))
    return processed

for i in range (len(training_data["Sentences"])):
    training_data["Sentences"][i] = process_text(training_data["Sentences"][i])
    if i % 1000 == 0:
        print ("Processing", i, "unit")

Processing 0 unit
Processing 1000 unit
Processing 2000 unit
Processing 3000 unit
Processing 4000 unit
Processing 5000 unit
Processing 6000 unit
Processing 7000 unit
Processing 8000 unit
Processing 9000 unit
Processing 10000 unit
Processing 11000 unit
Processing 12000 unit
Processing 13000 unit


In [31]:
training_data

for i in range(2000,1999+len(training_data)):
    print(type(training_data["Sentences"][i]))

<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class

<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class

<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class

<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class

<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class

<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class

<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class

In [9]:
validation_data = training_data[:2000]
training_data = training_data[2000:]

In [10]:
import numpy as np
train_x_val = np.asarray(training_data["Sentences"])
train_y_val = np.asarray(training_data["Label"])
valid_x_val = np.asarray(validation_data["Sentences"])
valid_y_val = np.asarray(validation_data["Label"])

In [11]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
# embedding = "https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1"
hub_layer = hub.KerasLayer(embedding, input_shape=[], 
                           dtype=tf.string, trainable=True)
# hub_layer(training_data["Sentences"][:3])

In [12]:
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 20)                400020    
_________________________________________________________________
dense (Dense)                (None, 16)                336       
_________________________________________________________________
dense_1 (Dense)              (None, 10)                170       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 11        
Total params: 400,537
Trainable params: 400,537
Non-trainable params: 0
_________________________________________________________________


In [13]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [14]:
history = model.fit(train_x_val,
                    train_y_val,
                    epochs=20,
                    validation_data=(valid_x_val, valid_y_val),
                    verbose=1)

Train on 12000 samples, validate on 2000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


Epoch 5/20
Epoch 6/20
Epoch 7/20


Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20


Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20


Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20


Epoch 20/20


In [15]:
test_x_val = np.asarray(test_data["Sentences"])
test_y_val = np.asarray(test_data["Label"])

In [16]:
results = model.evaluate(test_x_val, test_y_val)



In [17]:
def a(text):
    predict = model.predict([process_text(text)])
    return predict[0][0]

In [18]:
results = model.evaluate(test_x_val, test_y_val)

 - 0s 72us/sample - loss: 1.2735 - accuracy: 0.5750


0.9922389

In [19]:
a('''  
My dog died this morning. 
She has been rather strange for a few days, and had a massive stroke last night. 
Not a pretty sight, something twitching and frightened, unable to move, covered in it's own bodily fluids. 
We called the vet around and he gave her a massive dose of anaesthetic to put her out of her misery. 
He had trouble finding a vein though, and in the end injected her directly into her jugular and heart to do it quickly. 
She'd had a good life though, she was around 16, a fair age for a dog. 
We just have Bilbo left now, who although old and rather smelly is full of life and will hopefully stick around for a while yet.
Happy new year everyone, have a fantastic night tonight and wipe the slate clean for a new start. 
Remember, life is short and precious - make the most of every possible moment. 
''')

0.0017740441

In [20]:
a('''
      Where do i start??? 
      Yesterday was valentines day. 
      This usually was the one day out of the year where i knew that i'd be by myself. Well not this year. 
      It was the  BEST  day/date i've ever had. Its also the longest date i've every been on....about 12 hours long.   
      First of all it started off by me going to go and pick her up at her place. 
      She got me a card and fun dip. The card she gave me was perfect, and the fun dip, hey who doesnt like fun dip? 
      Well after that i took her to our first place we were going to go to. 
      I didnt tell her what we were going to do, but i did tell her where we were going. I wanted it to be a surprise. 
      We went to Kensington Metropark, to go and feed the birds from out of your hand....
      its very cool, we had a really good time doing that. 
      We decided then to go walk the trail a bit, and then something happened that maked the date even better....
      we came across about 9 deer, that were only about 10 feet away from us...it was really cool...
      we were holding hands by then and just watching the deer and what they were going to do. 
      The baby deers started to follow us, which was also cool...we had a really good time at kensington.  
      
      The second place we went to was to go and eat....
      i really didnt have an idea on where we were going to go, but there was a place in livonia that i wanted to go to and try,
      and she said she was up for anything. We went to Buca di Peppo. 
      Its an italian resturant where they serve family size portions. 
      We got this shrimp pasta stuff, it was good, but there was a ton of food....
      we could only eat one serving and we were both full...sooo we left.  
      
      The third place that we went to was just a place for us to talk...so we went to the Coffee Bean Coffee House in Plymouth. 
      I think we got there around 5 and we didnt leave till 9:30...we talked about everything. 
      It was great. Never did we have a pause in conversation....which when you usually talk to someone for 4.5 hours, 
      there usually are points when you cant think of anything to say...that just didnt happen. 
      But the day/date wasnt over...  
      
      The fourth and final thing we did was we went to go see the movie "How to lose a guy in 10 days". 
      It was a really good movie, its not quite a girl movie and its not quite a guy movie...
      I enjoyed it and i know she did too...
      we were the last two people to leave the inside of the theater, cause we really didnt feel like in any kind of a rush. 
      It was late by then so i decided to take her home. We took the long way back so we could still talk some more....
      i've never been this comfortable talking to anyone for this long....
      when we got back to her place i walked her to her door, gave her a hug goodnight...
      i wanted to give her a kiss goodnight, but i want things to go smoothly between us, so we'll know when it feels right 
      (although it did last night).   
      I cant say that i have have'd a better time than i did yesterday. 
      I know that i wont be able to spend the money always like that, 
      but it was definetly worth it too have that good of a time with her...
      wouldnt change anything that happened yesterday.  Stay tuned for more....  
''')

0.003925763

In [21]:
a("the teacher praised him for the work that i did for him")

0.8575078

In [22]:
a("sad leh")

0.024187557

In [23]:
a("This movie is not garbage just because you are beside me.")

0.6054409

In [24]:
a('This movie is trash')

0.11821625

In [25]:
a('This movie is garbage')

0.6054409

In [26]:
a('The teacher praised me')

0.9815144