# Second dataset

## Get Data

In [3]:
import pandas as pd

In [4]:
dfcsv = pd.read_csv("../Twitter_Data.csv")

In [5]:
dfcsv.head()

Unnamed: 0,clean_text,category
0,when modi promised â€œminimum government maximum...,-1.0
1,talk all the nonsense and continue all the dra...,0.0
2,what did just say vote for modi welcome bjp t...,1.0
3,asking his supporters prefix chowkidar their n...,1.0
4,answer who among these the most powerful world...,1.0


In [6]:
dfcsv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162980 entries, 0 to 162979
Data columns (total 2 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   clean_text  162976 non-null  object 
 1   category    162973 non-null  float64
dtypes: float64(1), object(1)
memory usage: 2.5+ MB


In [7]:
dfcsv.isnull().sum()

clean_text    4
category      7
dtype: int64

## Data cleansing

In [8]:
import tensorflow as tf
import numpy as np
import re

2022-11-03 11:04:41.910207: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-03 11:04:42.243184: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-11-03 11:04:42.310309: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-03 11:04:42.310338: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if yo

### Remove null values 

In [9]:
dfcsv.dropna(inplace=True) # removes rows with null values

In [10]:
dfcsv.isnull().sum()

clean_text    0
category      0
dtype: int64

In [11]:
import re
# Same cleaning function as other dataset
def clean_data(text):
    # Removing mentions
    text = re.sub(r"/b@","",text)
        
    # Remove links
    text = re.sub(r"(?:\@|https?\://)\S+", "", text)
    
    # Remove digits & strings of digits
    text = re.sub(r"([0-9])","",text)
    
    # Remove new lines
    text = re.sub(r"\n","",text)
    
    # Removes all punctuation
    text = re.sub(r'[^\w\s]', '', text)
    
    
    return text

In [12]:
dfcsv["clean_text"] = dfcsv["clean_text"].apply(clean_data)

In [13]:
dfcsv.head()

Unnamed: 0,clean_text,category
0,when modi promised minimum government maximum ...,-1.0
1,talk all the nonsense and continue all the dra...,0.0
2,what did just say vote for modi welcome bjp t...,1.0
3,asking his supporters prefix chowkidar their n...,1.0
4,answer who among these the most powerful world...,1.0


## Lowercase text

In [14]:
def lowercase_data(data):
    data = data.lower()
    return data

In [15]:
dfcsv["clean_text"] = dfcsv["clean_text"].apply(lowercase_data)

In [16]:
dfcsv.head(10)

Unnamed: 0,clean_text,category
0,when modi promised minimum government maximum ...,-1.0
1,talk all the nonsense and continue all the dra...,0.0
2,what did just say vote for modi welcome bjp t...,1.0
3,asking his supporters prefix chowkidar their n...,1.0
4,answer who among these the most powerful world...,1.0
5,kiya tho refresh maarkefir comment karo,0.0
6,surat women perform yagna seeks divine grace f...,0.0
7,this comes from cabinet which has scholars lik...,0.0
8,with upcoming election india saga going import...,1.0
9,gandhi was gay does modi,1.0


## Remove emojis

In [17]:
def remove_emoji(data):
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  #face emojis
                           u"\U0001F300-\U0001F5FF"  #symbols
                           u"\U0001F680-\U0001F6FF"  #transport & map emojis
                           u"\U0001F1E0-\U0001F1FF"  # flags
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', data)

In [18]:
remove_emoji("ðŸ‡¬ðŸ‡§")

''

In [19]:
dfcsv["clean_text"] = dfcsv["clean_text"].apply(lowercase_data)

In [20]:
dfcsv.head(10)

Unnamed: 0,clean_text,category
0,when modi promised minimum government maximum ...,-1.0
1,talk all the nonsense and continue all the dra...,0.0
2,what did just say vote for modi welcome bjp t...,1.0
3,asking his supporters prefix chowkidar their n...,1.0
4,answer who among these the most powerful world...,1.0
5,kiya tho refresh maarkefir comment karo,0.0
6,surat women perform yagna seeks divine grace f...,0.0
7,this comes from cabinet which has scholars lik...,0.0
8,with upcoming election india saga going import...,1.0
9,gandhi was gay does modi,1.0


## Remove Stop words

In [21]:
import nltk
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to /home/adamo/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [22]:
from nltk.corpus import stopwords
stopwords = stopwords.words("english")

In [23]:
print(stopwords)

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [24]:
dfcsv["clean_text"] = dfcsv["clean_text"].apply(lambda words: ' '.join([word for word in words.split() if word not in (stopwords)]))
# Using lamba to have a neat inline function, also works better than using a written function

In [25]:
dfcsv.head(10)

Unnamed: 0,clean_text,category
0,modi promised minimum government maximum gover...,-1.0
1,talk nonsense continue drama vote modi,0.0
2,say vote modi welcome bjp told rahul main camp...,1.0
3,asking supporters prefix chowkidar names modi ...,1.0
4,answer among powerful world leader today trump...,1.0
5,kiya tho refresh maarkefir comment karo,0.0
6,surat women perform yagna seeks divine grace n...,0.0
7,comes cabinet scholars like modi smriti hema t...,0.0
8,upcoming election india saga going important p...,1.0
9,gandhi gay modi,1.0


## Removing non enlgish words

In [26]:
nltk.download("words")

[nltk_data] Downloading package words to /home/adamo/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [27]:
Allwords = set(nltk.corpus.words.words())

In [28]:
dfcsv["clean_text"] = dfcsv["clean_text"].apply(lambda w: ' '.join([ws for ws in w.split() if ws in (Allwords) or not ws.isalpha()]))

In [29]:
dfcsv.head(10)

Unnamed: 0,clean_text,category
0,minimum government maximum governance begin di...,-1.0
1,talk nonsense continue drama vote,0.0
2,say vote welcome told main campaigner think relax,1.0
3,prefix great service confusion read crustal cl...,1.0
4,answer among powerful world leader today trump...,1.0
5,tho refresh comment karo,0.0
6,surat perform divine grace become,0.0
7,comes cabinet like smriti time introspect,0.0
8,upcoming election saga going important pair lo...,1.0
9,gay,1.0


## Lemmatization

In [30]:
import nltk 
nltk.download("wordnet")
nltk.download("omw-1.4")

[nltk_data] Downloading package wordnet to /home/adamo/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/adamo/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [31]:
from nltk.stem import WordNetLemmatizer

In [32]:
lemmatizer = WordNetLemmatizer()

In [33]:
print(lemmatizer.lemmatize("dogs"))
print(lemmatizer.lemmatize("phones"))

dog
phone


In [34]:
dfcsv["clean_text"] = dfcsv["clean_text"].apply(lemmatizer.lemmatize)

In [35]:
dfcsv.head(10)

Unnamed: 0,clean_text,category
0,minimum government maximum governance begin di...,-1.0
1,talk nonsense continue drama vote,0.0
2,say vote welcome told main campaigner think relax,1.0
3,prefix great service confusion read crustal cl...,1.0
4,answer among powerful world leader today trump...,1.0
5,tho refresh comment karo,0.0
6,surat perform divine grace become,0.0
7,comes cabinet like smriti time introspect,0.0
8,upcoming election saga going important pair lo...,1.0
9,gay,1.0


## Encoding the label

In [36]:
def sentiment_encoder(data):
    if data == -1.0:
        data = 0
    elif data == 1.0:
        data = 1
    elif data == 0:
        data = 0.5
    else:
        data = None
        
    return data

In [37]:
dfcsv["category"] = dfcsv["category"].apply(sentiment_encoder)

In [38]:
dfcsv.head()

Unnamed: 0,clean_text,category
0,minimum government maximum governance begin di...,0.0
1,talk nonsense continue drama vote,0.5
2,say vote welcome told main campaigner think relax,1.0
3,prefix great service confusion read crustal cl...,1.0
4,answer among powerful world leader today trump...,1.0


In [39]:
dfcsv.isnull().sum()

clean_text    0
category      0
dtype: int64

## Split data into training and testing

In [40]:
from sklearn.model_selection import train_test_split

In [41]:
# Test size is 30% of total data from data set
x_train, x_test, y_train, y_test = train_test_split(dfcsv["clean_text"].values, dfcsv["category"].values,
                                                    test_size=0.3)

# Checking the sentiment has been split up appropriately 
print('sentiment Text: ', x_train[2])
print('sentiment: ', y_train[2])

sentiment Text:  second term
sentiment:  0.5


## Tokenizing data

In [42]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [43]:
tokenizer = Tokenizer(num_words=100000000)
totalWords = [*x_train, *x_test]
tokenizer.fit_on_texts(totalWords)

In [44]:
wordindex = tokenizer.word_index
VocabSize= len(wordindex)
print("The number of unique words is: ", VocabSize)

The number of unique words is:  17954


In [45]:
print(wordindex)



In [46]:
len(x_test)

48891

In [47]:
train_seq = tokenizer.texts_to_sequences(x_train) # Compares all the data to vocab and assigns a integer reference for the vocab 
test_seq = tokenizer.texts_to_sequences(x_test)
print(test_seq)

[[1899, 38, 58], [878, 69, 258, 2767, 797, 150, 514, 280, 108, 201, 10, 386, 878], [289, 43, 1, 2300, 205, 300, 2688, 16, 7577, 102, 549], [481, 126, 182, 123, 4, 4, 206, 148, 1954, 1106, 6331], [1612, 1546, 1268, 649, 5693, 211, 600, 5818, 649, 2264, 417, 2686, 133, 52, 587], [159, 159, 1341, 401, 163, 797, 32, 280, 1168, 1332, 794, 650, 409, 508, 98], [12, 46, 1683, 200, 792, 40, 39], [1394, 2013, 5769, 351, 6295, 186], [17, 2, 700, 14, 17, 4574, 108, 585, 1656, 17, 1127, 9, 65, 7], [359, 973, 428, 32], [1248, 45, 417, 304, 1105, 1487, 19, 2005, 1555, 851, 977], [2198, 642, 404, 1591, 474, 206, 314, 191, 1402, 1164, 3109, 154, 197, 1334, 2240, 1094, 495], [3, 14, 96, 99, 9854], [557, 122, 249, 753, 524], [61, 9, 1152, 1, 3, 306, 765, 38, 342], [303, 444, 214, 7, 937, 68, 69], [638, 13003, 4132, 8727, 159, 384, 12654, 83, 8607, 416], [307, 914], [140, 384, 1442, 60, 307, 914, 301, 7969, 346, 256, 1831, 1284], [711, 42, 983, 824], [252, 226, 338, 106, 424, 48, 125, 1048, 48, 128, 2, 43

In [48]:
len(test_seq)

48891

In [49]:
from tensorflow.keras.preprocessing.sequence import pad_sequences 

In [50]:
# PAD sequences so they are all the same length as the inputs must all be of same length

train_padded = pad_sequences(train_seq)

print(train_padded[0])

# Check size of each padded sequence

TrainingPadShape = train_padded.shape[1] # for use when defining the model shape 
print(TrainingPadShape)

[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0 2468   47]
31


In [51]:
test_padded = pad_sequences(test_seq,maxlen=TrainingPadShape)
print(test_padded.shape[1])

31


## Constructing the model

In [52]:
from tensorflow.keras.layers import Input, Dense, Embedding, LSTM, GlobalMaxPooling1D
from tensorflow.keras.models import Model

In [53]:
DimentionEmbeddingLayer = 10
DimentionLSTMLayer = 30

inputLayer = Input(shape=(TrainingPadShape, ))
modelLayers = Embedding(VocabSize +1, DimentionEmbeddingLayer)(inputLayer) # Vocabsize + 1 as indexing starts from 0
modelLayers = LSTM(DimentionLSTMLayer, return_sequences=True)(modelLayers)
modelLayers = GlobalMaxPooling1D()(modelLayers)
modelLayers = Dense(32, activation="relu")(modelLayers)
modelLayers = Dense(1, activation="sigmoid")(modelLayers)

model = Model(inputLayer, modelLayers)

2022-11-03 11:05:12.511544: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-11-03 11:05:12.512167: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2022-11-03 11:05:12.512239: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (adamo-Surface-Pro-7): /proc/driver/nvidia/version does not exist
2022-11-03 11:05:12.512959: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Compling the model

In [54]:
model.compile(optimizer="RMSprop",loss="binary_crossentropy",metrics=["accuracy"])

## Fitting data

In [55]:
from keras.callbacks import ModelCheckpoint

### Fitting data once

In [54]:
fittingdata= model.fit(train_padded, y_train, epochs=1,validation_data=(test_padded, y_test))



### Tesing tracking weights

In [81]:
checkpoint1 = ModelCheckpoint("Weights_tracked.hdf5", monitor='val_accuracy', verbose=1,save_best_only=True, mode='auto', period=1,save_weights_only=True)
history = model.fit(train_padded, y_train, epochs=5,validation_data=(test_padded, y_test),callbacks=[checkpoint1])

Epoch 1/5
Epoch 1: val_accuracy improved from -inf to 0.56458, saving model to Weights_tracked.hdf5
Epoch 2/5
Epoch 2: val_accuracy improved from 0.56458 to 0.58027, saving model to Weights_tracked.hdf5
Epoch 3/5
Epoch 3: val_accuracy improved from 0.58027 to 0.58862, saving model to Weights_tracked.hdf5
Epoch 4/5
Epoch 4: val_accuracy improved from 0.58862 to 0.59029, saving model to Weights_tracked.hdf5
Epoch 5/5
Epoch 5: val_accuracy did not improve from 0.59029


### Fitting data for best model

In [55]:
checkpoint1 = ModelCheckpoint("Final_model.hdf5", monitor='val_accuracy', verbose=1,save_best_only=True, mode='auto', period=1,save_weights_only=False)
history = model.fit(train_padded, y_train, epochs=150,validation_data=(test_padded, y_test),callbacks=[checkpoint1])

Epoch 1/150
Epoch 1: val_accuracy improved from -inf to 0.58772, saving model to Final_model.hdf5
Epoch 2/150
Epoch 2: val_accuracy improved from 0.58772 to 0.58786, saving model to Final_model.hdf5
Epoch 3/150
Epoch 3: val_accuracy improved from 0.58786 to 0.59062, saving model to Final_model.hdf5
Epoch 4/150
Epoch 4: val_accuracy improved from 0.59062 to 0.59244, saving model to Final_model.hdf5
Epoch 5/150
Epoch 5: val_accuracy did not improve from 0.59244
Epoch 6/150
Epoch 6: val_accuracy improved from 0.59244 to 0.59318, saving model to Final_model.hdf5
Epoch 7/150
Epoch 7: val_accuracy did not improve from 0.59318
Epoch 8/150
Epoch 8: val_accuracy did not improve from 0.59318
Epoch 9/150
Epoch 9: val_accuracy did not improve from 0.59318
Epoch 10/150
Epoch 10: val_accuracy did not improve from 0.59318
Epoch 11/150
Epoch 11: val_accuracy did not improve from 0.59318
Epoch 12/150
Epoch 12: val_accuracy did not improve from 0.59318
Epoch 13/150
Epoch 13: val_accuracy did not improve

Epoch 29/150
Epoch 29: val_accuracy did not improve from 0.59318
Epoch 30/150
Epoch 30: val_accuracy did not improve from 0.59318
Epoch 31/150
Epoch 31: val_accuracy did not improve from 0.59318
Epoch 32/150
Epoch 32: val_accuracy did not improve from 0.59318
Epoch 33/150
Epoch 33: val_accuracy did not improve from 0.59318
Epoch 34/150
Epoch 34: val_accuracy did not improve from 0.59318
Epoch 35/150
Epoch 35: val_accuracy did not improve from 0.59318
Epoch 36/150
Epoch 36: val_accuracy did not improve from 0.59318
Epoch 37/150
Epoch 37: val_accuracy did not improve from 0.59318
Epoch 38/150
Epoch 38: val_accuracy did not improve from 0.59318
Epoch 39/150
Epoch 39: val_accuracy did not improve from 0.59318
Epoch 40/150
Epoch 40: val_accuracy did not improve from 0.59318
Epoch 41/150
Epoch 41: val_accuracy did not improve from 0.59318
Epoch 42/150
Epoch 42: val_accuracy did not improve from 0.59318
Epoch 43/150
Epoch 43: val_accuracy did not improve from 0.59318
Epoch 44/150
Epoch 44: va

Epoch 58/150
Epoch 58: val_accuracy did not improve from 0.59318
Epoch 59/150
Epoch 59: val_accuracy did not improve from 0.59318
Epoch 60/150
Epoch 60: val_accuracy did not improve from 0.59318
Epoch 61/150
Epoch 61: val_accuracy did not improve from 0.59318
Epoch 62/150
Epoch 62: val_accuracy did not improve from 0.59318
Epoch 63/150
Epoch 63: val_accuracy did not improve from 0.59318
Epoch 64/150
Epoch 64: val_accuracy did not improve from 0.59318
Epoch 65/150
Epoch 65: val_accuracy did not improve from 0.59318
Epoch 66/150
Epoch 66: val_accuracy did not improve from 0.59318
Epoch 67/150
Epoch 67: val_accuracy did not improve from 0.59318
Epoch 68/150
Epoch 68: val_accuracy did not improve from 0.59318
Epoch 69/150
Epoch 69: val_accuracy did not improve from 0.59318
Epoch 70/150
Epoch 70: val_accuracy did not improve from 0.59318
Epoch 71/150
Epoch 71: val_accuracy did not improve from 0.59318
Epoch 72/150
Epoch 72: val_accuracy did not improve from 0.59318
Epoch 73/150
Epoch 73: va

Epoch 87/150
Epoch 87: val_accuracy did not improve from 0.59318
Epoch 88/150
Epoch 88: val_accuracy did not improve from 0.59318
Epoch 89/150
Epoch 89: val_accuracy did not improve from 0.59318
Epoch 90/150
Epoch 90: val_accuracy did not improve from 0.59318
Epoch 91/150
Epoch 91: val_accuracy did not improve from 0.59318
Epoch 92/150
Epoch 92: val_accuracy did not improve from 0.59318
Epoch 93/150
Epoch 93: val_accuracy did not improve from 0.59318
Epoch 94/150
Epoch 94: val_accuracy did not improve from 0.59318
Epoch 95/150
Epoch 95: val_accuracy did not improve from 0.59318
Epoch 96/150
Epoch 96: val_accuracy did not improve from 0.59318
Epoch 97/150
Epoch 97: val_accuracy did not improve from 0.59318
Epoch 98/150
Epoch 98: val_accuracy did not improve from 0.59318
Epoch 99/150
Epoch 99: val_accuracy did not improve from 0.59318
Epoch 100/150
Epoch 100: val_accuracy did not improve from 0.59318
Epoch 101/150
Epoch 101: val_accuracy did not improve from 0.59318
Epoch 102/150
Epoch 1

Epoch 116/150
Epoch 116: val_accuracy did not improve from 0.59318
Epoch 117/150
Epoch 117: val_accuracy did not improve from 0.59318
Epoch 118/150
Epoch 118: val_accuracy did not improve from 0.59318
Epoch 119/150
Epoch 119: val_accuracy did not improve from 0.59318
Epoch 120/150
Epoch 120: val_accuracy did not improve from 0.59318
Epoch 121/150
Epoch 121: val_accuracy did not improve from 0.59318
Epoch 122/150
Epoch 122: val_accuracy did not improve from 0.59318
Epoch 123/150
Epoch 123: val_accuracy did not improve from 0.59318
Epoch 124/150
Epoch 124: val_accuracy did not improve from 0.59318
Epoch 125/150
Epoch 125: val_accuracy did not improve from 0.59318
Epoch 126/150
Epoch 126: val_accuracy did not improve from 0.59318
Epoch 127/150
Epoch 127: val_accuracy did not improve from 0.59318
Epoch 128/150
Epoch 128: val_accuracy did not improve from 0.59318
Epoch 129/150
Epoch 129: val_accuracy did not improve from 0.59318
Epoch 130/150
Epoch 130: val_accuracy did not improve from 0.5

Epoch 145/150
Epoch 145: val_accuracy did not improve from 0.59318
Epoch 146/150
Epoch 146: val_accuracy did not improve from 0.59318
Epoch 147/150
Epoch 147: val_accuracy did not improve from 0.59318
Epoch 148/150
Epoch 148: val_accuracy did not improve from 0.59318
Epoch 149/150
Epoch 149: val_accuracy did not improve from 0.59318
Epoch 150/150
Epoch 150: val_accuracy did not improve from 0.59318


## Evalutating model

In [60]:
# Load model as model
from tensorflow import keras
model = keras.models.load_model('Final_model.hdf5')

In [61]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 31)]              0         
                                                                 
 embedding (Embedding)       (None, 31, 10)            179550    
                                                                 
 lstm (LSTM)                 (None, 31, 30)            4920      
                                                                 
 global_max_pooling1d (Globa  (None, 30)               0         
 lMaxPooling1D)                                                  
                                                                 
 dense (Dense)               (None, 32)                992       
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                             

In [62]:
score = model.evaluate(test_padded, y_test)
print("Model Loss: ", score[0])
print("Model accuracy", score[1])

Model Loss:  0.41681650280952454
Model accuracy 0.5931766629219055


## Load final model for testing

In [58]:
# Load model as model
from tensorflow import keras
model = keras.models.load_model('Final_model.hdf5')

## Predicting data

In [59]:
def predict_sentiment(text):
    text_sequence = tokenizer.texts_to_sequences(text)

    text_padded = pad_sequences(text_sequence, maxlen=TrainingPadShape)
    
    predicted_sentiment = model.predict(text_padded)
    if predicted_sentiment < 0.3:
        print("Negative")
    elif predicted_sentiment > 0.6:
        print("Positive")
    else:
        print("Neutral")
    print(predicted_sentiment)

In [79]:
text = []
userinput = input("Text to test sentiment... ")
text.append(userinput)
predict_sentiment(text)

Text to test sentiment... Scottish teams in the Champions league have been terrible 
Positive
[[0.62992686]]


In [60]:
text = ['Today I have had such a productive day! I watered the plants and went to the gym.']
predict_sentiment(text)

Negative
[[0.09373194]]


In [61]:
text = ['Done nothing, not feeling great, need a sleep.']
predict_sentiment(text)

Positive
[[0.99798113]]


In [62]:
text = ['Mathieu Debuchy']
predict_sentiment(text)

Neutral
[[0.5521824]]


In [67]:
text = ['Bad day']
predict_sentiment(text)

Negative
[[0.0405499]]


In [68]:
text = ['WHAT A WIN']
predict_sentiment(text)

Positive
[[0.9931433]]


In [69]:
text = ['Drone war 1']
predict_sentiment(text)

Neutral
[[0.5761163]]
