In [None]:
# Import the Keras text-processing and model-building utilities, plus NumPy and pandas
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras import layers
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd

In [None]:
# Mount Google Drive at /content/drive so that the dataset path under
# '/content/drive/My Drive/...' used by the loading cell can be read.
# The google.colab module is Colab-specific; this cell is not needed when the
# file is read from a local directory instead.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Location of the tab-separated review file on the mounted Google Drive.
url = '/content/drive/My Drive/Deep Learning with AI/DL - NLP/sentiment labelled sentences/amazon_cells_labelled.txt'

# Alternative sources (uncomment the one that applies and adjust the file name):
#   * Upload the file through the Colab file picker:
#       from google.colab import files
#       import io
#       uploaded = files.upload()
#       url = io.BytesIO(uploaded['yelp_labelled.txt'])
#   * Read the file from a local directory:
#       url = r'C:\Users\vamsi\Documents\DL with NLP\yelp_labelled.txt'

# Column names are supplied explicitly: the review text and its sentiment
# label (0 or 1 in the preview below).
data = pd.read_csv(
    url,
    sep='\t',
    names=['sentence', 'label'],
)
data.head()

Unnamed: 0,sentence,label
0,So there is no way for me to plug it in here i...,0
1,"Good case, Excellent value.",1
2,Great for the jawbone.,1
3,Tied to charger for conversations lasting more...,0
4,The mic is great.,1


In [None]:
# Pull the review text and the sentiment labels out of the frame as arrays
sentences, labels = [data[column].values for column in ('sentence', 'label')]

In [None]:
# Hold out 30% of the sentences for testing; the fixed random_state keeps the
# split identical from run to run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    sentences,
    labels,
    test_size=0.30,
    random_state=1000,
)

In [None]:
# Build the word index from the training sentences only; num_words caps how
# many of the most frequent words are used when text is converted to integers.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)

# Index 0 is reserved, so the vocabulary size is one more than the word count
vocab_size = 1 + len(tokenizer.word_index)

# Encode both splits as integer sequences using the training vocabulary
X_train_tokens, X_test_tokens = map(tokenizer.texts_to_sequences, (X_train, X_test))

In [None]:
# Hyper-parameters for the network and its training run

# Input / embedding
maxlen = 100        # length every token sequence is padded (or cut) to
embedding_dim = 50  # size of the vector learned for each word

# Convolution
num_filters = 64    # number of convolution filters (output channels)
kernel_size = 5     # number of consecutive tokens each filter spans

# Training
epochs = 20         # number of passes over the training data
batch_size = 32     # number of training samples per gradient update

In [None]:
# Bring every token sequence to exactly `maxlen` entries, filling with zeros
# at the end ('post'), so both splits become rectangular integer arrays
X_train_data, X_test_data = (
    pad_sequences(tokens, padding='post', maxlen=maxlen)
    for tokens in (X_train_tokens, X_test_tokens)
)
X_test_data

array([[ 559, 1379,    0, ...,    0,    0,    0],
       [ 254,   69,    1, ...,    0,    0,    0],
       [ 921,  354,    3, ...,    0,    0,    0],
       ...,
       [  39,   38,  998, ...,    0,    0,    0],
       [  23,  439,  320, ...,    0,    0,    0],
       [  35,    1,  154, ...,    0,    0,    0]], dtype=int32)

In [None]:
# Convolutional text classifier, declared as an ordered list of layers
model = Sequential([
    # Map each word index to a trainable embedding_dim-sized vector
    layers.Embedding(vocab_size, embedding_dim, input_length=maxlen),
    # Slide num_filters filters of width kernel_size over the sequence
    layers.Conv1D(num_filters, kernel_size, activation='relu'),
    # Keep only the strongest response of each filter
    layers.GlobalMaxPooling1D(),
    # One hidden layer; insert further Dense(10, activation='relu') layers
    # here if more capacity is needed
    layers.Dense(10, activation='relu'),
    # Single sigmoid unit for the binary positive/negative decision
    layers.Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 100, 50)           75400     
_________________________________________________________________
conv1d (Conv1D)              (None, 96, 64)            16064     
_________________________________________________________________
global_max_pooling1d (Global (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 10)                650       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 11        
Total params: 92,125
Trainable params: 92,125
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Fit on the training split; the held-out split is passed as validation data
# and per-epoch logging is switched off
model.fit(
    X_train_data,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_test_data, y_test),
    verbose=False,
)

# Score the trained model on the held-out split and report its accuracy
loss, accuracy = model.evaluate(X_test_data, y_test, verbose=False)
print("Testing Accuracy:  {:.4f}".format(accuracy))

Testing Accuracy:  0.7900


In [None]:
# Inspect the raw text of one held-out sentence (position 19 in X_test)
X_test[19] 

'I have always used corded headsets and the freedom from the wireless is very helpful.'

In [None]:
# Predict the score for the sample entry displayed earlier (X_test[19]).
# The original code scored row 0 while the notebook showed sentence 19, so the
# reported score did not belong to the sentence on screen.
# X_test_data is built from X_test in order, so the same position selects the
# padded token sequence of that sentence. A length-1 slice (rather than a
# single index) keeps the 2-D batch shape that model.predict receives.
sample_index = 19
pred = model.predict(X_test_data[sample_index:sample_index + 1])
pred

array([[0.99700594]], dtype=float32)

In [None]:
# Turn the predicted score into a label: above 0.5 counts as positive
prediction = 'Positive Comment' if pred[0] > 0.5 else 'Negative Comment'
print(prediction)

Positive Comment
