## Sentiment Analysis using LSTM

In [1]:
def predict_sentiment(text):
    """Classify a single statement and print its sentiment label.

    Relies on the notebook-level names `tokenizer`, `pad_sequences`, `model`
    and `sentiment_label`, which are created in other cells; those cells must
    have been run before this function is called.

    Parameters
    ----------
    text : str
        The statement to classify.

    Returns
    -------
    None
        The result is printed as "<label> statement".
    """
    # Encode the statement and pad it to the length used for the training data.
    encoded = pad_sequences(tokenizer.texts_to_sequences([text]), maxlen=200)
    # Round the model's single output to 0 or 1 to get a class index.
    score = model.predict(encoded)
    label_index = int(score.round().item())
    # sentiment_label comes from factorize(): element 1 holds the unique labels.
    label_names = sentiment_label[1]
    print(label_names[label_index], "statement")

In [24]:
import pandas as pd
import matplotlib.pyplot as plt
import time

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense, Dropout, SpatialDropout1D
from tensorflow.keras.layers import Embedding

# Load the raw dataset from the CSV file that sits next to this notebook.
df = pd.read_csv(filepath_or_buffer="./Data.csv")

In [28]:
#Process data for training
# Keep only the text and sentiment columns and drop rows where either value is
# missing. Rows are removed on the DataFrame (instead of slicing the text array
# afterwards) so every text stays paired with its own label.
data_df = df[['text','sentiment']].dropna(subset=['text','sentiment'])
data_df = data_df[data_df['sentiment'] != 'neutral'] #Removing Neutral Sentiment
# factorize() returns (integer codes, unique labels): the codes are the
# training targets and the unique labels are used to name predictions.
sentiment_label = data_df.sentiment.factorize()
data = data_df.text.values

#Tokenizing data into separate words
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(data)
vocab_size = len(tokenizer.word_index) + 1 # +1 because index 0 is used for padding
encoded_docs = tokenizer.texts_to_sequences(data)
padded_sequence = pad_sequences(encoded_docs, maxlen=200)

# Guard against inputs and targets drifting out of step before training.
assert len(padded_sequence) == len(sentiment_label[0]), "texts and labels are misaligned"

In [29]:
#Describing model specifications for training
embedding_vector_length = 32

# Layers are applied in the order listed: embedding -> spatial dropout ->
# LSTM -> dropout -> single sigmoid unit for the binary output.
model = Sequential([
    Embedding(vocab_size, embedding_vector_length, input_length=200),
    SpatialDropout1D(0.25),
    LSTM(50, dropout=0.5, recurrent_dropout=0.5),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [30]:
#Training the model
# Wall-clock timestamps taken around fit() to report how long training took.
time1 = time.time()
history = model.fit(
    x=padded_sequence,
    y=sentiment_label[0],  # integer codes produced by factorize()
    batch_size=32,
    epochs=5,
    validation_split=0.2,
)
time2 = time.time()

print("Training time:", time2 - time1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 384.7027611732483


In [31]:
#Testing different cases
sentence1 = "Horrible experience"
sentence2 = "Fun experience"

# Classify each sample statement in turn; predict_sentiment prints the result.
for sentence in (sentence1, sentence2):
    predict_sentiment(sentence)

negative statement
positive statement
