## Importing all the libraries

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
import numpy as np
import pandas as pd
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, load_model
from keras.layers import Dense, Embedding, LSTM, Bidirectional,Flatten,Dropout
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import re

## Fetching and preprocessing the data

In [7]:
def remove_special_chars(rev):
     for remove in map(lambda r: re.compile(re.escape(r)), [",", ":", "\"", "=", "&", ";", "%", "$",
 "@", "^", "*", "(", ")", "{", "}",
 "[", "]", "|", "/", "\\", ">", "<", "-",
 "!", "?", ".", "'",
 "_", "— -", "#"]):
         rev.replace(remove, " ", inplace=True)
     return rev
def remove_tags(text):
     return re.compile(r'<[^>]+>').sub('', text)
def remove_num(text):
     return ''.join(re.sub(r'([0–9]+)','',text))
data = pd.read_csv('/content/drive/MyDrive/IMDB/IMDB_Dataset.csv')
data.review=data.review.apply(lambda x : remove_tags(x))
data.review=data.review.apply(lambda x : remove_num(x))
remove_special_chars(data.review)
data.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production The filming tec...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there s a family where a little boy ...,negative
4,Petter Mattei s Love in the Time of Money is...,positive


## Word Embeddings

In [8]:
tokenizer = Tokenizer(num_words=5000,filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',lower=True,split=' ')
tokenizer.fit_on_texts(data['review'])
X = tokenizer.texts_to_sequences(data['review'])
X = pad_sequences(X,maxlen=500)
Y = data['sentiment']
vocab_size = len(tokenizer.word_index) + 1


# Creating train and test sets:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state = 24)

#We store this tokenizer in a file to use later in web app
import pickle

# saving
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
    
from sklearn.preprocessing import LabelEncoder

def prepare_targets(y_train, y_test):
 le = LabelEncoder()
 le.fit(y_train)
 y_train_enc = le.transform(y_train)
 y_test_enc = le.transform(y_test)
 return y_train_enc, y_test_enc
ytrain,ytest = prepare_targets(Y_train,Y_test)

## Designing the Model

In [9]:
model = Sequential()
model.add(Embedding(vocab_size, 50, input_length=500))
model.add(Bidirectional(LSTM(128)))
model.add(Dropout(0.5))
model.add(Dense(1,activation='sigmoid'))
model.compile(optimizer='adam',
 loss='binary_crossentropy',
 metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 50)           5275650   
                                                                 
 bidirectional (Bidirectiona  (None, 256)              183296    
 l)                                                              
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense (Dense)               (None, 1)                 257       
                                                                 
Total params: 5,459,203
Trainable params: 5,459,203
Non-trainable params: 0
_________________________________________________________________


## Training the model



In [11]:
from keras.callbacks import EarlyStopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
history=model.fit(X_train, ytrain,
 batch_size=128,
 epochs=20,
 validation_data=[X_test, ytest],
 callbacks=[es])
#I saved this model to use while deploying
model.save('movie_sent.h5')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 8: early stopping


In [13]:
string11="Between the Lovecraftian overtones and Liberato’s performance, The Beach House offers up beautifully shot terror and will make you think before opening your door."
x_1=tokenizer.texts_to_sequences([string11])
x_1 = pad_sequences(x_1,maxlen=500)
model.predict(x_1)

array([[0.983709]], dtype=float32)

- If the output is closer to 1, then it is a positive review.
- If the output is closer to 0, then it is a negative review.

## Sentiment analysis app using streamlit code

In [22]:
pip install streamlit

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting streamlit
  Downloading streamlit-1.12.2-py2.py3-none-any.whl (9.1 MB)
[K     |████████████████████████████████| 9.1 MB 28.1 MB/s 
Collecting pympler>=0.9
  Downloading Pympler-1.0.1-py3-none-any.whl (164 kB)
[K     |████████████████████████████████| 164 kB 67.6 MB/s 
Collecting pydeck>=0.1.dev5
  Downloading pydeck-0.8.0b1-py2.py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 52.6 MB/s 
[?25hCollecting blinker>=1.0.0
  Downloading blinker-1.5-py2.py3-none-any.whl (12 kB)
Collecting watchdog
  Downloading watchdog-2.1.9-py3-none-manylinux2014_x86_64.whl (78 kB)
[K     |████████████████████████████████| 78 kB 7.2 MB/s 
Collecting semver
  Downloading semver-2.13.0-py2.py3-none-any.whl (12 kB)
Collecting gitpython!=3.1.19
  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
[K     |████████████████████████████████| 181 kB 69.8 MB/s 
Collecting r

In [23]:
import streamlit as st
import pickle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

def predict(message):

 model=load_model('movie_sent.h5')

 with open('tokenizer.pickle', 'rb') as handle:
  tokenizer = pickle.load(handle)
 x_1 = tokenizer.texts_to_sequences([message])
 x_1 = pad_sequences(x_1, maxlen=500)
 predictions = model.predict(x_1)[0][0]
 return predictions

st.title('Movie Review Sentiment Analyzer')
message = st.text_area('Enter Review:')
if st.button('Analyze'):
 with st.spinner('Analyzing the text …'):
  prediction=predict(message)
 if prediction > 0.6:
  st.success('Positive review with {:.2f} confidence'.format(prediction))
  st.balloons()
 elif prediction <0.4:
  st.error('Negative review with {:.2f} confidence'.format(1-prediction))
 else:
  st.warning('Not sure! Try to add some more words') 

  command:

    streamlit run /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py [ARGUMENTS]
2022-09-08 06:32:10.093 
  command:

    streamlit run /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py [ARGUMENTS]
