<a href="https://colab.research.google.com/github/Raghvender1205/SentimentAnalysis_MajorProject/blob/master/Sentiment_Streamlit_App_Example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np 
import pandas as pd

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Embedding, LSTM, Bidirectional, Flatten, Dropout
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split
import re

2021-06-30 12:14:28.934218: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


In [None]:
# Punctuation stripped by remove_special_char. Every entry is a single
# character, so removing them in one regex pass gives the same result as
# removing them one after another.
_SPECIAL_CHARS = (
    ",", ":", "\"", "=", "&", "%", ";", "$",
    "@", "^", "(", ")", "{", "}",
    "[", "]", "|", "/", "\\", ">", "<", "-",
    "!", "?", ".", "`", "#",
)
# Alternation of the escaped characters; usable by both `re` and pandas.
_SPECIAL_CHAR_PATTERN = "|".join(re.escape(char) for char in _SPECIAL_CHARS)


def remove_special_char(text):
    """Remove punctuation / special characters from ``text``.

    Parameters
    ----------
    text : str or pandas.Series
        A single string, or a Series of strings (e.g. a DataFrame column).

    Returns
    -------
    str or pandas.Series
        For a ``str``, a new string without the special characters.
        For a Series, the *same* Series object after it has been cleaned
        in place (kept for callers that ignore the return value).

    Notes
    -----
    The previous implementation returned from inside its loop, so only the
    first character in the list (",") was ever processed, and it could not
    handle plain strings at all.
    """
    if isinstance(text, str):
        return re.sub(_SPECIAL_CHAR_PATTERN, "", text)

    # Series path: one regex replacement over every element. The update is
    # done in place so existing call sites that discard the return value
    # keep working; missing values are left untouched by pandas.
    text.replace(to_replace=_SPECIAL_CHAR_PATTERN, value="", regex=True, inplace=True)
    return text
    
def remove_tags(text):
    """Replace every ``<...>`` tag found in ``text`` with a single space."""
    tag_pattern = r'<[^>]+>'
    return re.sub(tag_pattern, " ", text)
def remove_num(text):
    """Return ``text`` with every run of ASCII digits removed.

    The character class uses a plain hyphen (``0-9``). The earlier pattern
    contained an en dash, which made the class match only the three literal
    characters ``0``, ``–`` and ``9`` instead of the digit range.
    """
    return re.sub(r"[0-9]+", "", text)
    
    
# Location of the IMDB reviews CSV (Google Drive mounted in Colab).
DATA_PATH = '/content/drive/MyDrive/SmartKnower/MajorProject/IMDB Dataset.csv'

data = pd.read_csv(DATA_PATH)

# Strip HTML tags first, then digit runs, from every review.
data['review'] = data['review'].apply(remove_tags).apply(remove_num)

# Assign the returned Series back to the column so the cleanup does not
# depend on an in-place mutation propagating through attribute access
# (it does not under pandas copy-on-write).
data['review'] = remove_special_char(data['review'])
data.head(10)

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. The filming t...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
5,Probably my all-time favorite movie a story of...,positive
6,I sure would like to see a resurrection of a u...,positive
7,This show was an amazing fresh & innovative id...,negative
8,Encouraged by the positive comments about this...,negative
9,If you like original gut wrenching laughter yo...,positive


### Word Embeddings

In [None]:
# Tokenizer / padding hyper-parameters.
NUM_WORDS = 5000         # the tokenizer only emits word indices below this value
MAX_SEQUENCE_LEN = 500   # every review is padded / truncated to this many tokens

tokenizer = Tokenizer(num_words=NUM_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
                      lower=True, split=" ")

tokenizer.fit_on_texts(data['review'])
X = tokenizer.texts_to_sequences(data['review'])
X = pad_sequences(X, maxlen=MAX_SEQUENCE_LEN)
Y = data['sentiment']

# Size of the embedding table. `word_index` holds every word seen during
# fitting, but with `num_words` set only indices 1..NUM_WORDS-1 (plus 0 for
# padding) ever appear in X, so a larger table would just carry unused rows.
# The `+ 1` covers the padding index when the corpus has fewer distinct
# words than NUM_WORDS.
vocab_size = min(NUM_WORDS, len(tokenizer.word_index) + 1)

# Hold out 30% of the reviews; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state = 24)

In [None]:
# Persist the fitted tokenizer to disk so the web app can load it later.
import pickle

with open('tokenizer.pickle', mode='wb') as tokenizer_out:
    pickle.dump(tokenizer, tokenizer_out, pickle.HIGHEST_PROTOCOL)

In [None]:
# Encode Labels using LabelEncoder
from sklearn.preprocessing import LabelEncoder

def prepare_targets(y_train, y_test):
    """Integer-encode the train and test labels with one shared LabelEncoder.

    The encoder is fitted on ``y_train`` only and then applied to both
    label sets.

    Returns
    -------
    tuple
        ``(encoded y_train, encoded y_test)``.
    """
    encoder = LabelEncoder().fit(y_train)
    return encoder.transform(y_train), encoder.transform(y_test)

# Encode the split labels. Note the naming: lowercase y_train / y_test hold
# the encoded arrays, uppercase Y_train / Y_test keep the raw labels.
y_train, y_test = prepare_targets(Y_train, Y_test) 

In [None]:
# Binary sentiment classifier, assembled layer by layer.
model = Sequential()
# 50-dimensional embeddings over sequences of 500 token ids.
model.add(Embedding(vocab_size, 50, input_length=500))
model.add(Bidirectional(LSTM(128)))
model.add(Dropout(0.5))
# Single sigmoid unit: one score in [0, 1] per review.
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 500, 50)           6470350   
_________________________________________________________________
bidirectional (Bidirectional (None, 256)               183296    
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense (Dense)                (None, 1)                 257       
Total params: 6,653,903
Trainable params: 6,653,903
Non-trainable params: 0
_________________________________________________________________


### Train

In [None]:
# Train
from tensorflow.keras.callbacks import EarlyStopping

# Stop once validation loss has failed to improve for 5 consecutive epochs.
# restore_best_weights=True rolls the model back to the best epoch; without
# it the weights saved below come from the last epoch, i.e. after 5 epochs
# of no improvement.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5,
                   restore_best_weights=True)

# validation_data is passed as the (x_val, y_val) tuple documented by Keras.
# NOTE(review): the test split doubles as the validation set here, so the
# reported validation metrics are not an unbiased estimate of test accuracy.
history = model.fit(X_train, y_train,
                    batch_size=128, epochs=20,
                    validation_data=(X_test, y_test),
                    callbacks=[es])

# Saved in HDF5 format for later loading.
model.save('movie_sentiment.h5')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 00006: early stopping


In [None]:
# Test your own reviews

# Sample review text to score; replace it with any review of your own.
string11 = '''Between the Lovecraftian overtones and Liberato’s performance, 
The Beach House offers up beautifully shot terror and will make you think before opening your door.'''

# Convert the text to word indices with the fitted tokenizer, then pad to
# the same 500-token length used for the training sequences.
x_1 = tokenizer.texts_to_sequences([string11])
x_1 = pad_sequences(x_1, maxlen=500)
# The model ends in a single sigmoid unit, so the result is one score in [0, 1].
# NOTE(review): presumably values near 1 mean "positive" (LabelEncoder sorts
# the class labels) — confirm against the fitted encoder.
model.predict(x_1)

array([[0.9251882]], dtype=float32)

## Streamlit Sentiment App

Install Streamlit using:
```bash
pip install streamlit
```

In [None]:
import streamlit as st