### Packages


In [1]:
import numpy as np
import pandas as pd
import os
import string
import contractions

In [2]:
from tensorflow.keras.preprocessing.text import Tokenizer, text_to_word_sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences

from keras.layers import Embedding
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.models import load_model

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Keep the NLTK corpora in a project-local folder next to the notebook.
# os.path.join builds the path with the platform's own separator; the earlier
# hard-coded "\\" separators only formed a real sub-directory on Windows.
nltk_path = os.path.join(os.getcwd(), "nltk_data")

# Fetch the resources behind WordNetLemmatizer and stopwords.words().
nltk.download("wordnet", download_dir=nltk_path)
nltk.download("stopwords", download_dir=nltk_path)

# NOTE(review): word_tokenize (used during preprocessing) relies on NLTK's
# punkt tokenizer data, which is not downloaded here -- confirm it is
# installed in the environment.

# Make the local folder visible to NLTK's resource lookup. The membership
# check keeps the search path free of duplicates when the cell is re-run.
if nltk_path not in nltk.data.path:
    nltk.data.path.append(nltk_path)

[nltk_data] Downloading package wordnet to c:\Users\Safin
[nltk_data]     Arafat\Desktop\sentimental-analysis\sentiment-
[nltk_data]     analysis\nltk_data\...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to c:\Users\Safin
[nltk_data]     Arafat\Desktop\sentimental-analysis\sentiment-
[nltk_data]     analysis\nltk_data\...
[nltk_data]   Package stopwords is already up-to-date!


### Load the Model


In [3]:
# Location of the saved Keras model (HDF5 file), relative to the notebook.
model_path = "./sentiment_model.h5"
model = load_model(model_path)

# Compile the restored model with Adam (learning rate 1e-4), sparse
# categorical cross-entropy as the loss and accuracy as the tracked metric.
optimizer = Adam(learning_rate=0.0001)
model.compile(
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
    optimizer=optimizer,
)

# Print the layer-by-layer overview of the loaded network.
model.summary()



## Running Prediction


### Processing Data


In [12]:
# Small hand-written batch of reviews used to exercise the prediction pipeline.
sample_reviews = ["This is a good", "bad", "nice", "breaks"]
df = pd.DataFrame({"reviewText": sample_reviews})


def contract(text):
    """Expand contractions in a single review.

    Args:
        text: The raw review. Only ``str`` values are processed.

    Returns:
        The result of ``contractions.fix(text)`` for string input. Any
        non-string value (for example ``None`` or a number) is replaced by
        the literal placeholder ``"Bad String"``.
    """
    # Guard clause: anything that is not a string gets the placeholder.
    if not isinstance(text, str):
        return "Bad String"
    return contractions.fix(text)


# Run every review through contract() and store the result back in place.
expanded_reviews = df["reviewText"].apply(contract)
df["reviewText"] = expanded_reviews

In [13]:
# Quick sanity check: show the first rows after contraction expansion.
preview = df.head()
print(preview)

       reviewText
0  This is a good
1             bad
2            nice
3          breaks


In [14]:
# Shared resources for preprocess_text: a lemmatizer instance and the English
# stop-word list, held in a set for fast membership tests.
lemmatizer = WordNetLemmatizer()
stop_words = {*stopwords.words("english")}


def preprocess_text(text):
    """Normalise one review string for the model.

    The text is lower-cased, every character listed in
    ``string.punctuation`` is deleted, the remainder is split with
    ``word_tokenize``, tokens found in the module-level ``stop_words`` set
    are dropped, and each surviving token is passed through
    ``lemmatizer.lemmatize``.

    Args:
        text: The review as a ``str``.

    Returns:
        The processed tokens joined by single spaces.
    """
    # A translation table whose third argument lists characters to delete.
    strip_punctuation = str.maketrans("", "", string.punctuation)
    cleaned = text.lower().translate(strip_punctuation)

    kept = []
    for token in word_tokenize(cleaned):
        if token in stop_words:
            continue  # stop words are filtered out before lemmatization
        kept.append(lemmatizer.lemmatize(token))
    return " ".join(kept)


# Replace each review with its normalised form, then show the cleaned column.
cleaned_reviews = df["reviewText"].apply(preprocess_text)
df["reviewText"] = cleaned_reviews

print(df["reviewText"])

0     good
1      bad
2     nice
3    break
Name: reviewText, dtype: object


In [15]:
def tokenize(text, tokenizer=None):
    """Convert texts into sequences of integer word indices.

    Args:
        text: An iterable of strings (the notebook passes a list of cleaned
            reviews).
        tokenizer: Optional, already-fitted tokenizer exposing
            ``texts_to_sequences`` and ``word_index``. When given, it is used
            as-is and is NOT re-fitted, so the indices stay consistent with
            the vocabulary it was fitted on. When omitted, a new Keras
            ``Tokenizer`` is created and fitted on ``text`` (the original
            behaviour).

    Returns:
        A tuple ``(sequences, word_index)``: the list of index sequences for
        ``text`` and the tokenizer's word -> index dictionary.

    Note:
        Fitting a fresh tokenizer on the texts being predicted derives the
        indices from that batch alone (the sample run maps its four words to
        1-4). Those indices presumably do not line up with the vocabulary the
        loaded model was trained on, so for real predictions pass the
        tokenizer that was fitted at training time -- TODO confirm where it
        is persisted.
    """
    if tokenizer is None:
        # Backward-compatible fallback: build the vocabulary from `text`.
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(text)
    return tokenizer.texts_to_sequences(text), tokenizer.word_index


# Collect the cleaned reviews as a plain Python list for the tokenizer.
corpus = df["reviewText"].tolist()

# tokenize() returns (sequences, word_index); note that `vocab_size`
# therefore receives the word -> index dictionary, not an integer count.
tokenized = tokenize(corpus)
seq, vocab_size = tokenized

print(seq)

[[1], [2], [3], [4]]


In [17]:
# Bring every token sequence to a fixed width of 108 so the batch forms one
# rectangular int32 array: short sequences get zeros appended at the end and
# longer ones lose their trailing tokens.
# NOTE(review): 108 presumably matches the input length the model was trained
# with -- confirm.
maxlen = 108
sequences = seq
pad_options = dict(
    maxlen=maxlen,
    dtype="int32",
    padding="post",
    truncating="post",
    value=0.0,
)
X_padded = pad_sequences(sequences, **pad_options)

### Processing Output


In [18]:
# Score the padded batch; `pred` holds one row of class scores per review.
pred = model.predict(X_padded, batch_size=32)

# The predicted class is the column index with the highest score in each row.
y_pred = pred.argmax(axis=1)

print(pred)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 460ms/step
[[0.8455261  0.15447398]
 [0.5427055  0.4572945 ]
 [0.92252153 0.07747851]
 [0.87215096 0.127849  ]]


In [19]:
# Attach the predicted class index (argmax of the model output) to each review.
df["sentiment"] = y_pred

In [23]:
# Serialise the results as a JSON object keyed by row label, where each entry
# maps column name -> value (reviewText and sentiment).
df.to_json(orient="index")

'{"0":{"reviewText":"good","sentiment":0},"1":{"reviewText":"bad","sentiment":0},"2":{"reviewText":"nice","sentiment":0},"3":{"reviewText":"break","sentiment":0}}'