<a href="https://colab.research.google.com/github/tanisha03/MK21_KuberTechies/blob/master/main/SentimentModel/V2/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%tensorflow_version 1.x

TensorFlow is already loaded. Please restart the runtime to change versions.


In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import GRU, LSTM, Dense, Activation, Flatten, Dropout, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer 

In [None]:
import numpy as np
import pandas as pd

import os
import time
import re

import nltk
# NOTE(review): this fetches the complete NLTK 'all' collection, yet no nltk API
# is called in the cells visible here — confirm whether the download is still needed.
nltk.download('all')

import gc
import string

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Package biocreative_ppi is already up-to-date!
[nltk_data]    | Downloading package brown to /root/nltk_data...
[nltk_data]    |   Package brown is already up-to-date!
[nltk_data]    | Downloading package brown_tei to /root/nltk_data...
[nltk_data]    |   Package brown_tei is already up-to-date!
[nltk_data]    | Downloading package cess_cat to /root/nltk_data...
[nltk_data]    |   Package cess_cat is already up-to-date!
[nltk_data]    | Downloading package cess_esp to /root/nltk_data...
[nltk_data]    |   Package cess_esp is already up-to-date!
[nltk_data]    | Downloading packag

In [None]:
# Load the labelled reviews (columns: sentence, intensity, sentiment).
df = pd.read_csv('clean_intensity.csv')

# The CSV also carries an 'Unnamed: 0' column (presumably the row index written
# when the file was saved). If it took part in the duplicate check, two otherwise
# identical reviews would never compare equal, so duplicates are detected on the
# actual data columns only. Rows with missing values are dropped afterwards and
# the index is renumbered from 0.
df = (
    df
    .drop_duplicates(subset=['sentence', 'intensity', 'sentiment'])
    .dropna()
    .reset_index(drop=True)
)

df.head()

Unnamed: 0,sentence,intensity,sentiment
0,And my grandfather made everyone open a bank a...,-0.1,Negative
1,I just downloaded the app and it took me only ...,0.6,Positive
2,I mostly use paytm for recharge and for shoppi...,0.9,Positive
3,Hello friends today I am going to give my feed...,0.2,Positive
4,PAYTM app is most Popular app For UPI Transact...,0.9,Positive


In [None]:
# Summary statistics (count, mean, std, quartiles) of the numeric columns.
df.describe()

Unnamed: 0,intensity
count,6842.0
mean,-0.040061
std,0.615526
min,-0.9
25%,-0.7
50%,-0.1
75%,0.6
max,0.9


In [None]:
def normalize(data):
  """Lower-case ``data`` and trim punctuation characters from both ends.

  Only leading and trailing characters found in ``string.punctuation`` are
  removed; punctuation inside the text is left untouched.
  """
  return data.lower().strip(string.punctuation)

In [None]:
def map_sentiment(review):
  """Encode a sentiment label as an integer: 'Negative' -> 0, 'Positive' -> 1.

  Any other value yields None.
  """
  if review == 'Negative':
    return 0
  return 1 if review == 'Positive' else None

In [None]:
# Clean the review text. Errors are deliberately NOT swallowed here: continuing
# with a half-transformed frame would only surface later as a confusing
# training failure.
df['sentence'] = df['sentence'].apply(normalize)

# Encode the labels only while the column still holds text. map_sentiment
# returns None for anything other than 'Negative'/'Positive', so running this
# cell a second time over already-encoded 0/1 values would wipe the column.
if not pd.api.types.is_numeric_dtype(df['sentiment']):
  df['sentiment'] = df['sentiment'].apply(map_sentiment)

In [None]:
# Preview the first rows after text normalization and label encoding.
df.head()

Unnamed: 0,sentence,intensity,sentiment
0,and my grandfather made everyone open a bank a...,-0.1,0
1,i just downloaded the app and it took me only ...,0.6,1
2,i mostly use paytm for recharge and for shoppi...,0.9,1
3,hello friends today i am going to give my feed...,0.2,1
4,paytm app is most popular app for upi transact...,0.9,1


In [None]:
# All sentences of the frame as a plain Python list.
corpus = df['sentence'].tolist()

# Fit the Keras tokenizer on the corpus, then encode every sentence as a list
# of integer word indices (X holds one variable-length list per sentence).
t = Tokenizer()
t.fit_on_texts(corpus)
X = t.texts_to_sequences(corpus)

In [None]:
# Mean number of tokens per encoded sentence.
count = len(X)
mean = sum(map(len, X)) / count

print('Average length of the sentence : {}'.format(mean))

Average length of the sentence : 26.299473838059047


In [None]:
# Vocabulary size, passed to the Embedding layer as its input dimension.
# NOTE(review): the +1 presumably accounts for index 0, which the Keras Tokenizer
# does not assign to any word (it is what padding uses) — so the figure printed
# below is one more than the number of distinct words. Confirm against Keras docs.

num_words = len(t.word_index)+1

print('Number of unique words in the dataset : {}'.format(num_words))


Number of unique words in the dataset : 9390


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the encoded sentences for evaluation. The split is seeded so
# that a fresh "Restart & Run All" reproduces the same train/test partition
# (and therefore comparable metrics and checkpoints).
x_tr, x_te, y_tr, y_te = train_test_split(X, df['sentiment'], test_size = 0.2, random_state = 42)

In [None]:
# Bring every encoded sentence to a fixed length of 30 tokens (shorter ones are
# padded, longer ones truncated) so they can be batched.
x_tr = pad_sequences(x_tr, maxlen=30)
x_te = pad_sequences(x_te, maxlen=30)

In [None]:
# Make sure features and labels are NumPy arrays before training (the labels
# come out of train_test_split as a pandas Series). No try/except here: if the
# conversion ever fails, the error should stop the notebook instead of being
# silently ignored.
x_tr, x_te = np.array(x_tr), np.array(x_te)
y_tr, y_te = np.array(y_tr), np.array(y_te)

# Sanity check of the resulting shapes.
x_tr.shape, x_te.shape, y_tr.shape, y_te.shape

((5473, 30), (1369, 30), (5473,), (1369,))

In [None]:
from tensorflow.keras.layers import Bidirectional, MaxPooling1D

In [None]:
# Binary sentiment classifier: embedding -> bidirectional GRU -> dense head.
model = Sequential([
    # One 128-dimensional vector per token id; sequence length taken from the padded input.
    Embedding(num_words, 128, input_length=x_tr.shape[1]),
    # Reads the sequence in both directions with 256 GRU units each way.
    Bidirectional(GRU(256)),
    Dropout(0.5),
    Dense(100),
    Activation('relu'),
    Dropout(0.5),
    # Single sigmoid unit: output is the score of the positive class (label 1).
    Dense(1),
    Activation('sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 30, 128)           1201920   
_________________________________________________________________
bidirectional_5 (Bidirection (None, 512)               592896    
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 100)               51300     
_________________________________________________________________
activation (Activation)      (None, 100)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Two checkpoints, each keeping only the weights of the best epoch so far:
# one judged by validation accuracy, the other by validation loss.
ckpt_a = ModelCheckpoint(
    filepath='best_acc_bank_weights.hdf5',
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True,
)
ckpt_l = ModelCheckpoint(
    filepath='best_loss_bank_weights.hdf5',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)

In [None]:
def save_model_to_json(model, filename):
  """Write the JSON description of ``model`` to ``filename``.

  Only what ``model.to_json()`` returns is stored; weights are not written
  by this function. A confirmation line is printed afterwards.
  """
  serialized = model.to_json()

  with open(filename, 'w') as out:
    out.write(serialized)

  print('Model saved in json format in {}'.format(filename))


def open_model_from_json(filename, weights):
  """Rebuild a Keras model from a JSON architecture file and load its weights.

  Parameters:
    filename : path of the JSON file holding the model architecture.
    weights  : path of the weights file passed to ``load_weights``.

  Returns the reconstructed model. This function does not compile it, so the
  caller has to call ``compile`` before training or evaluating.
  """

  from tensorflow.keras.models import model_from_json

  # The context manager closes the file even if reading it fails.
  with open(filename, 'r') as json_file:
    loaded_model_json = json_file.read()

  loaded_model = model_from_json(loaded_model_json)
  loaded_model.load_weights(weights)
  print('Model loaded successfully')

  return loaded_model

In [None]:
# Train for 30 epochs with mini-batches of 16. The held-out split is used as
# validation data, and both checkpoint callbacks save weights during training.
# NOTE(review): EarlyStopping is imported but not passed in `callbacks`, so all
# 30 epochs always run — confirm this is intended.
history = model.fit(x_tr, y_tr, validation_data=(x_te, y_te), epochs=30, callbacks = [ckpt_a, ckpt_l], batch_size=16)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Persist the architecture as JSON.
save_model_to_json(model,'model.json')

# Rebuild the model from that JSON with the weights written by the
# val_accuracy checkpoint, then compile it again so it can be evaluated.
model = open_model_from_json(filename='model.json', weights='best_acc_bank_weights.hdf5')
model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')

Model saved in json format in model.json
Model loaded successfully


In [None]:
# Save the fitted tokenizer so inference can reuse the exact same vocabulary.
import pickle

with open('tokenizer.pickle', 'wb') as f:
  pickle.dump(t, f)

# Reload it and check that it still encodes text. The file is opened in a
# `with` block so the handle is closed once loading finishes.
# Only unpickle files you created yourself: pickle can execute arbitrary code.
with open('tokenizer.pickle', 'rb') as f:
  tokenizer = pickle.load(f)

test_sent = tokenizer.texts_to_sequences(['This is really nice to have!'])
print(test_sent)

[[8, 6, 144, 267, 5, 19]]


In [None]:
# Load everything needed for inference: architecture + best-accuracy weights,
# then the fitted tokenizer.

model = open_model_from_json(filename='model.json', weights='best_acc_bank_weights.hdf5')
model.compile(loss = 'binary_crossentropy',
              metrics=['accuracy'],
              optimizer='adam')

# Open the pickle in a `with` block so the file handle is closed after loading.
# Only unpickle files you created yourself: pickle can execute arbitrary code.
with open('tokenizer.pickle', 'rb') as f:
  tokenizer = pickle.load(f)


Model loaded successfully


In [None]:
def testing_function(review, tokenizer = tokenizer, model = model, maxlen=30):

  """
    Classify a single review as positive or negative and print the verdict.

    The review is normalized, encoded with `tokenizer`, padded to `maxlen`
    tokens and scored by `model`; a score below 0.5 means 'negative'.

    Returns : predicted class, 'negative' or 'positive' | Type : <str>
              certainty of that class (the model score for 'positive',
              1 - score for 'negative') | Type : numeric scalar
  """

  labels = ('negative', 'positive')

  review = normalize(review)
  encoded = pad_sequences(tokenizer.texts_to_sequences([review]), maxlen=maxlen)

  # Scores of the first (and only) sample in the batch.
  scores = model.predict(encoded)[0]

  if scores < 0.5:
    pred, certainty = 0, 1 - scores[0]
  else:
    pred, certainty = 1, scores[0]

  print('"{}" : Class : |{}|, Intensity : |{:.2f}%|'.format(review, labels[pred], certainty*100))

  return labels[pred], certainty

In [None]:
# Smoke test: classify one hand-written review with the reloaded model.
testing_function('The atm has a cool ac')

"the atm has a cool ac" : Class : |positive|, Intensity : |99.96%|


('positive', 0.99960965)