# **SENTIMENT ANALYSIS USING TENSORFLOW**

In this notebook, we'll apply a deep learning technique, using Keras and LSTM (long short-term memory) units, to determine the sentiment of a sentence.

---

### BASIC FRAME OF THE MODEL
The task of sentiment analysis involves taking in an input sequence of words and determining whether the sentence is positive or not. We can separate this into 5 different components:

1. Encode the training dataset using the dataset's subword `encoder`
2. Pad the encoded sequences so that every sequence in a batch has the same length
3. Build the model with Keras `Sequential()` by stacking multiple layers
4. Training
5. Testing


In [None]:
import tensorflow_datasets as tfds
import tensorflow as tf

# Fetch the IMDB reviews dataset in its 8k-subword configuration.
# with_info=True also returns the dataset metadata (`info`); as_supervised=True
# is requested so that examples come back as tuples rather than dicts.
data_set, info = tfds.load(
    'imdb_reviews/subwords8k',
    with_info=True,
    as_supervised=True,
)



[1mDownloading and preparing dataset imdb_reviews/subwords8k/1.0.0 (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…







HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0.incompleteNWAZS2/imdb_reviews-train.tfrecord


HBox(children=(FloatProgress(value=0.0, max=25000.0), HTML(value='')))

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0.incompleteNWAZS2/imdb_reviews-test.tfrecord


HBox(children=(FloatProgress(value=0.0, max=25000.0), HTML(value='')))

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0.incompleteNWAZS2/imdb_reviews-unsupervised.tfrecord


HBox(children=(FloatProgress(value=0.0, max=50000.0), HTML(value='')))



[1mDataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0. Subsequent calls will reuse this data.[0m


In [None]:
# Pull out the train and test splits, and the text encoder stored in the
# dataset metadata for the 'text' feature.
train_dataset = data_set['train']
test_dataset = data_set['test']
encoder = info.features['text'].encoder

In [None]:
# Show the encoder's repr (it includes the vocabulary size) as the cell output.
encoder

<SubwordTextEncoder vocab_size=8185>

In [None]:
# Input-pipeline settings.
BUFFER_SIZE = 1000  # size of the shuffle buffer used for the training split
BATCH_SIZE = 40     # number of reviews per padded batch

In [None]:
# Shapes handed to padded_batch: the first component (the encoded text) has a
# variable length (None) and gets padded; the second component (the label) is
# a scalar.
padded_shapes = ([None], ())

In [None]:
# Build the batched pipelines from the raw splits in `data_set` instead of from
# `train_dataset` / `test_dataset` themselves. Rebinding a name from its own
# previous value makes the cell non-idempotent: running it a second time would
# shuffle and batch an already batched dataset.
train_dataset = (
    data_set['train']
    .shuffle(BUFFER_SIZE)
    .padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)
)
# The test split is only evaluated, so it is batched without shuffling.
test_dataset = data_set['test'].padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)

In [None]:
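# NOTE: disabled model definition, kept commented out for reference only.
# It reuses BATCH_SIZE as the embedding dimension and as the LSTM/Dense width,
# which ties the model size to the batch size.
# model= tf.keras.Sequential([tf.keras.layers.Embedding(encoder.vocab_size, BATCH_SIZE),
#                            tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(BATCH_SIZE)),
#                            tf.keras.layers.Dense(BATCH_SIZE,activation= 'relu'),
#                            tf.keras.layers.Dense(1, activation= 'sigmoid')])
# model.compile(loss='binary_crossentropy',
#              optimizer=tf.keras.optimizers.Adam(1e-4),
#              metrics=['accuracy'])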

In [None]:
# history = model.fit(train_dataset, epochs=3, validation_data=test_dataset,
#                    validation_steps=30)

In [None]:
def pad_to_size(vec, size):
    """Return a copy of ``vec`` right-padded with zeros to ``size`` elements.

    The input sequence is left untouched; the previous implementation extended
    the caller's list in place, which silently changed it for any later use.

    Args:
        vec: Sequence of values (e.g. encoded token ids) to pad.
        size: Minimum length of the returned list.

    Returns:
        A new list holding the elements of ``vec`` followed by as many zeros
        as needed to reach ``size``. A sequence that is already ``size`` long
        or longer is copied unchanged (it is never truncated).
    """
    padded = list(vec)
    # A non-positive multiplier yields an empty list, so over-long inputs get
    # no padding instead of raising.
    padded.extend([0] * (size - len(padded)))
    return padded

In [None]:
def sample_predict(sentence, pad, pad_size=40):
  """Run the notebook's model on a single raw sentence.

  Relies on the notebook-level names `encoder`, `model` and `pad_to_size`
  being defined before the function is called.

  Args:
    sentence: Text to classify.
    pad: When truthy, the encoded sentence is zero-padded with `pad_to_size`.
    pad_size: Length to pad to when `pad` is truthy. Defaults to 40, the value
      that was previously hard-coded.

  Returns:
    Whatever `model.predict` returns for a batch containing this one sentence.
  """
  token_ids = encoder.encode(sentence)
  if pad:
    token_ids = pad_to_size(token_ids, pad_size)

  # NOTE(review): the ids are cast to float32 before being fed to the model,
  # as in the original code - confirm whether an integer dtype is preferable.
  token_ids = tf.cast(token_ids, tf.float32)

  # Add a leading batch axis: the model is called on a batch of one sentence.
  return model.predict(tf.expand_dims(token_ids, 0))

In [None]:
# Binary sentiment classifier: embedding -> two stacked bidirectional LSTMs ->
# dense head with dropout -> single sigmoid unit.
model = tf.keras.Sequential([
    # mask_zero=True makes the downstream recurrent layers skip positions whose
    # id is 0. Both padded_batch (input pipeline) and pad_to_size (inference)
    # fill with 0, so without masking the padding is processed as real tokens
    # and the prediction depends on how much padding a sequence received.
    tf.keras.layers.Embedding(encoder.vocab_size, 40, mask_zero=True),
    # return_sequences=True so the second LSTM receives the full sequence.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    # Sigmoid output, paired with the binary cross-entropy loss below.
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy',
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

In [None]:
# Train for a single epoch; validation after the epoch is limited to 30
# batches of the test dataset.
history = model.fit(
    train_dataset,
    epochs=1,
    validation_data=test_dataset,
    validation_steps=30,
)

In [None]:
sample_text = 'It is bad'
predictions = sample_predict(sample_text, pad=True) * 100

# The prediction for one sentence is a one-element array; .item() extracts the
# Python float explicitly instead of relying on implicit array-to-scalar
# conversion inside the % formatting (deprecated for ndim > 0 arrays in NumPy >= 1.25).
print('probability that it is a positive review is %.2f percent' % predictions.item())

In [None]:
sample_text = 'It is excellent'
predictions = sample_predict(sample_text, pad=True) * 100

# The prediction for one sentence is a one-element array; .item() extracts the
# Python float explicitly instead of relying on implicit array-to-scalar
# conversion inside the % formatting (deprecated for ndim > 0 arrays in NumPy >= 1.25).
print('probability that it is a positive review is %.2f percent' % predictions.item())

In [None]:
# Tweet by @NarendraModi

sample_text = 'Best wishes to all @BSF_India personnel and their families on the special occasion of their Raising Day. BSF has distinguished itself as a valorous force, unwavering in their commitment to protect the nation and assist citizens during natural calamities. India is proud of BSF'
predictions = sample_predict(sample_text, pad=True) * 100

# .item() extracts the single value explicitly; implicit conversion of a
# non-0-d array to a scalar in % formatting is deprecated in NumPy >= 1.25.
print('probability that it is a positive review is %.2f percent' % predictions.item())

In [None]:
# Tweet by @realDonaldTrump

sample_text = 'Just saw the vote tabulations. There is NO WAY Biden got 80,000,000 votes!!! This was a 100% RIGGED ELECTION.'
predictions = sample_predict(sample_text, pad=True) * 100

# .item() extracts the single value explicitly; implicit conversion of a
# non-0-d array to a scalar in % formatting is deprecated in NumPy >= 1.25.
print('probability that it is a positive review is %.2f percent' % predictions.item())

In [None]:
# Tweet by @BarackObama

sample_text = 'Let’s all do our part this Thanksgiving to keep people safe and healthy. Celebrate virtually, if you can. Wear a mask. And as always, listen to the experts. The choices you make could save lives.'
predictions = sample_predict(sample_text, pad=True) * 100

# .item() extracts the single value explicitly; implicit conversion of a
# non-0-d array to a scalar in % formatting is deprecated in NumPy >= 1.25.
print('probability that it is a positive review is %.2f percent' % predictions.item())

In [None]:
# Tweet by @AkshayKumar

sample_text = '26/11, a day Mumbaikars will never forget. My heartfet tribute to the martyrs and victims of the #MumbaiTerrorAttack. We will forever be indebted to our bravehearts for their supreme sacrifice'
predictions = sample_predict(sample_text, pad=True) * 100

# .item() extracts the single value explicitly; implicit conversion of a
# non-0-d array to a scalar in % formatting is deprecated in NumPy >= 1.25.
print('probability that it is a positive review is %.2f percent' % predictions.item())

In [None]:
# Tweet by @RahulGandhi

sample_text = 'In BJP/RSS vision of India, Adivasis and Dalits should not have access to education. Stopping scholarships for SC-ST students is their way of ends justifying their means.'
predictions = sample_predict(sample_text, pad=True) * 100

# .item() extracts the single value explicitly; implicit conversion of a
# non-0-d array to a scalar in % formatting is deprecated in NumPy >= 1.25.
print('probability that it is a positive review is %.2f percent' % predictions.item())

In [None]:
# Tweet by @Aamir_Khan

sample_text = 'Many happy returns of the day, @SrBachchan Sir. May good health and happiness always be with you. Love. a.'
predictions = sample_predict(sample_text, pad=True) * 100

# .item() extracts the single value explicitly; implicit conversion of a
# non-0-d array to a scalar in % formatting is deprecated in NumPy >= 1.25.
print('probability that it is a positive review is %.2f percent' % predictions.item())

In [None]:
# Tweet by @Sachin_Tendulkar

sample_text = 'The wounds may have healed, but the scars remain. Of the lives lost and the sacrifices made. These shall always be a reminder of the strength of human spirit to overcome any act of adversity. Remembering all our martyrs on this day. #MumbaiTerrorAttack'
predictions = sample_predict(sample_text, pad=True) * 100

# .item() extracts the single value explicitly; implicit conversion of a
# non-0-d array to a scalar in % formatting is deprecated in NumPy >= 1.25.
print('probability that it is a positive review is %.2f percent' % predictions.item())

In [None]:
import matplotlib.pyplot as plt

def plot_graphs(history, metric):
  """Plot a training metric and its validation counterpart against epochs.

  Args:
    history: Object exposing a `history` mapping (such as the value returned
      by `model.fit`) that contains both `metric` and `'val_' + metric`.
    metric: Name of the metric to plot, e.g. 'accuracy'.

  Raises:
    KeyError: If either series is missing from `history.history`.
  """
  val_metric = 'val_' + metric
  plt.plot(history.history[metric])
  plt.plot(history.history[val_metric])
  plt.xlabel("Epochs")
  plt.ylabel(metric)
  # Legend entries mirror the plotted keys exactly (the validation label
  # previously contained a stray space: 'val_ <metric>').
  plt.legend([metric, val_metric])
  plt.show()

In [None]:
# Plot training vs. validation accuracy recorded in `history`.
# NOTE(review): the fit call above uses epochs=1, so each curve presumably has a
# single point and the line plot may look empty - confirm, or train for more epochs.
plot_graphs(history, 'accuracy')

---