# Sentiment Analysis using BERT by `Mr. Harshit Dawar!`

In [2]:
import tensorflow_hub as hub
import tensorflow_text as text
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [3]:
# Transfer learning: pretrained TF-Hub layers for BERT.
# The preprocessing layer turns raw strings into the input dict the encoder
# consumes. The handles pin version 3 of the uncased preprocessor and version 4
# of the uncased encoder (L-12, H-768, A-12).
# NOTE(review): no `trainable = True` is passed, so per the hub.KerasLayer
# default the pretrained weights should stay frozen - confirm if fine-tuning
# was intended.
BERT_PREPROCESS_HANDLE = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
BERT_ENCODER_HANDLE = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

TextProcessor = hub.KerasLayer(BERT_PREPROCESS_HANDLE)
BERT_Processor = hub.KerasLayer(BERT_ENCODER_HANDLE)

2021-09-19 15:07:54.281688: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-09-19 15:07:55.958011: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


In [4]:
# Load the labelled feedback dataset from the Excel workbook. The path is
# relative to the notebook's working directory.
DATASET_PATH = "amazonLabelled.xlsx"
data = pd.read_excel(DATASET_PATH)

In [5]:
# Display the freshly loaded DataFrame (rendered as the cell's output) to check
# which columns are present before any cleaning is applied.
data

Unnamed: 0,S,Feedback,Sentiment
0,1.0,"Good case, Excellent value.",Positive
1,2.0,Great for the jawbone.,Positive
2,3.0,Tied to charger for conversations lasting more...,Negative
3,4.0,The mic is great.,Positive
4,5.0,I have to jiggle the plug to get it to line up...,Negative
...,...,...,...
994,995.0,The screen does get smudged easily because it ...,Negative
995,996.0,What a piece of junk.. I lose more calls on th...,Negative
996,997.0,Item Does Not Match Picture.,Negative
997,998.0,The only thing that disappoint me is the infra...,Negative


In [6]:
# Remove the "S" column (it appears to be a running row number and is not used
# as a feature). Rebinding `data` instead of dropping in place, together with
# errors = "ignore", keeps this cell idempotent: re-running it no longer raises
# KeyError once "S" is already gone.
data = data.drop(columns = "S", errors = "ignore")

# Encode the string sentiment labels as integers. The fitted encoder is kept so
# the class <-> integer mapping stays available afterwards
# (label_encoder.classes_, label_encoder.inverse_transform).
label_encoder = LabelEncoder()
sentiments = label_encoder.fit_transform(data.Sentiment.values)

In [7]:
# Inspect the integer-encoded sentiment labels produced by the LabelEncoder.
sentiments

array([1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
       1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,
       1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1,

In [8]:
# One-hot encode the integer labels so they match the 2-unit softmax output and
# the categorical cross-entropy loss used for training.
# The ndim guard makes the cell safe to re-run: without it, a second execution
# would one-hot encode the already one-hot matrix and add an extra axis.
if sentiments.ndim == 1:
    sentiments = to_categorical(sentiments)

In [9]:
# Inspect the labels after to_categorical: one row per sample, one column per class.
sentiments

array([[0., 1.],
       [0., 1.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [13]:
# Show the Feedback column - the raw text that is passed to the BERT layers below.
data.Feedback

0                            Good case, Excellent value.
1                                 Great for the jawbone.
2      Tied to charger for conversations lasting more...
3                                      The mic is great.
4      I have to jiggle the plug to get it to line up...
                             ...                        
994    The screen does get smudged easily because it ...
995    What a piece of junk.. I lose more calls on th...
996                         Item Does Not Match Picture.
997    The only thing that disappoint me is the infra...
998    You can not answer calls with the unit, never ...
Name: Feedback, Length: 999, dtype: object

In [16]:
# Smoke test: push the first five feedback texts through preprocessing and the
# BERT encoder to see what the encoder returns before wiring up the model.
sample_texts = data.Feedback[:5]
sample_encoder_inputs = TextProcessor(sample_texts)
demo = BERT_Processor(sample_encoder_inputs)

In [17]:
# List the entries of the dict returned by the BERT encoder for the sample batch.
demo.keys()

dict_keys(['default', 'encoder_outputs', 'pooled_output', 'sequence_output'])

In [20]:
# Compare the shapes of the two encoder outputs for the 5-text sample batch:
# "pooled_output" is expected to hold one vector per text, while
# "sequence_output" carries an additional per-token axis.
demo["pooled_output"].shape, demo["sequence_output"].shape

(TensorShape([5, 768]), TensorShape([5, 128, 768]))

In [22]:
# Classifier pipeline: raw string -> BERT preprocessing -> BERT encoder -> softmax head.
# shape = () declares each example as a single scalar string.
inputs = Input(shape = (), dtype = tf.string)

processed_texts = TextProcessor(inputs)
encoded_texts = BERT_Processor(processed_texts)

# Classify from the pooled output with a 2-unit softmax head, matching the
# two-column one-hot labels.
pooled_embedding = encoded_texts["pooled_output"]
classification_head = Dense(units = 2, activation = "softmax")
outputs = classification_head(pooled_embedding)

model = Model(inputs = inputs, outputs = outputs)

In [23]:
# Categorical cross-entropy pairs with the one-hot targets and the softmax
# output; accuracy is tracked as the reported metric.
model.compile(
    optimizer = "adam",
    loss = "categorical_crossentropy",
    metrics = ["accuracy"],
)

In [24]:
# Print the layer-by-layer summary to check how the layers are connected and
# how many parameters each one holds.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None,)]            0                                            
__________________________________________________________________________________________________
keras_layer (KerasLayer)        {'input_word_ids': ( 0           input_1[0][0]                    
__________________________________________________________________________________________________
keras_layer_1 (KerasLayer)      {'default': (None, 7 109482241   keras_layer[0][0]                
                                                                 keras_layer[0][1]                
                                                                 keras_layer[0][2]                
______________________________________________________________________________________________

In [25]:
# Hold out part of the data so every epoch reports validation loss/accuracy next
# to the training metrics; fitting on all rows leaves no way to spot overfitting.
# Keras takes the validation_split samples from the END of the arrays before
# shuffling, so the rows are permuted first. The fixed seed keeps the split
# reproducible across runs.
VALIDATION_FRACTION = 0.2
SPLIT_SEED = 42

shuffled_order = np.random.default_rng(SPLIT_SEED).permutation(len(sentiments))
feedback_texts = data.Feedback.values[shuffled_order]
feedback_labels = sentiments[shuffled_order]

# `history.history` now also contains the "val_loss" / "val_accuracy" curves.
history = model.fit(
    feedback_texts,
    feedback_labels,
    validation_split = VALIDATION_FRACTION,
    epochs = 15,
    verbose = 1,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
