# BERT Implementation using TensorFlow by `Mr. Harshit Dawar`!

In [7]:
# Importing the libraries for Tensorflow

import tensorflow_hub as th
# NOTE(review): `text` is never referenced by name in this notebook; the import
# is presumably kept for its side effect of registering the custom ops used by
# the TF Hub text preprocessor — confirm before removing it.
import tensorflow_text as text

## Loading the Pre-trained BERT Model (Transfer Learning) & the Preprocessor for the BERT Model

In [8]:
# Pre-trained uncased English BERT encoder, wrapped as a Keras layer from TF Hub.
BERT_MODEL = th.KerasLayer(
    handle = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4",
)

In [9]:
# Matching TF Hub preprocessing layer: turns raw strings into the tensor dict
# ('input_word_ids', 'input_mask', 'input_type_ids') that the encoder consumes.
Text_Preprocessor = th.KerasLayer(
    handle = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3",
)

2021-09-18 19:15:00.832859: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


## Loading the Dataset

In [10]:
import pandas as pd

In [11]:
# Read the labelled feedback workbook into a DataFrame.
data = pd.read_excel(io = "amazonLabelled.xlsx")

In [12]:
# Preview the sheet exactly as loaded (pandas elides the middle rows in the display).
data

Unnamed: 0,S,Feedback,Sentiment
0,1.0,"Good case, Excellent value.",Positive
1,2.0,Great for the jawbone.,Positive
2,3.0,Tied to charger for conversations lasting more...,Negative
3,4.0,The mic is great.,Positive
4,5.0,I have to jiggle the plug to get it to line up...,Negative
...,...,...,...
994,995.0,The screen does get smudged easily because it ...,Negative
995,996.0,What a piece of junk.. I lose more calls on th...,Negative
996,997.0,Item Does Not Match Picture.,Negative
997,998.0,The only thing that disappoint me is the infra...,Negative


## Processing the Dataset

In [13]:
# Drop the "S" column (a running row number in the preview), which is not used
# as a feature or a label.
# Re-assign instead of mutating in place, and tolerate an already-missing column,
# so that re-running this cell is a harmless no-op rather than a KeyError.
data = data.drop(columns = "S", errors = "ignore")

In [14]:
# Check the frame after dropping "S": only the review text and its label should remain.
data

Unnamed: 0,Feedback,Sentiment
0,"Good case, Excellent value.",Positive
1,Great for the jawbone.,Positive
2,Tied to charger for conversations lasting more...,Negative
3,The mic is great.,Positive
4,I have to jiggle the plug to get it to line up...,Negative
...,...,...
994,The screen does get smudged easily because it ...,Negative
995,What a piece of junk.. I lose more calls on th...,Negative
996,Item Does Not Match Picture.,Negative
997,The only thing that disappoint me is the infra...,Negative


In [16]:
# Run every review through the BERT preprocessor eagerly so its output can be
# inspected in the following cells.
preprocessed_texts = Text_Preprocessor(data["Feedback"])

In [18]:
# List the named tensors the preprocessor returned.
preprocessed_texts.keys()

dict_keys(['input_word_ids', 'input_mask', 'input_type_ids'])

In [19]:
# Token ids, one fixed-length row per review; the trailing zeros are presumably padding.
preprocessed_texts["input_word_ids"]

<tf.Tensor: shape=(999, 128), dtype=int32, numpy=
array([[ 101, 2204, 2553, ...,    0,    0,    0],
       [ 101, 2307, 2005, ...,    0,    0,    0],
       [ 101, 5079, 2000, ...,    0,    0,    0],
       ...,
       [ 101, 8875, 2515, ...,    0,    0,    0],
       [ 101, 1996, 2069, ...,    0,    0,    0],
       [ 101, 2017, 2064, ...,    0,    0,    0]], dtype=int32)>

In [21]:
# Input mask of the first review: leading 1s followed by 0s — presumably 1 marks
# a real token position and 0 marks padding.
preprocessed_texts["input_mask"][0]

<tf.Tensor: shape=(128,), dtype=int32, numpy=
array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)>

In [46]:
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

In [47]:
# Map the string labels to integer class ids
# (in the displayed output "Negative" ends up as class 0 and "Positive" as class 1).
label_encoder = LabelEncoder()
sentiments = label_encoder.fit_transform(data["Sentiment"].values)

In [48]:
# One-hot encode the integer class ids so they match the 2-unit output head.
# Guard on the rank: without it, re-running this cell would one-hot encode the
# already one-hot matrix a second time and silently corrupt the labels.
if sentiments.ndim == 1:
    sentiments = to_categorical(sentiments)

In [49]:
# Inspect the one-hot labels: column 0 is "Negative", column 1 is "Positive"
# (compare the first rows with the Sentiment column shown earlier).
sentiments

array([[0., 1.],
       [0., 1.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

## Generating the NLP Model for the Sentiment Analysis

In [25]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input
import tensorflow as tf

In [40]:
"""
We cannot subscript in Sequential Model, so the below code will not work!
"""

# Abandoned attempt, kept for reference only (never executed).
# `BERT_MODEL` is a layer object, so `BERT_MODEL["pooled_output"]` cannot be
# evaluated: "pooled_output" is a key of the dict returned when the layer is
# *called*. That is why the model is built with the functional API further down.
# model = Sequential([
#                         Text_Preprocessor,
#                         BERT_MODEL["pooled_output"],
#                         Dense(2, activation = "sigmoid")
# ])

In [85]:
# Abandoned experiment, kept for reference only (never executed): a dense head
# on inputs of shape (128, 768) — presumably pre-computed per-token BERT outputs.
# TODO(review): confirm the intent or delete this cell.
# model = Sequential([
#                         Input(shape = (128, 768)),
#                         Dense(2, activation = "sigmoid")
# ])

In [128]:
# Build the end-to-end sentiment classifier with the functional API:
# raw review string -> BERT preprocessing -> BERT encoder -> 2-way dense head.

# One scalar string per example; tokenisation happens inside the model.
inputs = Input(shape = (), dtype = tf.string)

encoded_data = Text_Preprocessor(inputs)
bert_output = BERT_MODEL(encoded_data)

# The targets are one-hot over two mutually exclusive classes and the model is
# compiled with categorical cross-entropy, so the head must emit a probability
# distribution over the classes: use softmax. Independent sigmoids (the previous
# activation) do not sum to 1 and do not match that loss.
outputs = Dense(units = 2, activation = "softmax")(bert_output["pooled_output"])

model = Model(inputs, outputs)

In [129]:
# Configure training: Adam optimiser, categorical cross-entropy on the one-hot
# labels, and accuracy reported per epoch.
model.compile(
    optimizer = "adam",
    loss = "categorical_crossentropy",
    metrics = ["accuracy"],
)

In [130]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_18 (InputLayer)           [(None,)]            0                                            
__________________________________________________________________________________________________
keras_layer_2 (KerasLayer)      {'input_word_ids': ( 0           input_18[0][0]                   
__________________________________________________________________________________________________
keras_layer_1 (KerasLayer)      {'default': (None, 7 109482241   keras_layer_2[5][0]              
                                                                 keras_layer_2[5][1]              
                                                                 keras_layer_2[5][2]              
____________________________________________________________________________________________

In [131]:
# Train on the raw review strings (the model tokenises them itself) for 10
# epochs and keep the per-epoch metrics in `history`.
history = model.fit(
    x = data.Feedback.values,
    y = sentiments,
    epochs = 10,
    verbose = 1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Congratulations, you have learned how to implement BERT!