In [None]:
import numpy as np 
import pandas as pd

In [None]:
# Download the BERT `tokenization.py` helper from the tensorflow/models
# repository into the current working directory.
# NOTE(review): the import cell further down uses `from bert import tokenization`
# and leaves `import tokenization` commented out, so this downloaded copy looks
# unused - confirm before removing. `--quiet` suppresses wget's output, so a
# failed download would not be visible here.

!wget --quiet https://raw.githubusercontent.com/tensorflow/models/master/official/nlp/bert/tokenization.py

In [None]:
!pip install bert-tensorflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bert-tensorflow
  Downloading bert_tensorflow-1.0.4-py2.py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.4/64.4 KB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: bert-tensorflow
Successfully installed bert-tensorflow-1.0.4


In [None]:
# import tokenization
# import tensorflow_hub as hub
# import tokenization
from bert import tokenization
import tensorflow as tf
import tensorflow_hub as hub
from keras.utils import to_categorical
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [None]:
# Read the training and evaluation CSVs; both are decoded as latin-1.
train_data, test_data = (
    pd.read_csv(csv_path, encoding='latin-1')
    for csv_path in ('/content/Train.csv', '/content/evaluation - evaluation.csv')
)

In [None]:
# Raw review texts of the training set as a NumPy array.
train_data['text'].values

array(['this is an amazing app for online classes!but',
       'very practical and easy to use',
       'this app is very good for video conferencing.', ...,
       'hello,\ndisney+ must also be installed on chromecast',
       'it is a shame that disney+ does not work on tv box devices.',
       'i have to close and reload the app over and over again until the sound works.'],
      dtype=object)

In [None]:
# First five rows of the training frame.
train_data.head(n=5)

Unnamed: 0,text,reason,label
0,this is an amazing app for online classes!but,good app for conducting online classes,1
1,very practical and easy to use,app is user-friendly,1
2,this app is very good for video conferencing.,good for video conferencing,1
3,i can not download this zoom app,unable to download zoom app,1
4,i am not able to download this app,want to download the app,1


In [None]:
# First five rows of the evaluation frame.
test_data.head(n=5)

Unnamed: 0,text,reason,label
0,the app is crashing when i play a vedio,app crashes during playback,1
1,but i want to connect it to the tv from one de...,want compatibility with more smart televisions,0
2,very helpful when and home working remotley,good app for work,0
3,this zoom so called and missed call and mobile...,receiving incorrect phone number message,0
4,one of my favorite apps,good for spending time,0


In [None]:
# Integer-encode the `label` column, then expand it to one-hot rows so it
# matches the two-unit softmax output and categorical cross-entropy loss used
# by the model.
label = preprocessing.LabelEncoder()
y = label.fit_transform(train_data['label'])
y = to_categorical(y)
# Preview the first five rows. The earlier `y[:-5]` slice selected every row
# except the last five, which is not a preview.
print(y[:5])

[[0. 1.]
 [0. 1.]
 [0. 1.]
 ...
 [1. 0.]
 [1. 0.]
 [1. 0.]]


In [None]:
# TF-Hub handle of the BERT encoder; the layer is loaded as trainable so its
# weights are updated during fine-tuning.
m_url = 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2'
bert_layer = hub.KerasLayer(
    handle=m_url,
    trainable=True,
)

In [None]:
# Build the WordPiece tokenizer from the vocabulary file and casing flag that
# ship with the loaded hub module.
vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
tokenizer = tokenization.FullTokenizer(
    vocab_file=vocab_file,
    do_lower_case=do_lower_case,
)

def _truncate_pair(tokens_a, tokens_b, budget):
    """Trim two token lists in place until their combined length fits ``budget``.

    One token at a time is removed from the end of whichever list is currently
    longer (ties trim ``tokens_b``), so a short sequence is not sacrificed for
    a long one.
    """
    while len(tokens_a) + len(tokens_b) > budget:
        longer = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b
        longer.pop()


def bert_encode(data, tokenizer, max_len=512):
    """Encode texts into fixed-length BERT input arrays.

    Two input forms are accepted:

    * A DataFrame-like object (anything with a ``columns`` attribute) that has
      ``text`` and ``reason`` columns. Each row is encoded as a sentence pair
      ``[CLS] text [SEP] reason [SEP]`` with segment id 0 for the first part
      (including its ``[SEP]``) and 1 for the second.
    * Any other iterable of strings (e.g. ``frame.text.values``). Each string
      is encoded on its own as ``[CLS] text [SEP]`` with segment id 0.

    Sequences longer than ``max_len`` are truncated; shorter ones are padded
    with zeros in all three outputs.

    Args:
        data: DataFrame with ``text``/``reason`` columns, or an iterable of
            strings.
        tokenizer: Object exposing ``tokenize(text)`` and
            ``convert_tokens_to_ids(tokens)``.
        max_len: Length of every encoded row, special tokens included.

    Returns:
        Tuple ``(token_ids, attention_masks, segment_ids)`` of ``int32`` arrays,
        each of shape ``(n_examples, max_len)``.

    Raises:
        ValueError: If ``max_len`` cannot hold the special tokens.
        AttributeError: If a DataFrame-like ``data`` lacks ``text`` or ``reason``.
    """
    paired = hasattr(data, "columns")
    n_special = 3 if paired else 2
    if max_len < n_special:
        raise ValueError(
            "max_len must be at least %d to hold the special tokens, got %r"
            % (n_special, max_len)
        )

    if paired:
        # zip pairs the two columns row by row; iterating the bare tuple of
        # arrays (as before) tried to unpack a whole column into two names.
        examples = zip(data.text.values, data.reason.values)
    else:
        examples = ((text, None) for text in data)

    all_tokens = []
    all_masks = []
    all_segments = []

    for text, reason in examples:
        text_tokens = list(tokenizer.tokenize(text))

        if paired:
            # tokenize() takes one string, so each part is tokenised separately.
            reason_tokens = list(tokenizer.tokenize(reason))
            _truncate_pair(text_tokens, reason_tokens, max_len - n_special)
            input_sequence = (
                ["[CLS]"] + text_tokens + ["[SEP]"] + reason_tokens + ["[SEP]"]
            )
            segment_ids = [0] * (len(text_tokens) + 2) + [1] * (len(reason_tokens) + 1)
        else:
            text_tokens = text_tokens[:max_len - n_special]
            input_sequence = ["[CLS]"] + text_tokens + ["[SEP]"]
            segment_ids = [0] * len(input_sequence)

        pad_len = max_len - len(input_sequence)

        tokens = list(tokenizer.convert_tokens_to_ids(input_sequence)) + [0] * pad_len
        pad_masks = [1] * len(input_sequence) + [0] * pad_len
        segment_ids = segment_ids + [0] * pad_len

        all_tokens.append(tokens)
        all_masks.append(pad_masks)
        all_segments.append(segment_ids)

    def _stack(rows):
        # int32 matches the dtype of the Keras inputs built in build_model; the
        # reshape keeps a (0, max_len) shape when there are no examples.
        return np.array(rows, dtype=np.int32).reshape(-1, max_len)

    return _stack(all_tokens), _stack(all_masks), _stack(all_segments)

In [None]:
def build_model(bert_layer, max_len=512):
    """Build and compile a two-class classifier on top of a BERT hub layer.

    Args:
        bert_layer: Callable Keras layer that takes
            ``[input_word_ids, input_mask, segment_ids]`` and returns a pair
            that is unpacked here as ``(pooled_output, sequence_output)``.
        max_len: Sequence length of the three integer inputs; must match the
            ``max_len`` used when encoding the data.

    Returns:
        A compiled ``tf.keras`` model with three ``int32`` inputs of shape
        ``(max_len,)`` and a 2-unit softmax output, trained with categorical
        cross-entropy and reporting accuracy.
    """
    input_word_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name="input_word_ids")
    input_mask = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name="input_mask")
    segment_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name="segment_ids")

    # Only the per-token output is used; the pooled output is discarded.
    _, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])

    # Position 0 of every sequence is the [CLS] token written by bert_encode;
    # its vector is used as the representation of the whole input.
    clf_output = sequence_output[:, 0, :]

    # Small dense head with dropout between the layers.
    lay = tf.keras.layers.Dense(64, activation='relu')(clf_output)
    lay = tf.keras.layers.Dropout(0.2)(lay)
    lay = tf.keras.layers.Dense(32, activation='relu')(lay)
    lay = tf.keras.layers.Dropout(0.2)(lay)
    out = tf.keras.layers.Dense(2, activation='softmax')(lay)

    model = tf.keras.models.Model(inputs=[input_word_ids, input_mask, segment_ids], outputs=out)
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated
    # and rejected by newer Keras optimizers.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

In [None]:
# Tokenise the first training text as a quick sanity check of the tokenizer.
text = tokenizer.tokenize(train_data['text'].values[0])

In [None]:
# Element-wise string concatenation: "<text>-<reason>" for every training row.
train_data_joint = train_data['text'].values + "-" + train_data['reason'].values
train_data_joint

array(['this is an amazing app for online classes!but-good app for conducting online classes',
       'very practical and easy to use-app is user-friendly',
       'this app is very good for video conferencing.-good for video conferencing',
       ...,
       'hello,\ndisney+ must also be installed on chromecast-not able to receive virtual call with physician office without connection issues!',
       'it is a shame that disney+ does not work on tv box devices.-zoom just withdraw money from my account without any notification, just an invoice of payment i did not make, they really need to stop that',
       'i have to close and reload the app over and over again until the sound works.-just when I am about to join an important meeting, i can not access zoom because of a mere update..'],
      dtype=object)

In [None]:
# Element-wise string concatenation: "<text>-<reason>" for every evaluation row.
test_data_joint = test_data['text'].values + "-" + test_data['reason'].values
test_data_joint

array(['the app is crashing when i play a vedio-app crashes during playback',
       'but i want to connect it to the tv from one device to another-want compatibility with more smart televisions',
       'very helpful when and home working remotley-good app for work',
       ...,
       'it does not work well for me, it loads, but if i try to advance what I am seeing, it does not load anymore-app keeps loading indefinitely',
       'i can not watch the new disney shows.-unable to load the episodes list',
       'really if i rewind a seconde before the ad ended you make me watch the entire 2 minutes ad again?!??-getting ads frequently'],
      dtype=object)

In [None]:
# Encode the text and reason columns of both frames independently, all with
# the same sequence length.
# NOTE(review): these calls pass bare string arrays - confirm that the
# bert_encode definition in scope accepts that (a version that reads
# `data.text` / `data.reason` will not).
max_len = 250
(
    train_input_text,
    train_input_reason,
    test_input_text,
    test_input_reason,
) = (
    bert_encode(column_values, tokenizer, max_len=max_len)
    for column_values in (
        train_data.text.values,
        train_data.reason.values,
        test_data.text.values,
        test_data.reason.values,
    )
)

train_labels = y

In [None]:
# Encode whole frames (text and reason together) with a shared sequence length,
# and use the one-hot labels as training targets.
max_len = 250
train_input, test_input = (
    bert_encode(frame, tokenizer, max_len=max_len)
    for frame in (train_data, test_data)
)
train_labels = y

ValueError: ignored

In [None]:
# Pair up the i-th component of the text encoding with the i-th component of
# the reason encoding (ids with ids, masks with masks, segments with segments).
# NOTE(review): the result is a tuple of three (text, reason) pairs, not three
# arrays of shape (rows, max_len); the model built by build_model declares
# exactly three inputs, so verify this structure is what model.fit should get.
train_input = tuple(
    (text_part, reason_part)
    for text_part, reason_part in zip(train_input_text, train_input_reason)
)
test_input = tuple(
    (text_part, reason_part)
    for text_part, reason_part in zip(test_input_text, test_input_reason)
)

In [None]:
# Shape of the stacked (ids, masks, segments) encoding of the text column.
np.asarray(train_input_text).shape

(3, 4122, 250)

In [None]:
# Flatten the leading axes of the stacked encodings into one and insert a
# singleton axis, keeping the two trailing axes unchanged.
# The sizes are read from the data instead of being hard-coded to
# (6, 1, 4122, 250), so the cell still runs when the number of rows or max_len
# changes. Assumes train_input stacks to a 4-D array, as produced by zipping
# the text and reason encodings - TODO confirm.
# The result is only displayed, not assigned.
np.reshape(train_input, (-1, 1) + np.shape(train_input)[-2:])

array([[[[  101,  2023,  2003, ...,     0,     0,     0],
         [  101,  2200,  6742, ...,     0,     0,     0],
         [  101,  2023, 10439, ...,     0,     0,     0],
         ...,
         [  101,  7592,  1010, ...,     0,     0,     0],
         [  101,  2009,  2003, ...,     0,     0,     0],
         [  101,  1045,  2031, ...,     0,     0,     0]]],


       [[[  101,  2204, 10439, ...,     0,     0,     0],
         [  101, 10439,  2003, ...,     0,     0,     0],
         [  101,  2204,  2005, ...,     0,     0,     0],
         ...,
         [  101,  2025,  2583, ...,     0,     0,     0],
         [  101, 24095,  2074, ...,     0,     0,     0],
         [  101,  2074,  2043, ...,     0,     0,     0]]],


       [[[    1,     1,     1, ...,     0,     0,     0],
         [    1,     1,     1, ...,     0,     0,     0],
         [    1,     1,     1, ...,     0,     0,     0],
         ...,
         [    1,     1,     1, ...,     0,     0,     0],
         [    1,     1

In [None]:
# Original class values seen by the fitted LabelEncoder, in encoded order
# (index i is the class that was mapped to integer i).
labels = label.classes_
print(labels)

[0 1]


In [None]:
# Build the classifier with the same sequence length used for encoding and
# print its layer summary.
model = build_model(bert_layer=bert_layer, max_len=max_len)
model.summary()

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_word_ids (InputLayer)    [(None, 250)]        0           []                               
                                                                                                  
 input_mask (InputLayer)        [(None, 250)]        0           []                               
                                                                                                  
 segment_ids (InputLayer)       [(None, 250)]        0           []                               
                                                                                                  
 keras_layer (KerasLayer)       [(None, 768),        109482241   ['input_word_ids[0][0]',         
                                 (None, 250, 768)]                'input_mask[0][0]',         

In [None]:
# Keep the weights of the epoch with the best validation accuracy on disk.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='model.h5',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)
# NOTE(review): patience (5) is larger than the number of epochs (3), so this
# callback cannot stop training early with the current settings.
earlystopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    verbose=1,
)

# Fine-tune on the training inputs, holding out the last 20% for validation.
# NOTE(review): the notebook records an out-of-RAM failure during this call;
# batch size, sequence length and the structure of train_input are the first
# things to check.
train_sh = model.fit(
    x=train_input,
    y=train_labels,
    batch_size=32,
    epochs=3,
    verbose=1,
    callbacks=[checkpoint, earlystopping],
    validation_split=0.2,
)

Epoch 1/3


The system ran out of RAM after this point.