In [None]:
! pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download CellPhones -f CellPhonesRating.csv

In [None]:
!pip install tensorflow-text

In [None]:
import os
import tensorflow as tf 
import numpy as np
import tensorflow_hub as hub
import tensorflow_text as text
import pandas as pd
from tensorflow.keras import datasets, layers, models

In [None]:
# TF Hub layers: text preprocessing followed by the matching uncased BERT
# encoder (12 layers, hidden size 768, 12 attention heads per the handle name).
bert_preprocess_model = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
)
bert_encoder = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"
)

In [None]:
# Mount Google Drive so training checkpoints can be written under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Read the downloaded archive, drop rows with any missing value, and keep only
# the first review for each (productID, reviewerID) pair.
df = (
    pd.read_csv("/content/CellPhonesRating.csv.zip")
    .dropna(how='any', axis=0)
    .drop_duplicates(subset=['productID', 'reviewerID'], keep='first')
)
df.head()

In [None]:
# One-hot encode the star rating into five 0/1 indicator columns.
# A vectorised comparison replaces the row-by-row `.apply(lambda ...)`: it
# yields the same int64 values but avoids a Python-level call per row.
for column, stars in [('one', 1.0), ('two', 2.0), ('three', 3.0),
                      ('four', 4.0), ('five', 5.0)]:
    df[column] = (df['rating'] == stars).astype('int64')

In [None]:
# Train/test data split
from sklearn.model_selection import train_test_split

# The one-hot label columns serve both as targets and as the stratification key,
# so each rating keeps the same proportion in the train and test partitions.
label_columns = ['one', 'two', 'three', 'four', 'five']
X_train, X_test, y_train, y_test = train_test_split(
    df['reviewText'],
    df[label_columns],
    stratify=df[label_columns],
    # Fixed seed: without it the partition differs on every run, so the
    # evaluation numbers below could not be reproduced after a kernel restart.
    random_state=42,
)

In [None]:
# Sanity check: number of review texts in the held-out test split.
X_test.shape

(921911,)

In [None]:
#BERT layers
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
preprocessed_review = bert_preprocess_model(text_input)
outputs = bert_encoder(preprocessed_review)

In [None]:
# LSTM + CNN head stacked on BERT's per-token outputs.

# The recurrent layer returns the full sequence so the convolution can slide over it.
l = layers.LSTM(768, dropout=0.1, return_sequences=True)(outputs['sequence_output'])

# Convolution over windows of 3 steps, then max-pooling with pool size 3.
conv_1 = layers.Conv1D(filters=128, kernel_size=3, activation='relu')(l)
pool_1 = layers.MaxPooling1D(3)(conv_1)

# Flatten the pooled feature maps and classify into the five rating classes.
flatten = layers.Flatten()(pool_1)
hidden1 = layers.Dense(64, activation='relu')(flatten)
output = layers.Dense(5, activation='softmax')(hidden1)

bert_cnn_model = tf.keras.Model(inputs=text_input, outputs=output)

In [None]:
# Categorical cross-entropy pairs with the 5-way softmax output and the
# one-hot label columns; accuracy is tracked as the reported metric.
bert_cnn_model.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the assembled model.
bert_cnn_model.summary()

In [None]:
# Checkpoint location on the mounted Drive, used to persist the model's weights.
checkpoint_path = "/content/gdrive/MyDrive/training_LSTM_CNN_Full_CP_Dataset/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Callback that writes weights only (not the full model) to checkpoint_path
# and logs a message each time it saves.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
)

In [None]:
# Initial training run: 25 epochs over the training split, checkpointing via cp_callback.
bert_cnn_model.fit(
    x=X_train,
    y=y_train,
    epochs=25,
    callbacks=[cp_callback],
)

In [None]:
# Restore the weights that cp_callback saved at checkpoint_path.
bert_cnn_model.load_weights(checkpoint_path)

In [None]:
# Quick evaluation on only the first 1000 test rows (not the full test split);
# returns the loss followed by the accuracy metric configured in compile().
bert_cnn_model.evaluate(
    x=X_test[:1000],
    y=y_test[:1000],
)



[2.3407304286956787, 0.6520000100135803]

In [None]:
# Continue training from the current weights for 5 more epochs, still checkpointing.
bert_cnn_model.fit(
    x=X_train,
    y=y_train,
    epochs=5,
    callbacks=[cp_callback],
)