In [None]:
! pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download CellPhones -f CellPhonesRating.csv

In [None]:
!pip install tensorflow-text

In [None]:
# Check libcudnn8 version
!apt-cache policy libcudnn8

# Install latest version
!apt install --allow-change-held-packages libcudnn8=8.4.1.50-1+cuda11.6

# Export env variables
!export PATH=/usr/local/cuda-11.4/bin${PATH:+:${PATH}}
!export LD_LIBRARY_PATH=/usr/local/cuda-11.4/lib64:$LD_LIBRARY_PATH
!export LD_LIBRARY_PATH=/usr/local/cuda-11.4/include:$LD_LIBRARY_PATH
!export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64

In [3]:
import os
import tensorflow as tf 
import numpy as np
import tensorflow_hub as hub
import tensorflow_text as text
import pandas as pd
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import train_test_split

In [12]:
# Metric objects handed to model.compile() by trainModel.
rmse = tf.keras.metrics.RootMeanSquaredError()
precision = tf.keras.metrics.Precision()
METRICS = [
  'accuracy',
  'mae',
  rmse,
  precision,
]

# Dataset locations; loadDataset indexes this list with its fileID argument.
DATA = [
  '/content/CellPhonesRating_50K_reviews.csv',
  'dataset_2',
]
MODEL_PATH = []
PATH = ''

In [5]:
# Read the full review table directly from the zipped CSV.
raw_reviews_path = '/content/CellPhonesRating.csv.zip'
df = pd.read_csv(raw_reviews_path)

In [6]:
# Keep the first 50,000 rows as a smaller working set.
df_sample = df.iloc[:50000]
# index=False: otherwise the row index is written as an extra unnamed column
# that comes back as a spurious field when the file is read again.
df_sample.to_csv('/content/CellPhonesRating_50K_reviews.csv', index=False)

In [18]:
def loadBERT():
  """Load the BERT preprocessing and encoder layers from TensorFlow Hub.

  Returns:
    A ``(preprocess_layer, encoder_layer)`` tuple of ``hub.KerasLayer`` objects.
  """
  preprocess_url = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
  encoder_url = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

  print("== LOADING BERT ...")
  preprocess_layer = hub.KerasLayer(preprocess_url)
  encoder_layer = hub.KerasLayer(encoder_url)
  print("== BERT LOADED ==")

  return preprocess_layer, encoder_layer

def preproDataset(df):
  """Clean the review table and add one-hot rating columns.

  Rows containing any missing value are dropped, then only the first row for
  each (productID, reviewerID) pair is kept. Five integer indicator columns
  ('one' .. 'five') are added, each 1 where 'rating' equals the matching
  star value and 0 otherwise.

  Args:
    df: DataFrame with at least 'productID', 'reviewerID' and 'rating'
      columns. It is not modified.

  Returns:
    A new DataFrame with the cleaned rows and the five indicator columns.

  Raises:
    KeyError: if one of the required columns is missing.
  """
  print("== PREPROCESSING DATA ...")

  # Chain the non-mutating forms and take an explicit copy so the column
  # assignments below write to an independent frame (no inplace=True, no
  # SettingWithCopyWarning).
  cleaned = (
      df.dropna(how='any', axis=0)
        .drop_duplicates(subset=['productID', 'reviewerID'], keep='first')
        .copy()
  )

  # Vectorised comparison instead of a per-row lambda; the explicit int64 cast
  # keeps the dtype stable even when no rows survive the cleaning.
  rating_columns = (
      ('one', 1.0),
      ('two', 2.0),
      ('three', 3.0),
      ('four', 4.0),
      ('five', 5.0),
  )
  for column, stars in rating_columns:
    cleaned[column] = (cleaned['rating'] == stars).astype('int64')

  print("== DATA PREPROCESSED ==")

  return cleaned

def getBLCNNmodel(emb_size,filter):
  """Assemble the BERT -> LSTM -> Conv1D -> Dense classifier.

  Args:
    emb_size: number of units in the LSTM layer.
    filter: number of filters in the Conv1D layer.

  Returns:
    An uncompiled ``tf.keras.Model`` that takes a string input named 'text'
    and ends in a 5-unit softmax layer.
  """
  preprocess_layer, encoder_layer = loadBERT()

  # BERT layers: raw strings go through the preprocessor, then the encoder.
  text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
  encoded = encoder_layer(preprocess_layer(text_input))
  sequence_output = encoded['sequence_output']

  # LSTM + CNN layers on top of the encoder's sequence output.
  lstm_out = tf.keras.layers.LSTM(
      emb_size, dropout=0.1, return_sequences=True
  )(sequence_output)
  conv_out = tf.keras.layers.Conv1D(
      filters=filter, kernel_size=3, activation='relu'
  )(lstm_out)
  pooled = tf.keras.layers.MaxPooling1D(3)(conv_out)

  # Classification head.
  flat = layers.Flatten()(pooled)
  hidden = layers.Dense(64, activation='relu')(flat)
  probabilities = layers.Dense(5, activation='softmax')(hidden)

  return tf.keras.Model(inputs=text_input, outputs=probabilities)

def createSentModel(modelID,fileID,nbrE,lossF,OF,emb,filter,random_state=None):
  """Load a dataset, preprocess it, split it and train a model on it.

  Args:
    modelID: model identifier forwarded to ``trainModel``.
    fileID: index into ``DATA`` selecting the CSV to load.
    nbrE: number of training epochs.
    lossF: loss passed to ``model.compile``.
    OF: optimizer passed to ``model.compile``.
    emb: forwarded to ``trainModel`` as ``embed_size``.
    filter: forwarded to ``trainModel`` as ``filter_size``.
    random_state: optional seed for the train/test split. The default
      ``None`` keeps the previous behaviour (a different split on every call).

  Returns:
    ``(model_trained, x_test, y_test)``: the fitted model and the held-out
    20% of review texts and their one-hot labels.
  """
  # Single source of truth for the label columns (used for both y and stratify).
  label_columns = ['one','two','three','four','five']

  reviews = preproDataset(loadDataset(fileID))
  labels = reviews[label_columns]

  x_train, x_test, y_train, y_test = train_test_split(
      reviews['reviewText'],
      labels,
      test_size = 0.2,
      stratify = labels,
      random_state = random_state,
  )
  model_trained = trainModel(modelID,nbrE,lossF,OF,x_train,y_train,embed_size = emb,filter_size = filter)

  return model_trained,x_test,y_test

def evaluateModel(model,x_test,y_test):
  """Evaluate ``model`` on the held-out data and return the result.

  Args:
    model: object exposing ``evaluate(x, y)``.
    x_test: evaluation inputs.
    y_test: evaluation targets.

  Returns:
    Whatever ``model.evaluate(x_test, y_test)`` returns. It was previously
    discarded, so callers had no way to read the metrics programmatically.
  """
  return model.evaluate(x_test,y_test)

def loadDataset(fileID):
  """Read the CSV registered at ``DATA[fileID]`` and return it as a DataFrame."""
  csv_path = DATA[fileID]
  frame = pd.read_csv(csv_path)
  print("== FILE LOADED ==")
  return frame

def trainModel(modelID,nbrEpochs,lossF,OF,x_train ,y_train ,mid_layer_ratio=None,nb_layers=None,maxUserID = None,maxItemID = None,embed_size = None,filter_size = None):
  """Build the model selected by ``modelID``, compile it and fit it.

  Args:
    modelID: one of 'BLCNN', 'GMF', 'S-AutoCF', 'SS-AutoCF'.
    nbrEpochs: number of epochs passed to ``model.fit``.
    lossF: loss passed to ``model.compile``.
    OF: optimizer passed to ``model.compile``.
    x_train: training inputs.
    y_train: training targets.
    mid_layer_ratio, nb_layers: forwarded to the AutoCF model builders.
    maxUserID, maxItemID: forwarded to the GMF model builder.
    embed_size: forwarded to the BLCNN and GMF model builders.
    filter_size: forwarded to the BLCNN model builder.

  Returns:
    The fitted model.

  Raises:
    ValueError: if ``modelID`` is not one of the supported identifiers
      (previously this surfaced as an UnboundLocalError at ``model.compile``).
  """
  # One if/elif chain so exactly one branch runs and unknown IDs are rejected.
  # NOTE(review): getGMFmodel, getAutoCFmodel and getSS_HAEmodel are not
  # defined in the cells visible here - confirm they exist before using them.
  if modelID == 'BLCNN':
    model = getBLCNNmodel(embed_size,filter_size)
  elif modelID == 'GMF':
    model = getGMFmodel(maxUserID,maxItemID,embed_size)
  elif modelID == 'S-AutoCF':
    model = getAutoCFmodel(x_train,mid_layer_ratio,nb_layers)
  elif modelID == 'SS-AutoCF':
    model = getSS_HAEmodel(x_train,mid_layer_ratio,nb_layers)
  else:
    raise ValueError(
        f"Unknown modelID {modelID!r}; expected one of "
        "'BLCNN', 'GMF', 'S-AutoCF', 'SS-AutoCF'"
    )

  model.compile(optimizer = OF,
                    loss = lossF,
                    metrics= METRICS)
  print("== TRAINING IN PROGRESS ...")
  model.fit(x_train,y_train,epochs = nbrEpochs)

  return model

In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab filesystem.
drive_mount_point = '/content/gdrive'
drive.mount(drive_mount_point)

In [None]:
# Checkpoint location on the mounted Drive; the callback saves weights only
# and logs each save (verbose=1).
checkpoint_path = "/content/gdrive/MyDrive/training_LSTM_CNN_Full_CP_Dataset/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(
  filepath=checkpoint_path,
  save_weights_only=True,
  verbose=1,
)

In [None]:
# Train the BLCNN model on DATA[0] for 5 epochs; keep the held-out split.
model_trained, x_test, y_test = createSentModel(
  modelID='BLCNN',
  fileID=0,
  nbrE=5,
  lossF=tf.keras.losses.CategoricalCrossentropy(),
  OF='adam',
  emb=200,
  filter=64,
)

== FILE LOADED ==
== PREPROCESSING DATA ...
== DATA PREPROCESSED ==
== LOADING BERT ...
== BERT LOADED ==
== TRAINING IN PROGRESS ...
Epoch 1/5
Epoch 2/5
Epoch 3/5

In [9]:
# Score the trained model on the held-out split.
evaluateModel(model=model_trained, x_test=x_test, y_test=y_test)



In [None]:
# Fit for 25 epochs, with the checkpoint callback saving weights during training.
# NOTE(review): `bert_cnn_model` and `X_train` are not defined in any cell
# visible here (the training cell above produces `model_trained`, `x_test`
# and `y_test`) - confirm where they come from; on a fresh kernel this cell
# would raise NameError.
bert_cnn_model.fit(X_train,y_train,epochs = 25,callbacks=[cp_callback])

In [None]:
# Restore the weights stored at checkpoint_path.
# NOTE(review): `bert_cnn_model` is not defined in any cell visible here -
# confirm it is created elsewhere before this cell runs.
bert_cnn_model.load_weights(checkpoint_path)