# The third set of parameters

**maxlen = 200,
batch size = 8,
epochs = 10,
steps per epoch = 64,
embedding dimension = 64**

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive; the dataset
# read later in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras

# Load the Yelp review CSV (its first column is used as the index), keep only
# the review text and rating columns, discard rows with missing values, and
# renumber the remaining rows from 0.
df = (
    pd.read_csv(
        "/content/drive/MyDrive/Colab Notebooks/SU23 Deep Learning/Final Project/Yelp Restaurant Reviews.csv",
        index_col=0,
    )
    .loc[:, ["Review Text", "Rating"]]
    .dropna()
    .reset_index(drop=True)
)

In [3]:
from sklearn.model_selection import train_test_split
# Model input is the raw review text; the regression target is the rating.
rev = df["Review Text"]
target = df["Rating"]

# Hold out 20% of the rows for evaluation. Stratifying on the rating keeps the
# rating distribution the same in both partitions, and the fixed seed makes the
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    rev,
    target,
    test_size=0.2,
    stratify=target,
    random_state=0,
)

In [4]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Text-sampling hyperparameters reused by the model cells
max_features = 10000  # vocabulary size cap handed to the tokenizer
maxlen = 200          # every review is padded/truncated to this many tokens
batch_size = 8

# Tokenizing: the vocabulary is fitted on the training reviews only, then both
# splits are encoded as lists of integer word ids (unknown words -> "<OOV>").
tokenizer = Tokenizer(num_words=max_features, oov_token="<OOV>")
tokenizer.fit_on_texts(x_train)
train_sequences = tokenizer.texts_to_sequences(x_train)
test_sequences = tokenizer.texts_to_sequences(x_test)
print(len(train_sequences), 'train sequences')
print(len(test_sequences), 'test sequences')

# Turn the tokenized texts into fixed-length padded sequences
print('Pad sequences (samples x text length)')
X_train = pad_sequences(train_sequences, maxlen=maxlen)
X_test = pad_sequences(test_sequences, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

15916 train sequences
3980 test sequences
Pad sequences (samples x text length)
X_train shape: (15916, 200)
X_test shape: (3980, 200)


# SimpleRNN

In [5]:
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, Dense

# SimpleRNN regressor: a learned word embedding followed by three stacked
# SimpleRNN layers and a single linear output unit predicting the rating.
embedding_dim = 64
model = Sequential()
model.add(Embedding(max_features, embedding_dim, input_length=maxlen))
# Every recurrent layer except the last returns the full sequence so that the
# following recurrent layer receives one vector per timestep.
model.add(SimpleRNN(64, return_sequences=True))
model.add(SimpleRNN(32, return_sequences=True))
model.add(SimpleRNN(16))
model.add(Dense(1))

# `metrics` is documented as a list of metrics; the list form works across
# Keras versions, whereas a bare string is not accepted by all of them.
model.compile(optimizer='adam', loss='mae', metrics=['mape'])  # mape: Mean Absolute Percentage Error
history = model.fit(
    X_train, y_train,
    epochs=10,
    steps_per_epoch=64,
    batch_size=batch_size,
    validation_data=(X_test, y_test),
)

# Drop Keras' global state and the model object before the next experiment;
# `history` is kept.
keras.backend.clear_session()
del model

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Pure CNN with Word Embedding

In [6]:
from keras.models import Sequential
from keras.layers import Embedding
from keras import layers

# Pure CNN regressor: a learned word embedding, two 1-D convolution stages
# (the first followed by max pooling, the second by global max pooling) and a
# single linear output unit predicting the rating.
embedding_dim = 64
model = Sequential()
model.add(Embedding(max_features, embedding_dim, input_length=maxlen))
model.add(layers.Conv1D(64, 5, activation='relu'))
model.add(layers.MaxPooling1D(3))
model.add(layers.Conv1D(32, 5, activation='relu'))
# Collapse the time axis to one feature vector per review for the dense head.
model.add(layers.GlobalMaxPooling1D())
model.add(layers.Dense(1))

# `metrics` is documented as a list of metrics; the list form works across
# Keras versions, whereas a bare string is not accepted by all of them.
model.compile(optimizer='adam', loss='mae', metrics=['mape'])  # mape: Mean Absolute Percentage Error
history = model.fit(
    X_train, y_train,
    epochs=10,
    steps_per_epoch=64,
    batch_size=batch_size,
    validation_data=(X_test, y_test),
)

# Drop Keras' global state and the model object before the next experiment;
# `history` is kept.
keras.backend.clear_session()
del model

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Pure LSTM with Word Embedding

In [7]:
from keras.models import Sequential
from keras.layers import Embedding
from keras import layers

# Pure LSTM regressor: a learned word embedding, two stacked LSTM layers (the
# first with input and recurrent dropout) and a single linear output unit
# predicting the rating.
embedding_dim = 64
model = Sequential()
model.add(Embedding(max_features, embedding_dim, input_length=maxlen))
# The first LSTM returns the full sequence so the second LSTM gets one vector
# per timestep.
model.add(layers.LSTM(32, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
model.add(layers.LSTM(16))
model.add(layers.Dense(1))

# `metrics` is documented as a list of metrics; the list form works across
# Keras versions, whereas a bare string is not accepted by all of them.
model.compile(optimizer='adam', loss='mae', metrics=['mape'])  # mape: Mean Absolute Percentage Error
history = model.fit(
    X_train, y_train,
    epochs=10,
    steps_per_epoch=64,
    batch_size=batch_size,
    validation_data=(X_test, y_test),
)

# Drop Keras' global state and the model object before the next experiment;
# `history` is kept.
keras.backend.clear_session()
del model

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Combined CNN and LSTM with Word Embedding

In [8]:
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Conv1D, MaxPooling1D
from keras.layers import Dense

# CNN + LSTM regressor: a learned word embedding, two 1-D convolutions with a
# max-pooling step in between, then two stacked LSTM layers and a single linear
# output unit predicting the rating.
#
# The layers are built from the names imported in this cell rather than via
# `layers.<Name>`, because `layers` is only in scope if an earlier cell that
# imports it has already been run.
embedding_dim = 64
model = Sequential()
model.add(Embedding(max_features, embedding_dim, input_length=maxlen))
model.add(Conv1D(64, 5, activation='relu'))
model.add(MaxPooling1D(3))
model.add(Conv1D(32, 5, activation='relu'))
# The first LSTM returns the full sequence so the second LSTM gets one vector
# per timestep.
model.add(LSTM(32, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
model.add(LSTM(16))
model.add(Dense(1))

# `metrics` is documented as a list of metrics; the list form works across
# Keras versions, whereas a bare string is not accepted by all of them.
model.compile(optimizer='adam', loss='mae', metrics=['mape'])  # mape: Mean Absolute Percentage Error
history = model.fit(
    X_train, y_train,
    epochs=10,
    steps_per_epoch=64,
    batch_size=batch_size,
    validation_data=(X_test, y_test),
)

# Drop Keras' global state and the model object before the next experiment;
# `history` is kept.
keras.backend.clear_session()
del model

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Combined CNN with Bidirectional LSTM

In [9]:
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Conv1D, MaxPooling1D, Bidirectional
from keras.layers import Dense

# CNN + bidirectional LSTM regressor: a learned word embedding, two 1-D
# convolutions with a max-pooling step in between, then two stacked
# bidirectional LSTM layers and a single linear output unit predicting the
# rating.
#
# The layers are built from the names imported in this cell rather than via
# `layers.<Name>`, because `layers` is only in scope if an earlier cell that
# imports it has already been run.
embedding_dim = 64
model = Sequential()
model.add(Embedding(max_features, embedding_dim, input_length=maxlen))
model.add(Conv1D(64, 5, activation='relu'))
model.add(MaxPooling1D(3))
model.add(Conv1D(32, 5, activation='relu'))
# The first bidirectional LSTM returns the full sequence so the second one
# gets one vector per timestep.
model.add(Bidirectional(
    LSTM(32, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)))
model.add(Bidirectional(LSTM(16)))
model.add(Dense(1))

# `metrics` is documented as a list of metrics; the list form works across
# Keras versions, whereas a bare string is not accepted by all of them.
model.compile(optimizer='adam', loss='mae', metrics=['mape'])  # mape: Mean Absolute Percentage Error
history = model.fit(
    X_train, y_train,
    epochs=10,
    steps_per_epoch=64,
    batch_size=batch_size,
    validation_data=(X_test, y_test),
)

# Drop Keras' global state and the model object before the next experiment;
# `history` is kept.
keras.backend.clear_session()
del model

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
