# Import Library

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Import Dataset

In [2]:
data = pd.read_csv("Restaurant_Reviews.tsv", delimiter='\t')
data.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [3]:
# Cek ukuran dataset

data.shape

(1000, 2)

In [4]:
# Cek informasi dataset

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Review  1000 non-null   object
 1   Liked   1000 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 15.8+ KB


# Splitting Data

In [5]:
x = data['Review'].values
y = data['Liked'].values

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)

# Modelling Data

In [6]:
tokenizer = Tokenizer(num_words = 1000, oov_token = 'x')
tokenizer.fit_on_texts(x_train)
tokenizer.fit_on_texts(x_test)

sekuen_train = tokenizer.texts_to_sequences(x_train)
sekuen_test = tokenizer.texts_to_sequences(x_test)

padded_train = pad_sequences(sekuen_train, maxlen = 120)
padded_test = pad_sequences(sekuen_test, maxlen = 120)

In [7]:
print("MELIHAT SEMBARANG NILAI")
print("-----------------------\n")

print(f"Nilai x_train[0] : {x_train[0]}")
print(f"Nilai sekuen_train[0] : {sekuen_train[0]}")
print(f"Nilai padded_train[0] : {padded_train[0]}")


MELIHAT SEMBARANG NILAI
-----------------------

Nilai x_train[0] : -My order was not correct.
Nilai sekuen_train[0] : [21, 152, 5, 13, 899]
Nilai padded_train[0] : [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0  21 152   5  13 899]


In [8]:
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(1000, 16, input_length = 120),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation = 'relu'),
    tf.keras.layers.Dense(1, activation = 'sigmoid')
])

model.summary()

model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 120, 16)           16000     
                                                                 
 global_average_pooling1d (  (None, 16)                0         
 GlobalAveragePooling1D)                                         
                                                                 
 dense (Dense)               (None, 24)                408       
                                                                 
 dense_1 (Dense)             (None, 1)                 25        
                                                                 
Total params: 16433 (64.19 KB)
Trainable params: 16433 (64.19 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [9]:
num_epochs = 50

history = model.fit(padded_train, y_train, epochs=num_epochs,
                    validation_data = (padded_test, y_test), verbose = 2)

Epoch 1/50
25/25 - 2s - loss: 0.6931 - accuracy: 0.5238 - val_loss: 0.6928 - val_accuracy: 0.5850 - 2s/epoch - 82ms/step
Epoch 2/50
25/25 - 0s - loss: 0.6926 - accuracy: 0.5575 - val_loss: 0.6925 - val_accuracy: 0.6200 - 226ms/epoch - 9ms/step
Epoch 3/50
25/25 - 0s - loss: 0.6917 - accuracy: 0.5337 - val_loss: 0.6922 - val_accuracy: 0.4900 - 335ms/epoch - 13ms/step
Epoch 4/50
25/25 - 0s - loss: 0.6909 - accuracy: 0.6037 - val_loss: 0.6914 - val_accuracy: 0.7150 - 340ms/epoch - 14ms/step
Epoch 5/50
25/25 - 0s - loss: 0.6893 - accuracy: 0.6850 - val_loss: 0.6902 - val_accuracy: 0.7100 - 220ms/epoch - 9ms/step
Epoch 6/50
25/25 - 0s - loss: 0.6870 - accuracy: 0.7200 - val_loss: 0.6887 - val_accuracy: 0.6800 - 208ms/epoch - 8ms/step
Epoch 7/50
25/25 - 0s - loss: 0.6836 - accuracy: 0.7738 - val_loss: 0.6864 - val_accuracy: 0.7000 - 288ms/epoch - 12ms/step
Epoch 8/50
25/25 - 0s - loss: 0.6801 - accuracy: 0.6637 - val_loss: 0.6834 - val_accuracy: 0.6150 - 237ms/epoch - 9ms/step
Epoch 9/50
25/2

# Test Model

In [10]:
print("MELIHAT SEMBARANG NILAI")
print("-----------------------\n")

print(f"Nilai x_test[1] : {x_test[1]}")
print(f"Nilai sekuen_test[1] : {sekuen_test[1]}")
print(f"Nilai padded_test[1] : {padded_test[1]}")


MELIHAT SEMBARANG NILAI
-----------------------

Nilai x_test[1] : She ordered a toasted English muffin that came out untoasted.
Nilai sekuen_test[1] : [185, 127, 6, 885, 1, 1, 25, 76, 60, 1]
Nilai padded_test[1] : [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0 185 127   6 885   1   1  25  76  60   1]


In [11]:
padded_test[1]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0, 185, 127,   6, 885,   1,   1,  25,
        76,  60,   1], dtype=int32)

In [12]:
print(f"Nilai y_test[1] : {y_test[1]}")

Nilai y_test[1] : 0


In [14]:
model.predict([[ 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0, 185, 127,   6, 885,   1,   1,  25,
        76,  60,   1]])



array([[0.09405421]], dtype=float32)