# **Setup**

In [2]:
# download data
# !wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1nckRRVYRiWG8VWoUY4wwtuQm6DjIbpmo' -O w_review_train.csv

# # download font for matplot
# !wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1DsfYE5uI1ZA_IXDNkezQTv-NQehxgZQy' -O THSarabun.ttf

# install library
!pip install pythainlp
# get tensorflow
# !pip install --upgrade pip
!pip install tensorflow



# Import Library
---

In [3]:
from tensorflow import keras

import pandas as pd
import numpy as np
from pythainlp import word_tokenize
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences

---
### Prepare Data {Tokenize Word}
---

In [10]:
# check data
data = pd.read_csv("FastFood_Opinion.csv")
print(data.shape)
print(data.head())

# word tokenize
comment = data["message"].apply(word_tokenize, engine="newmm", keep_whitespace=False)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(comment)
comment = tokenizer.texts_to_sequences(comment)

# adjust sequence to have same size
maxlen = max([len(s) for s in comment])
print("max len", maxlen)
x = pad_sequences(comment, maxlen=maxlen, padding="post")

y = np.array(data["class"], dtype=np.int32).reshape(-1, 1)

# test class at comment n-1
print(y[0], np.argmax(y[0]))
print(y[400], np.argmax(y[400]))

xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size=0.2, random_state=25)

(642, 2)
   class                                            message
0      0  11.30 น. ไปที่สาขาในปั๊มคาลเท็กซ์ ประชานุกูล ซ...
1      0  BK ทุกสาขาบริการดี โดยเฉพาะอย่างยิ่งเอมโพเรียม...
2      0  Burger King สาขา The Bright พระราม 2 บริการห่ว...
3      0  Burger King สาขาปั้ม ป.ต.ท.บ้านพร อ.ถลาง เมื่อ...
4      0  chicken strip ชิ้นเล็กมาก เทียบกับป้ายโฆษณาบนโต๊ะ
max len 417
[0] 0
[1] 0


In [11]:
from keras.layers import SimpleRNN, LSTM, Embedding, Flatten, Dense
from keras.models import Sequential, Model

---
### Fully Connected | [Sequential](https://keras.io/api/models/sequential/), [Dense](https://keras.io/api/layers/core_layers/dense/), [Embedding](https://keras.io/api/layers/core_layers/embedding/)
---

In [18]:
# Tokenizer().word_index = word index
# +1 is from function paramiter
vocabSize = len(tokenizer.word_index) + 1

model = Sequential()
model.add(Embedding(input_dim=vocabSize,
                   output_dim=500,
                   input_length=maxlen))
model.add(Dense(100, activation="relu"))
model.add(Dense(100, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

model.summary()

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
print("x", xTrain.shape, ":", "y", yTrain.shape)
model.fit(xTrain, yTrain, batch_size=64, epochs=5, verbose=1)

y_predict = model.predict(xTest)
print(model.evaluate(xTest, yTest, verbose=1))

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_8 (Embedding)     (None, 417, 500)          1752500   
                                                                 
 dense_24 (Dense)            (None, 417, 100)          50100     
                                                                 
 dense_25 (Dense)            (None, 417, 100)          10100     
                                                                 
 dense_26 (Dense)            (None, 417, 1)            101       
                                                                 
Total params: 1812801 (6.92 MB)
Trainable params: 1812801 (6.92 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
x (513, 417) : y (513, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[0.670784056186676, 0.5581395626068115]


In [19]:
vocabSize = len(tokenizer.word_index)

modelRNN = Sequential()
modelRNN.add(SimpleRNN(500, input_shape=(maxlen, vocabSize)))
modelRNN.add(Dense(100, activation="relu"))
modelRNN.add(Dense(100, activation="relu"))
modelRNN.add(Dense(1, activation="sigmoid"))

modelRNN.summary()

modelRNN.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
modelRNN.fit(xTrain, yTrain, batch_size=64, epochs=5, verbose=1)

yPredict = modelRNN.predict(xTest)
print(modelRNN.evaluate(xTest, yTest, verbose=1))

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 500)               2002500   
                                                                 
 dense_27 (Dense)            (None, 100)               50100     
                                                                 
 dense_28 (Dense)            (None, 100)               10100     
                                                                 
 dense_29 (Dense)            (None, 1)                 101       
                                                                 
Total params: 2062801 (7.87 MB)
Trainable params: 2062801 (7.87 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/5


ValueError: in user code:

    File "C:\Users\Lenovo\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\Lenovo\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Lenovo\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\Lenovo\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1080, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\Lenovo\anaconda3\lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\Lenovo\anaconda3\lib\site-packages\keras\src\engine\input_spec.py", line 235, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_10' (type Sequential).
    
    Input 0 of layer "simple_rnn" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 417)
    
    Call arguments received by layer 'sequential_10' (type Sequential):
      • inputs=tf.Tensor(shape=(None, 417), dtype=int32)
      • training=True
      • mask=None
