In [2]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.layers import TextVectorization
import numpy as np

data = pd.read_csv('reviews.csv') # 41319 value, max_length = 4542
x = data['content'].astype(str)
y = data['score']
y = np.array((int(float(rating)) if rating > 5 else 5) for rating in y)

data = data.dropna(subset=['score'])   

x = data['content'].astype(str)
y = data['score'].astype(int)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=42)

vectorize_layer = TextVectorization(
    max_tokens=450,
    output_mode='int',
    output_sequence_length=350
)

vectorize_layer.adapt(x)

x_train_vectorized = vectorize_layer(x_train)
x_test_vectorized = vectorize_layer(x_test)

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Input(shape=(350,)))
model.add(tf.keras.layers.Embedding(input_dim=450, output_dim=64, name="embedding"))
model.add(tf.keras.layers.SimpleRNN(64))
model.add(tf.keras.layers.Dense(6))

model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            optimizer='adam', metrics=['accuracy'])

In [3]:
model.fit(x_train_vectorized, y_train, epochs=2, validation_data=(x_test_vectorized, y_test))
loss, accuracy = model.evaluate(x_test_vectorized, y_test)

print(f'Test loss: {loss}')
print(f'Test accuracy: {accuracy}')

Epoch 1/2
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - accuracy: 0.7460 - loss: 0.9136 - val_accuracy: 0.7710 - val_loss: 0.8105
Epoch 2/2
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 31ms/step - accuracy: 0.7613 - loss: 0.8428 - val_accuracy: 0.7710 - val_loss: 0.8082
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7808 - loss: 0.7860
Test loss: 0.808241069316864
Test accuracy: 0.7710000276565552


In [3]:
from keras_visualizer import visualizer

visualizer(model=model, file_name="pedaret.png", file_format="png")

In [4]:
data = np.array(vectorize_layer([
    'good'
]))
data = model.predict(data)
print(data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
[[-4.9715295e+00  3.0218145e-01 -9.8412442e-01  4.1612689e-03
   1.0090548e+00  3.0174694e+00]
 [-4.9715290e+00  3.0218151e-01 -9.8412478e-01  4.1612708e-03
   1.0090549e+00  3.0174694e+00]
 [-4.9715295e+00  3.0218151e-01 -9.8412454e-01  4.1612876e-03
   1.0090549e+00  3.0174694e+00]
 [-4.9715300e+00  3.0218157e-01 -9.8412478e-01  4.1613304e-03
   1.0090549e+00  3.0174694e+00]]
