In [66]:
import numpy as np
from tensorflow import keras
from keras.datasets import imdb
from keras.utils import pad_sequences
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, SimpleRNN, Dense, Dropout
from sklearn.model_selection import train_test_split

In [67]:
# load dataset

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

In [68]:
max_features = 10000
maxlen = 500
batch_size = 32

**RNN**

In [69]:
X = np.hstack([x_train, x_test])
y = np.hstack([y_train, y_test])

In [70]:
X_padded = pad_sequences(X, maxlen=maxlen)

In [71]:
x_train, x_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)

In [89]:
# model creation

model = Sequential()
model.add(Embedding(max_features, 64))
model.add(SimpleRNN(128, activation="tanh"))
model.add(Dense(32, activation='relu'))
#model.add(Dropout(0.3))
model.add(Dense(1, activation="sigmoid"))

In [90]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [91]:
history = model.fit(x_train, y_train, epochs=20, batch_size=128, validation_split=0.2)

Epoch 1/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 63ms/step - accuracy: 0.5116 - loss: 0.6970 - val_accuracy: 0.5539 - val_loss: 0.6881
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 60ms/step - accuracy: 0.5513 - loss: 0.6837 - val_accuracy: 0.6204 - val_loss: 0.6340
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 60ms/step - accuracy: 0.6882 - loss: 0.5866 - val_accuracy: 0.6607 - val_loss: 0.6050
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 60ms/step - accuracy: 0.7465 - loss: 0.5050 - val_accuracy: 0.7385 - val_loss: 0.5356
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 60ms/step - accuracy: 0.8314 - loss: 0.3844 - val_accuracy: 0.7894 - val_loss: 0.4751
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 60ms/step - accuracy: 0.8723 - loss: 0.3209 - val_accuracy: 0.6440 - val_loss: 0.6428
Epoch 7/20
[1m2

In [92]:
# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print('Test loss SimpleRNN:', loss)
print('Test accuracy SimpleRNN:', accuracy)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - accuracy: 0.7980 - loss: 0.7851
Test loss SimpleRNN: 0.7950771450996399
Test accuracy SimpleRNN: 0.7957000136375427


**LSTM**

In [49]:
# model creation

model = Sequential()
model.add(Embedding(max_features, 64))
model.add(LSTM(32))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation="sigmoid"))

In [50]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [51]:
history = model.fit(x_train, y_train, epochs=20, batch_size=128, validation_split=0.2)

Epoch 1/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 25ms/step - accuracy: 0.6987 - loss: 0.5488 - val_accuracy: 0.8834 - val_loss: 0.2892
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 23ms/step - accuracy: 0.9116 - loss: 0.2275 - val_accuracy: 0.8716 - val_loss: 0.3102
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 21ms/step - accuracy: 0.9171 - loss: 0.2171 - val_accuracy: 0.8849 - val_loss: 0.2849
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - accuracy: 0.9519 - loss: 0.1362 - val_accuracy: 0.8746 - val_loss: 0.3127
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 23ms/step - accuracy: 0.9626 - loss: 0.1096 - val_accuracy: 0.8696 - val_loss: 0.3365
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 21ms/step - accuracy: 0.9688 - loss: 0.0923 - val_accuracy: 0.8739 - val_loss: 0.4733
Epoch 7/20
[1m250

In [52]:
# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print('Test loss LSTM:', loss)
print('Test accuracy LSTM:', accuracy)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.8820 - loss: 0.7561
Test loss LSTM: 0.7644813060760498
Test accuracy LSTM: 0.8805000185966492


**BRNN (Bidirectional RNN)**

In [53]:
# model creation

model = Sequential()
model.add(Embedding(max_features, 64))
model.add(Bidirectional(LSTM(32)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [54]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [55]:
history = model.fit(x_train, y_train, epochs=20, batch_size=128, validation_split=0.2)

Epoch 1/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 41ms/step - accuracy: 0.7082 - loss: 0.5258 - val_accuracy: 0.8696 - val_loss: 0.3109
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9133 - loss: 0.2258 - val_accuracy: 0.8794 - val_loss: 0.3108
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 36ms/step - accuracy: 0.9367 - loss: 0.1730 - val_accuracy: 0.8690 - val_loss: 0.3624
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 40ms/step - accuracy: 0.9427 - loss: 0.1572 - val_accuracy: 0.8761 - val_loss: 0.3688
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 39ms/step - accuracy: 0.9664 - loss: 0.0970 - val_accuracy: 0.8819 - val_loss: 0.3331
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 39ms/step - accuracy: 0.9727 - loss: 0.0832 - val_accuracy: 0.8720 - val_loss: 0.4450
Epoch 7/20
[1m25

In [56]:
# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print('Test loss BRNN:', loss)
print('Test accuracy BRNN:', accuracy)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step - accuracy: 0.8771 - loss: 0.6973
Test loss BRNN: 0.7042281031608582
Test accuracy BRNN: 0.8759999871253967


**DRNN (Deep RNN)**

In [57]:
# model creation

model = Sequential()
model.add(Embedding(max_features, 32))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [58]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [93]:
history = model.fit(x_train, y_train, epochs=20, batch_size=128, validation_split=0.2)

Epoch 1/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 60ms/step - accuracy: 0.9850 - loss: 0.0521 - val_accuracy: 0.7952 - val_loss: 0.8683
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 59ms/step - accuracy: 0.9825 - loss: 0.0573 - val_accuracy: 0.7905 - val_loss: 0.8106
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 61ms/step - accuracy: 0.9811 - loss: 0.0611 - val_accuracy: 0.7874 - val_loss: 0.9046
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 59ms/step - accuracy: 0.9890 - loss: 0.0393 - val_accuracy: 0.8043 - val_loss: 0.8089
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 59ms/step - accuracy: 0.9852 - loss: 0.0524 - val_accuracy: 0.8040 - val_loss: 0.8426
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 60ms/step - accuracy: 0.9815 - loss: 0.0604 - val_accuracy: 0.7791 - val_loss: 0.7686
Epoch 7/20
[1m2

In [94]:
# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print('Test loss DRNN:', loss)
print('Test accuracy DRNN:', accuracy)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 22ms/step - accuracy: 0.7529 - loss: 0.6936
Test loss DRNN: 0.7061485648155212
Test accuracy DRNN: 0.746999979019165


Вывод:
Выходила проблема с переобучением.

В среднем результат Test accuracy был 88, но в попытках улучшить, все стало хуже.