In [72]:
import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Embedding
from keras.layers import LSTM, SpatialDropout1D
from keras.datasets import imdb

In [65]:
np.random.seed(42)
max_features = 8000
maxlen = 100
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)

In [70]:
np.random.seed(42)
model = Sequential()
model.add(Embedding(max_features, 32))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(75, dropout=0.2, recurrent_dropout=0.2)) 
model.add(Dense(1, activation="sigmoid"))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [71]:
model.fit(X_train, y_train, batch_size=16, epochs=5,validation_data=(X_test, y_test), verbose=1)
scores = model.evaluate(X_test, y_test, batch_size=16)
print("Точность на тестовых данных: %.2f%%" % (scores[1] * 100))

Epoch 1/5
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 71ms/step - accuracy: 0.7607 - loss: 0.4941 - val_accuracy: 0.8312 - val_loss: 0.3872
Epoch 2/5
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 71ms/step - accuracy: 0.8494 - loss: 0.3548 - val_accuracy: 0.8449 - val_loss: 0.3667
Epoch 3/5
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 79ms/step - accuracy: 0.8728 - loss: 0.3112 - val_accuracy: 0.8436 - val_loss: 0.3669
Epoch 4/5
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 81ms/step - accuracy: 0.8924 - loss: 0.2679 - val_accuracy: 0.8447 - val_loss: 0.3875
Epoch 5/5
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 82ms/step - accuracy: 0.9068 - loss: 0.2338 - val_accuracy: 0.8446 - val_loss: 0.3789
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.8446 - loss: 0.3789
Точность на тестовых данных: 84.46%


In [73]:
model_json = model.to_json()
json_file = open("LSTM.json", "w")
json_file.write(model_json)
json_file.close()
model.save_weights("LSTM.weights.h5")


In [74]:
json_file = open("LSTM.json", "r")
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("LSTM.weights.h5")
loaded_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [82]:
NUM_WORDS = 5000
INDEX_FROM = 3
word_to_id = imdb.get_word_index()
word_to_id = {k:(v+INDEX_FROM) for k,v in word_to_id.items()}
word_to_id["<PAD>"] = 0
word_to_id["<START>"] = 1
word_to_id["<UNK>"] = 2
id_to_word = {value:key for key, value in word_to_id.items()}
review_id = 20
print(' '.join(id_to_word[num] for num in X_train[review_id] ))
print(' '.join(id_to_word[num] for num in sequence.pad_sequences(X_train, maxlen=80)[review_id] ))


it could get any worse and it does the storyline is so predictable it seems written by a high school <UNK> class the sets are pathetic but <UNK> better than the <UNK> and the acting is wooden br br the <UNK> <UNK> seems to have been stolen from the props <UNK> of <UNK> <UNK> there didn't seem to be a single original idea in the whole movie br br i found this movie to be so bad that i laughed most of the way through br br <UNK> <UNK> should hang his head in shame he obviously needed the money
<UNK> class the sets are pathetic but <UNK> better than the <UNK> and the acting is wooden br br the <UNK> <UNK> seems to have been stolen from the props <UNK> of <UNK> <UNK> there didn't seem to be a single original idea in the whole movie br br i found this movie to be so bad that i laughed most of the way through br br <UNK> <UNK> should hang his head in shame he obviously needed the money


In [83]:
tester = "A masterclass in film making, is The Godfather a contender for the best film of all time?I'd argue the case that it is, this is the ultimate gangster movie. Before you panic at the thought of a film being almost three hours long, you needn't, you won't even notice the time, it flies by. Production values are incredible, it looks sublime the whole way through, it's so well produced, at roughly fifty years old it puts many new films to shame. Brandon, Pacino and Castellano, just a few of the Incredible performances, I could add a whole lot more. If you're considering buying a hard copy, I would recommend it on blu ray, it is sharper than the dvd, there is a difference. This film has had a huge influence down the years, it is still, and will forever be, one of the greatest, 10/10."


In [92]:
def encode_review(text):
    words = text.lower().replace('.', '').replace(',', '').replace("'", "").split()
    encoded = [1]
    for w in words:
        idx = word_to_id.get(w, 2)
        if idx >= max_features:
            idx = 2
        encoded.append(idx)
    return encoded
encoded_review = encode_review(tester)
padded_review = sequence.pad_sequences([encoded_review], maxlen=maxlen)
prediction = loaded_model.predict(padded_review)
sentiment = "+" if prediction[0][0] > 0.5 else "-"
print(f"Результат: {sentiment}")
print(f"Оценка: {prediction[0][0]:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Результат: +
Оценка: 0.8111


In [93]:
tester2 = "I'm not surprised that so many people fell for this one. When I was watching this movie, a couple viewers next to me sobbed whenever you're 'supposed' to sob -- or at least feel 'touched'. Like when Hunting said he didn't love the girl. Like when Robin Williams' character (sorry I forgot his role's name) was telling Hunting repeatedly 'It's not your fault' (oh Lord, just thinking of that scene gives me the goosebumps). I couldn't have cared less for what would happen to the characters. Many people sob for Hollywood manufactured characters they can't even relate to (think Titanic; Yuck!)... but it really only made me cringe and want to get out of the theatre. I guess I simply refuse to be psychically and emotionally manipulated by all this. Folks it's not me who's being condescending ... those characters are, and for no good reason because they're unreal. Worse yet, nothing is new or surprising. Even Robin Williams' character is all cliched. I gave it 1 out of 10. It's probably not that bad; it's just quite mediocre... but so many people went to the other extreme and gave it a 10 so I figured a single balancing vote won't hurt."


In [94]:
encoded_review2 = encode_review(tester2)
padded_review2 = sequence.pad_sequences([encoded_review2], maxlen=maxlen)
prediction2 = loaded_model.predict(padded_review2)
sentiment2 = "+" if prediction2[0][0] > 0.5 else "-"
print(f"Результат: {sentiment2}")
print(f"Оценка: {prediction2[0][0]:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Результат: -
Оценка: 0.0105
