<a href="https://colab.research.google.com/github/InowaR/colab/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.preprocessing.sequence import pad_sequences

# Загрузка данных IMDb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

print(y_train[0])
print(y_train[1])

# Преобразование данных в векторы
max_length = max([len(x) for x in x_train])
x_train = pad_sequences(x_train, maxlen=max_length)
x_test = pad_sequences(x_test, maxlen=max_length)

print(max_length)
print(x_train)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
1
0
2494
[[   0    0    0 ...   19  178   32]
 [   0    0    0 ...   16  145   95]
 [   0    0    0 ...    7  129  113]
 ...
 [   0    0    0 ...    4 3586    2]
 [   0    0    0 ...   12    9   23]
 [   0    0    0 ...  204  131    9]]


In [2]:
num_chars = 10000  # Количество уникальных слов
embedding_dim = 64  # Размерность встраивания
lstm_units = 128  # Количество единиц LSTM

In [3]:
model = Sequential()
model.add(Embedding(num_chars, embedding_dim, input_length=max_length))
model.add(LSTM(lstm_units))
model.add(Dense(1, activation='sigmoid'))  # Бинарная классификация (положительный/отрицательный отзыв)

In [4]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [5]:
model.fit(x_train, y_train, epochs=3, validation_split=0.2)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x798c3e4fbcd0>

In [6]:
model.save('model.keras')

In [7]:
loss, accuracy = model.evaluate(x_test, y_test)
print('Тестовая точность:', accuracy)

Тестовая точность: 0.8208400011062622


In [8]:
import re

def preprocess_text(text):

    # Удаление знаков препинания
    text = re.sub(r'[^\w\s]', '', text)

    # Удаление переходов на новую строку
    text = text.replace('\n', ' ')

    # Преобразование в строчные буквы
    text = text.lower()
    return text


In [9]:
text = """After each occasion when I saw it, I define it as masterpiece. sure, many explanations. but something is real impressive - the plot. the science to explore each emotion, expectation, thrill, characters and end. the performances are more than beautiful. the atmosphere - you feel it . the tension - great good point - it is a film about yourself. about fears, appearances. and that does it a magnificent film. and more. a remarkable masterpiece.
"""

new_text = preprocess_text(text)

In [10]:
import numpy as np


def predict(review):
    word_index = imdb.get_word_index()

    list_index = [[word_index[word] for word in review.split()]]

    # Преобразовать отзыв в вектор
    new_review_vec = pad_sequences(list_index , maxlen=max_length)

    # Сделать прогноз
    prediction = model.predict(new_review_vec)

    print(prediction)

predict(new_text)

x1 = [x_train[0]]
print(model.predict(np.array(x1)))

x2 = [x_train[1]]
print(model.predict(np.array(x2)))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[[0.14378855]]
[[0.74895376]]
[[0.02456886]]
