In [2]:
!pip install tensorflow



In [4]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential


In [5]:
# Preprocess the data
texts = ['This is the first document',
         'This document is the second document',
         'And this is the third one',
         'Is this the first document?']
max_words = 20000
max_len = 100

In [6]:
# Tokenize the texts
tokenizer = Tokenizer(num_words=max_words)
sequences = tokenizer.texts_to_sequences(texts)

In [7]:
# Pad the sequences to a fixed length
padded_sequences = pad_sequences(sequences, maxlen=max_len)


In [8]:
# Convert the labels to categorical variables
labels = to_categorical([0, 0, 1, 1])


In [9]:
# Build the model
model = Sequential()
model.add(Embedding(max_words, 128, input_length=max_len))
model.add(LSTM(64))
model.add(Dropout(0.5))
model.add(Dense(2, activation='sigmoid'))




In [10]:
# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [11]:
# Fit the model
model.fit(padded_sequences, labels, epochs=5, batch_size=32)

Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 1.0000 - loss: 0.6868
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step - accuracy: 0.5000 - loss: 0.6924
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step - accuracy: 0.2500 - loss: 0.6988
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step - accuracy: 0.5000 - loss: 0.6871
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step - accuracy: 0.7500 - loss: 0.6817


<keras.src.callbacks.history.History at 0x7e9c56d4eb90>

In [12]:
model.predict(padded_sequences)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step


array([[0.48845097, 0.49999318],
       [0.48845097, 0.49999318],
       [0.48845097, 0.49999318],
       [0.48845097, 0.49999318]], dtype=float32)

# source

- https://spotintelligence.com/2023/01/07/rnn-in-nlp/