In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [2]:
# Read the entire dataset file into a single string, decoded as UTF-8.
with open('RNN_text_dataset.txt', mode='r', encoding='utf-8') as file:
    text_data = file.read()

In [3]:
# Display the raw text that was just loaded (the cell's last expression is echoed as output).
text_data

'Recurrent Neural Network RNN is a type of Neural Network where the output from the previous step is fed as input to the current step. In traditional neural networks, all the inputs and outputs are independent of each other. Still, in cases when it is required to predict the next word of a sentence, the previous words are required and hence there is a need to remember the previous words. Thus RNN came into existence, which solved this issue with the help of a Hidden Layer. The main and most important feature of RNN is its Hidden state, which remembers some information about a sequence. The state is also referred to as Memory State since it remembers the previous input to the network. It uses the same parameters for each input as it performs the same task on all the inputs or hidden layers to produce the output. This reduces the complexity of parameters, unlike other neural networks.\n\nArtificial neural networks that do not have looping nodes are called feed forward neural networks. Be

In [4]:
# Treat the whole text as one document: fit the vocabulary on it, then
# encode that same document as a list of integer word ids.
corpus = [text_data]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
word_index = tokenizer.word_index  # mapping built by fit_on_texts: word -> integer id
sequence_data = tokenizer.texts_to_sequences(corpus)

In [5]:
# Pad every encoded sequence to the length of the longest one.
max_sequence_len = max(map(len, sequence_data))
padded_sequence = pad_sequences(sequence_data, maxlen=max_sequence_len)

In [6]:
# Print the padded id matrix and the vocabulary mapping for a quick visual check.
for _label, _value in (
    ("Tokenized sequence:", padded_sequence),
    ("Word index:", word_index),
):
    print(_label, _value)


Tokenized sequence: [[ 26   5   2  19   4   7  20   3   5   2  30   1  15  31   1  21  27   4
   50  10  12   6   1  85  27   9  86   5  28  39   1  22  11  40  16  51
    3  32  52  87   9  88  89  23   4  53   6  33   1  90  91   3   7  41
    1  21  24  16  53  11  92  34   4   7  93   6  94   1  21  24  95  19
   96  97  98  35  99   8 100  54   1 101   3   7  13  36   1 102  11  55
  103 104   3  19   4 105  13  14  35  56 106  42  57   7 107   1  14   4
   43  58   6  10  59  14 108  23  56   1  21  12   6   1   2  23 109   1
   44  60  29  32  12  10  23 110   1  44 111 112  39   1  22 113  13  61
    6 114   1  15   8 115   1 116   3  60 117  52   5  28 118   5  28  62
  119  63 120 121 122  16  45 123  64   5  28 124  39  42   4  37 125  64
    8 126   3   5   2   4  43  58   6  10   7 127  36   5   2  42 128  31
    1  12  36   6   1  15  36  65 129  66  13  61  16 130  65 131   9   7
  132   5   2 133  28  16 134  29  46 135 136  29  67  30  12  11  15  16
   51 137 138 139 

In [7]:
# Define the RNN model: token ids -> embeddings -> recurrent layer -> dense head -> one sigmoid unit.
model = tf.keras.Sequential([
    # Declare the input explicitly (rows of `max_sequence_len` token ids) instead of passing
    # the deprecated `input_length` argument to Embedding. Having a declared input also
    # builds the model right away, so `model.summary()` can report shapes and parameters.
    tf.keras.Input(shape=(max_sequence_len,), dtype='int32'),
    # Vocabulary size is len(word_index) + 1: Tokenizer ids start at 1, so id 0
    # (the default padding value of pad_sequences) needs its own embedding row.
    tf.keras.layers.Embedding(input_dim=len(word_index) + 1, output_dim=128),
    # With the default return_sequences=False only the last hidden state is passed on.
    tf.keras.layers.SimpleRNN(128),
    tf.keras.layers.Dense(64, activation='relu'),
    # Single sigmoid unit: one value in (0, 1) per input sequence.
    tf.keras.layers.Dense(1, activation='sigmoid')
])



In [8]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the tracked metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [9]:
# Summary of the model: print a per-layer overview of the network defined above
model.summary()


In [10]:
# Model name lookup: report which of the known architecture names occur in the text.
# Matching is a case-insensitive substring test, so e.g. "RNNs" also counts as a hit for "RNN".
model_names = ["RNN", "LSTM", "GRU"]
text_lower = text_data.lower()  # lower-case the text once instead of once per candidate name
# The loop variable is `name` (not `model`) so it cannot be mistaken for the Keras `model` object.
found_models = [name for name in model_names if name.lower() in text_lower]

print("Models mentioned in the text:", found_models)


Models mentioned in the text: ['RNN', 'LSTM', 'GRU']


In [13]:
# One hard-coded label for the one document encoded in `padded_sequence`.
labels = np.array([1])

# Default (single sample): nothing can be held out, so the same arrays are
# used as both the training set and the "test" set.
X_train = X_test = padded_sequence
y_train = y_test = labels

if len(labels) > 1:
    # Several samples available: replace the default with an 80/20 split (fixed seed).
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(
        padded_sequence, labels, test_size=0.2, random_state=42
    )



In [15]:
# Define the RNN model (rebinding `model` gives a fresh, untrained network for the fit below).
model = tf.keras.Sequential([
    # Explicit input declaration replaces the deprecated `input_length` argument of
    # Embedding and lets Keras build the model before the first call to fit().
    tf.keras.Input(shape=(max_sequence_len,), dtype='int32'),
    # +1 on the vocabulary size: Tokenizer ids start at 1, and id 0 is the
    # default padding value written by pad_sequences.
    tf.keras.layers.Embedding(input_dim=len(word_index) + 1, output_dim=128),
    # Default return_sequences=False: only the final hidden state feeds the dense head.
    tf.keras.layers.SimpleRNN(128),
    tf.keras.layers.Dense(64, activation='relu'),
    # One sigmoid output per sequence, matching a binary (0/1) label.
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [16]:
# Attach the optimizer, loss and metric before training.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [17]:
# Fit for 10 epochs, validating after each epoch; verbose=2 prints one log line per epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    verbose=2,
)

Epoch 1/10
1/1 - 8s - 8s/step - accuracy: 1.0000 - loss: 0.6254 - val_accuracy: 1.0000 - val_loss: 0.3275
Epoch 2/10
1/1 - 0s - 310ms/step - accuracy: 1.0000 - loss: 0.3275 - val_accuracy: 1.0000 - val_loss: 0.2118
Epoch 3/10
1/1 - 0s - 317ms/step - accuracy: 1.0000 - loss: 0.2118 - val_accuracy: 1.0000 - val_loss: 0.1377
Epoch 4/10
1/1 - 0s - 315ms/step - accuracy: 1.0000 - loss: 0.1377 - val_accuracy: 1.0000 - val_loss: 0.1120
Epoch 5/10
1/1 - 0s - 309ms/step - accuracy: 1.0000 - loss: 0.1120 - val_accuracy: 1.0000 - val_loss: 0.0954
Epoch 6/10
1/1 - 0s - 333ms/step - accuracy: 1.0000 - loss: 0.0954 - val_accuracy: 1.0000 - val_loss: 0.1983
Epoch 7/10
1/1 - 0s - 334ms/step - accuracy: 1.0000 - loss: 0.1983 - val_accuracy: 1.0000 - val_loss: 0.3176
Epoch 8/10
1/1 - 0s - 325ms/step - accuracy: 1.0000 - loss: 0.3176 - val_accuracy: 1.0000 - val_loss: 0.2329
Epoch 9/10
1/1 - 0s - 325ms/step - accuracy: 1.0000 - loss: 0.2329 - val_accuracy: 1.0000 - val_loss: 0.0447
Epoch 10/10
1/1 - 0s -

In [18]:
# Score the model on X_test / y_test. With the single hard-coded sample these are the
# training arrays themselves, so the number is not an estimate of generalization.
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)
print("Test accuracy:", test_acc)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step - accuracy: 1.0000 - loss: 0.0329
Test accuracy: 1.0
