In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
import csv

In [20]:
paragraph = "Recurrent Neural Networks (RNNs) are widely used for next word prediction because they can remember patterns and dependencies in sequences of words. In this task, the model learns the relationship between words by processing text data one word at a time and using its hidden state to retain context. During training, the RNN is given a sequence of words and learns to predict the next word by adjusting its weights over multiple epochs. Once trained, the model can generate text by predicting one word at a time based on the previous words, making it useful for applications like autocomplete, chatbots, and text generation."

In [3]:
with open('rnn_next_word_prediction.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Description'])
    writer.writerow([paragraph])

In [4]:
df = pd.read_csv('rnn_next_word_prediction.csv')

print(df.head())

                                         Description
0  Recurrent Neural Networks (RNNs) are widely us...


In [5]:
# Tokenization
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(df['Description'])
seq = tokenizer.texts_to_sequences(df['Description'])

In [6]:
seq[:10]

[[20,
  21,
  22,
  23,
  24,
  25,
  26,
  8,
  9,
  2,
  27,
  28,
  29,
  10,
  30,
  31,
  3,
  32,
  11,
  33,
  12,
  4,
  11,
  34,
  35,
  1,
  13,
  14,
  1,
  36,
  37,
  4,
  5,
  38,
  6,
  39,
  15,
  2,
  16,
  7,
  17,
  3,
  40,
  18,
  41,
  42,
  19,
  43,
  44,
  45,
  46,
  1,
  47,
  48,
  49,
  7,
  50,
  12,
  4,
  3,
  14,
  19,
  51,
  1,
  9,
  2,
  5,
  52,
  18,
  53,
  54,
  55,
  56,
  57,
  58,
  1,
  13,
  10,
  59,
  6,
  5,
  60,
  15,
  2,
  16,
  7,
  17,
  61,
  62,
  1,
  63,
  4,
  64,
  65,
  66,
  8,
  67,
  68,
  69,
  70,
  3,
  6,
  71]]

In [7]:
tokenizer.word_index

{'the': 1,
 'word': 2,
 'and': 3,
 'words': 4,
 'by': 5,
 'text': 6,
 'a': 7,
 'for': 8,
 'next': 9,
 'can': 10,
 'in': 11,
 'of': 12,
 'model': 13,
 'learns': 14,
 'one': 15,
 'at': 16,
 'time': 17,
 'its': 18,
 'to': 19,
 'recurrent': 20,
 'neural': 21,
 'networks': 22,
 'rnns': 23,
 'are': 24,
 'widely': 25,
 'used': 26,
 'prediction': 27,
 'because': 28,
 'they': 29,
 'remember': 30,
 'patterns': 31,
 'dependencies': 32,
 'sequences': 33,
 'this': 34,
 'task': 35,
 'relationship': 36,
 'between': 37,
 'processing': 38,
 'data': 39,
 'using': 40,
 'hidden': 41,
 'state': 42,
 'retain': 43,
 'context': 44,
 'during': 45,
 'training': 46,
 'rnn': 47,
 'is': 48,
 'given': 49,
 'sequence': 50,
 'predict': 51,
 'adjusting': 52,
 'weights': 53,
 'over': 54,
 'multiple': 55,
 'epochs': 56,
 'once': 57,
 'trained': 58,
 'generate': 59,
 'predicting': 60,
 'based': 61,
 'on': 62,
 'previous': 63,
 'making': 64,
 'it': 65,
 'useful': 66,
 'applications': 67,
 'like': 68,
 'autocomplete': 69,


In [8]:
X = []
y = []
total_words_dropped = 0

for i in seq:
    if len(i) > 1:
        for index in range(1, len(i)):
            X.append(i[:index])
            y.append(i[index])
    else:
        total_words_dropped += 1

print("Total Single Words Dropped are:", total_words_dropped)

Total Single Words Dropped are: 0


In [9]:
X = tf.keras.preprocessing.sequence.pad_sequences(X)
y = tf.keras.utils.to_categorical(y)

In [10]:
vocab_size = len(tokenizer.word_index) + 1
vocab_size

72

In [11]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, 14),
    tf.keras.layers.LSTM(100, return_sequences=True),
    tf.keras.layers.LSTM(100),
    tf.keras.layers.Dense(100,activation='relu'),
    tf.keras.layers.Dense(vocab_size,activation='softmax'),
])


In [12]:
model.summary()

In [13]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [15]:
model.fit(X, y, epochs=250)

Epoch 1/250
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.0686 - loss: 4.2206
Epoch 2/250
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.0588 - loss: 4.1398
Epoch 3/250
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 0.0490 - loss: 4.1333
Epoch 4/250
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.0686 - loss: 4.0764  
Epoch 5/250
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.0686 - loss: 4.0242
Epoch 6/250
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.0686 - loss: 3.9906
Epoch 7/250
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.0784 - loss: 3.9513
Epoch 8/250
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - accuracy: 0.0784 - loss: 3.9060
Epoch 9/250
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x24d65b58470>

In [16]:
model.save('nwp.h5')



In [17]:
import os
print("Current working directory:", os.getcwd())

Current working directory: c:\Users\Admin\AVScode\AI\RNN


In [18]:
vocab_array = np.array(list(tokenizer.word_index.keys()))

vocab_array

array(['the', 'word', 'and', 'words', 'by', 'text', 'a', 'for', 'next',
       'can', 'in', 'of', 'model', 'learns', 'one', 'at', 'time', 'its',
       'to', 'recurrent', 'neural', 'networks', 'rnns', 'are', 'widely',
       'used', 'prediction', 'because', 'they', 'remember', 'patterns',
       'dependencies', 'sequences', 'this', 'task', 'relationship',
       'between', 'processing', 'data', 'using', 'hidden', 'state',
       'retain', 'context', 'during', 'training', 'rnn', 'is', 'given',
       'sequence', 'predict', 'adjusting', 'weights', 'over', 'multiple',
       'epochs', 'once', 'trained', 'generate', 'predicting', 'based',
       'on', 'previous', 'making', 'it', 'useful', 'applications', 'like',
       'autocomplete', 'chatbots', 'generation'], dtype='<U12')

In [19]:
def make_prediction(text, n_words):
    for i in range(n_words):
        text_tokenize = tokenizer.texts_to_sequences([text])
        text_padded = tf.keras.preprocessing.sequence.pad_sequences(text_tokenize, maxlen=14)
        prediction = np.squeeze(np.argmax(model.predict(text_padded), axis=-1))
        prediction = str(vocab_array[prediction - 1])
        print(vocab_array[np.argsort(model.predict(text_padded)) - 1].ravel()[:-3])
        text += " " + prediction
    return text

In [None]:
make_prediction("recurrent", 10)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 366ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
['learns' 'dependencies' 'by' 'task' 'sequence' 'retain' 'generation'
 'useful' 'once' 'model' 'during' 'previous' 'state' 'and' 'because'
 'multiple' 'prediction' 'chatbots' 'data' 'weights' 'recurrent' 'this'
 'to' 'generation' 'using' 'predict' 'training' 'relationship' 'in'
 'adjusting' 'is' 'autocomplete' 'over' 'patterns' 'predicting'
 'applications' 'a' 'sequences' 'processing' 'of' 'based' 'between' 'rnn'
 'its' 'it' 'at' 'generate' 'time' 'making' 'given' 'text' 'like'
 'trained' 'the' 'on' 'they' 'one' 'hidden' 'remember' 'words' 'context'
 'epochs' 'can' 'word' 'next' 'for' 'used' 'widely' 'are']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
['learns' 'dependencies' 'by' 'task' 'sequence' 'retain' 'once'
 'generation' 'useful' 'model' '

'recurrent neural networks networks are widely used for word word because'

In [None]:
make_prediction("neural", 10)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
['learns' 'dependencies' 'by' 'task' 'sequence' 'retain' 'generation'
 'useful' 'once' 'model' 'during' 'previous' 'state' 'and' 'because'
 'multiple' 'prediction' 'chatbots' 'data' 'weights' 'recurrent' 'this'
 'to' 'generation' 'using' 'predict' 'relationship' 'training' 'in'
 'adjusting' 'autocomplete' 'is' 'over' 'patterns' 'predicting'
 'applications' 'a' 'sequences' 'processing' 'of' 'based' 'between' 'rnn'
 'it' 'its' 'at' 'generate' 'time' 'making' 'given' 'text' 'like' 'the'
 'trained' 'on' 'they' 'one' 'hidden' 'remember' 'words' 'context'
 'epochs' 'can' 'word' 'next' 'for' 'used' 'widely' 'are']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
['learns' 'dependencies' 'by' 'task' 'sequence' 'retain' 'once'
 'generation' 'model' 'useful' 'p

'neural neural networks networks are widely used for word prediction because'