In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
# Load the text data from a file
file_path = '/content/drive/MyDrive/dataset.txt'  # Update this with the actual file path
with open(file_path, 'r') as file:
    text_data = file.readlines()

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Clean up the text data (removing newline characters)
text_data = [line.strip() for line in text_data if line.strip() != '']

print(text_data)  # To verify the dataset is loaded properly

['If it rains, the ground will be wet unless there is something covering it.', 'John has more apples than Sara, and Sara has more apples than Bob, so John has the most apples.', 'Birds cannot fly without wings because they need wings to lift themselves into the air.', 'When a person is running fast and suddenly stops, they may lose balance and stumble or fall.', 'When you put ice in hot water, the ice melts because heat causes the ice to turn into water.', 'Plants need water to grow, but too much water can harm them. If you water them correctly, they will grow.', 'If two people push a box from opposite sides with equal force, the box will not move because the forces cancel each other out.', 'When a light is turned off, the room becomes dark unless there is another light source.', 'Studying for an exam helps you understand the material better, making it easier to perform well.', "Eating too much sugar can lead to health problems like weight gain and high blood sugar, so it's harmful to 

In [6]:
# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
total_words = len(tokenizer.word_index) + 1

In [7]:
# Create input sequences using the text data
input_sequences = []
for line in text_data:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

In [8]:
# Pad sequences to ensure they are all the same length
max_sequence_len = max([len(seq) for seq in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

In [9]:
# Split the data into input (X) and output (y)
X, y = input_sequences[:,:-1], input_sequences[:,-1]
y = tf.keras.utils.to_categorical(y, num_classes=total_words)

print(f"X shape: {X.shape}, y shape: {y.shape}")

X shape: (28334, 175), y shape: (28334, 4047)


In [10]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, GRU, Dense, AdditiveAttention, Input
from tensorflow.keras.optimizers import Adam

In [11]:
# Model parameters
embedding_dim = 128  # Higher dimension for better token representation
gru_units = 256      # More units to handle complex reasoning and longer contexts


In [12]:
# Input layer
inputs = Input(shape=(max_sequence_len-1,))

# Embedding layer
embedding_layer = Embedding(total_words, embedding_dim, input_length=max_sequence_len-1)(inputs)

# GRU layer (returning sequences so that we can use attention over time steps)
gru_output = GRU(gru_units, return_sequences=True)(embedding_layer)

# Attention mechanism (Additive Attention over GRU output)
attention_output = AdditiveAttention()([gru_output, gru_output])

# Flattening the attention output for final prediction
flat_output = tf.keras.layers.Flatten()(attention_output)

# Dense output layer for next-word prediction
output_layer = Dense(total_words, activation='softmax')(flat_output)



In [13]:
# Defining the model
model = Model(inputs=inputs, outputs=output_layer)
model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

In [14]:
# Model summary
model.summary()

In [15]:
# Train the model
model.fit(X, y, epochs=20,batch_size=64, verbose=1)


Epoch 1/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 293ms/step - accuracy: 0.0300 - loss: 7.0772
Epoch 2/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 294ms/step - accuracy: 0.0394 - loss: 6.2391
Epoch 3/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 294ms/step - accuracy: 0.0562 - loss: 5.5153
Epoch 4/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 294ms/step - accuracy: 0.0868 - loss: 4.7658
Epoch 5/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 295ms/step - accuracy: 0.1414 - loss: 4.0922
Epoch 6/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 295ms/step - accuracy: 0.2114 - loss: 3.5080
Epoch 7/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 295ms/step - accuracy: 0.2953 - loss: 2.9790
Epoch 8/20
[1m443/443[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 295ms/step - accuracy: 0.3859 - loss: 2.4778
Epoch 9/

<keras.src.callbacks.history.History at 0x79a4b13e8910>

In [16]:
def generate_text(seed_text, next_words, max_sequence_len):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted_probs = model.predict(token_list, verbose=0)
        predicted = np.argmax(predicted_probs, axis=-1)
        output_word = tokenizer.index_word.get(predicted[0], '')
        seed_text += " " + output_word
    return seed_text

In [20]:
# Generate text based on a seed text
seed_text = "who are you"
generated_text = generate_text(seed_text, next_words=20, max_sequence_len=max_sequence_len)
print(generated_text)

who are you apply to down anxiety recognizing its support saying whatever be facing it creates a perfect way for nature lovers the


In [19]:
# Save the entire model to a file
model.save("/content/drive/MyDrive/LLM/rnn_model_with_attention.h5")




In [None]:
# Save only the model weights
model.save_weights("/content/drive/MyDrive/LLM/rnn.weights.h5")


In [None]:
from tensorflow.keras.models import load_model

# Load the model
loaded_model = load_model("/content/drive/MyDrive/LLM/rnn_model_with_attention.h5")

# The model is now ready for inference


In [None]:
# Define the model architecture again (same as used during training)
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, GRU, Dense, AdditiveAttention, Input
from tensorflow.keras.optimizers import Adam

# Model parameters
embedding_dim = 128
gru_units = 256
total_words = 10000  # Make sure to use the correct value
max_sequence_len = 50  # Same as during training

# Define the model architecture
inputs = Input(shape=(max_sequence_len-1,))
embedding_layer = Embedding(total_words, embedding_dim, input_length=max_sequence_len-1)(inputs)
gru_output = GRU(gru_units, return_sequences=True)(embedding_layer)
attention_output = AdditiveAttention()([gru_output, gru_output])
flat_output = tf.keras.layers.Flatten()(attention_output)
output_layer = Dense(total_words, activation='softmax')(flat_output)

# Rebuild the model
loaded_model = Model(inputs=inputs, outputs=output_layer)
loaded_model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

# Load the previously saved weights
loaded_model.load_weights("/content/drive/MyDrive/LLM/rnn.weights.h5")

# The model is now ready for inference


  saveable.load_own_variables(weights_store.get(inner_path))


ValueError: A total of 2 objects could not be loaded. Example error message for object <Embedding name=embedding_2, built=True>:

The shape of the target variable and the shape of the target value in `variable.assign(value)` must match. variable.shape=(10000, 128), Received: value.shape=(4047, 128). Target variable: <KerasVariable shape=(10000, 128), dtype=float32, path=embedding_2/embeddings>

List of objects that could not be loaded:
[<Embedding name=embedding_2, built=True>, <Dense name=dense_2, built=True>]