In [None]:
 import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Dense
from tensorflow.keras.models import Sequential

In [None]:
# Load the dataset from the .txt file
# The encoding is pinned so the corpus decodes the same way regardless of the
# platform's default locale (assumes the file is UTF-8 -- TODO confirm).
with open("/content/textfile.txt", encoding="utf-8") as file:
    lyrics_text = file.read()

In [None]:
# Fit a word-level vocabulary on the whole corpus (passed as a single document)
tokenizer = Tokenizer()
tokenizer.fit_on_texts([lyrics_text])

# Vocabulary size used for the embedding/output layers: one slot per known
# word plus one extra slot for index 0, which has no entry in word_index.
total_words = 1 + len(tokenizer.word_index)

In [None]:
# Encode the corpus as one long list of word ids, then cut it into
# consecutive, non-overlapping chunks of at most `batch_size` tokens.
# NOTE(review): despite its name, `batch_size` is a chunk length here, not a
# training batch size. Each chunk later becomes a single (X, y) example, so
# the number of training samples is only ceil(len(token_list) / batch_size)
# -- confirm this is intended rather than sliding-window n-grams.
batch_size = 1000
token_list = tokenizer.texts_to_sequences([lyrics_text])[0]
input_sequences = [
    token_list[start:start + batch_size]
    for start in range(0, len(token_list), batch_size)
]

In [None]:
# Bring every chunk to the length of the longest one by padding on the left
# ("pre"), so the real tokens stay at the end of each row.
max_sequence_length = max(map(len, input_sequences))
input_sequences = pad_sequences(
    input_sequences,
    padding="pre",
    maxlen=max_sequence_length,
)

In [None]:
# Split every padded row into the model input (all tokens but the last)
# and the target (the last token), one-hot encoded over the vocabulary.
X = input_sequences[:, :-1]
y = tf.keras.utils.to_categorical(input_sequences[:, -1], num_classes=total_words)

In [None]:
# Unidirectional LSTM next-word model:
# embedding -> single LSTM layer -> softmax over the whole vocabulary.
model_lstm = Sequential([
    Embedding(total_words, 100, input_length=max_sequence_length-1),
    LSTM(150),
    Dense(total_words, activation="softmax"),
])
model_lstm.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train LSTM model
# Use the first GPU when TensorFlow can see one; otherwise fall back to the
# CPU explicitly instead of requesting a device that does not exist on
# CPU-only runtimes.
lstm_device = "/device:GPU:0" if tf.config.list_physical_devices("GPU") else "/device:CPU:0"
with tf.device(lstm_device):
    model_lstm.fit(X, y, epochs=5, verbose=1)

NameError: name 'tf' is not defined

In [None]:
# Bidirectional variant of the next-word model:
# embedding -> LSTM wrapped in Bidirectional -> softmax over the vocabulary.
model_bidirectional = Sequential([
    Embedding(total_words, 100, input_length=max_sequence_length-1),
    Bidirectional(LSTM(150)),
    Dense(total_words, activation="softmax"),
])
model_bidirectional.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train Bidirectional LSTM model
# Use the first GPU when TensorFlow can see one; otherwise fall back to the
# CPU explicitly instead of requesting a device that does not exist on
# CPU-only runtimes.
bidirectional_device = "/device:GPU:0" if tf.config.list_physical_devices("GPU") else "/device:CPU:0"
with tf.device(bidirectional_device):
    model_bidirectional.fit(X, y, epochs=3, verbose=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
import random

# Generation settings: the seed phrase to extend and how many words to append.
# Both values are passed to generate_text for each of the two trained models.
seed_text = "Anagha is waiting"
next_words = 5
def generate_text(model, seed_text, next_words):
    """Extend ``seed_text`` with up to ``next_words`` words sampled from ``model``.

    At every step the text generated so far is encoded with the module-level
    ``tokenizer``, left-padded to ``max_sequence_length - 1`` tokens, and fed
    to ``model``. The next word is drawn at random from the predicted
    distribution (``np.random.choice``), so repeated calls generally produce
    different text unless NumPy's global random state is seeded.

    Args:
        model: Trained model whose ``predict`` returns, for one padded
            sequence, a probability vector over the vocabulary indices.
        seed_text: Starting phrase; it is kept as the prefix of the result.
        next_words: Number of sampling steps to perform.

    Returns:
        ``seed_text`` followed by the sampled words, separated by single
        spaces. Fewer than ``next_words`` words are appended if the model
        stops producing a usable distribution.
    """
    generated_text = seed_text
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([generated_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_length-1, padding="pre")

        # Copy into float64 so the renormalisation below is done in full
        # precision and never mutates the array returned by the model.
        predicted_probs = np.array(model.predict(token_list, verbose=0)[0], dtype=np.float64)

        # Index 0 has no entry in the tokenizer vocabulary (it is the padding
        # slot), so sampling it would append an empty "word" and leave stray
        # spaces in the output. Exclude it and renormalise the rest.
        predicted_probs[0] = 0.0
        prob_total = predicted_probs.sum()
        if not np.isfinite(prob_total) or prob_total <= 0.0:
            # No usable probability mass (e.g. NaNs from a diverged model).
            break
        predicted_index = int(
            np.random.choice(len(predicted_probs), p=predicted_probs / prob_total)
        )

        # index_word is the tokenizer's own id -> word mapping; a direct
        # lookup replaces a linear scan over word_index on every step.
        output_word = tokenizer.index_word.get(predicted_index, "")
        if not output_word:
            continue
        generated_text += " " + output_word
    return generated_text

In [None]:
# Generate text using LSTM
lstm_generated_text = generate_text(model_lstm, seed_text, next_words)
print(f"LSTM generated text: {lstm_generated_text}")

LSTM generated text: Anagha is waiting will debt public short spurts


In [None]:
# Generate text using Bidirectional LSTM
bidirectional_generated_text = generate_text(model_bidirectional, seed_text, next_words)
print(f"Bi-LSTM generated text: {bidirectional_generated_text}")

Bi-LSTM generated text: Anagha is waiting falls shoppers the short us
