In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem; the dataset is later read from
# a path underneath this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential

# Load the CSV dataset, keeping only the 'poem_text' and 'poem_title' columns.
file_path = '/content/drive/My Drive/all_poems.csv'
df = pd.read_csv(file_path, usecols=['poem_text', 'poem_title'])

# Clean BEFORE sampling: if the sample is drawn first, an unusable row can be
# picked and then dropped, leaving nothing to train on.
# Remove rows whose poem_text is missing (NaN) or contains only whitespace.
df = df.dropna(subset=['poem_text'])
df = df[df['poem_text'].astype(str).str.strip() != '']

if df.empty:
    raise ValueError(f"No usable 'poem_text' rows found in {file_path!r}")

# Number of poems to train on. `DataFrame.sample()` with neither `n` nor `frac`
# returns exactly ONE row, which is what this cell did implicitly; the size is
# now explicit so it can be raised deliberately. Every extra poem adds one
# training example per word, so memory use grows quickly.
num_poems_to_sample = 1
df = df.sample(n=min(num_poems_to_sample, len(df)), random_state=42)

# Raw poem strings used to build the vocabulary and the training sequences.
poem_text = df['poem_text'].values

# Build the word -> integer-id vocabulary from the selected poems.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(poem_text)

# Encode every poem as a list of integer word ids using that vocabulary.
sequences_text = tokenizer.texts_to_sequences(poem_text)

# Fixed model input length; longer contexts keep only their most recent tokens.
max_sequence_length_text = 100  # You can adjust this value

# Build (context, next-word) training pairs: for every position i in a poem,
# the tokens before i are the input and token i is the target.
input_sequences_text = []
target_sequences_text = []

for sequence in sequences_text:
    for i in range(1, len(sequence)):
        # Store only the trailing window that survives padding/truncation.
        # pad_sequences(truncating='pre', the default) would discard anything
        # older than the last `max_sequence_length_text` tokens anyway, so the
        # padded result is identical while memory stays linear in poem length
        # instead of quadratic.
        window_start = max(0, i - max_sequence_length_text)
        input_sequences_text.append(sequence[window_start:i])
        target_sequences_text.append(sequence[i])

if not input_sequences_text:
    # Happens when every selected poem has fewer than two known tokens.
    raise ValueError(
        "No training pairs could be built: the selected poems contain fewer than two tokens each."
    )

# Left-pad with 0 so every input has exactly max_sequence_length_text ids.
input_sequences_text = pad_sequences(input_sequences_text, maxlen=max_sequence_length_text, padding='pre')
target_sequences_text = np.array(target_sequences_text)  # Convert to NumPy array

# Vocabulary size is the number of known words plus one, because id 0 is the
# padding value and is never assigned to a word by the tokenizer.
vocab_size_text = len(tokenizer.word_index) + 1
embedding_dim = 128

# Next-word model: word embeddings -> single LSTM layer -> softmax over the vocabulary.
model_text = Sequential()
model_text.add(
    Embedding(
        input_dim=vocab_size_text,
        output_dim=embedding_dim,
        input_length=max_sequence_length_text,
    )
)
model_text.add(LSTM(128))
model_text.add(Dense(vocab_size_text, activation='softmax'))

# Targets are integer word ids (not one-hot vectors), hence the sparse loss.
model_text.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Feed the (context, next-word) pairs through a tf.data pipeline. The shuffle
# buffer spans the whole dataset, so each epoch sees a full reshuffle.
batch_size = 64
dataset = (
    tf.data.Dataset
    .from_tensor_slices((input_sequences_text, target_sequences_text))
    .shuffle(buffer_size=len(input_sequences_text))
    .batch(batch_size)
)

# Train for 10 passes over the data; the returned History is the cell output.
model_text.fit(dataset, epochs=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7aa27fec8310>

In [11]:
import numpy as np

# Generate a poem word by word by repeatedly sampling the model's next-word
# distribution. Relies on `tokenizer`, `model_text`, `pad_sequences` and
# `max_sequence_length_text` from the training cell.

# Reverse vocabulary lookup: integer word id -> word.
index_to_word = {index: word for word, index in tokenizer.word_index.items()}

seed_text = "البركة"  # Customize your seed text for poem_text
num_words_to_generate = 20  # Adjust the number of words as needed
generation_seed = None  # Set to an int to make the sampled poem reproducible

rng = np.random.default_rng(generation_seed)

# Encode the seed once and extend the id list as words are generated, instead
# of re-tokenizing the whole growing string on every step. Words missing from
# the vocabulary are dropped by the tokenizer, so an unknown seed yields an
# empty context (all padding).
context_ids = list(tokenizer.texts_to_sequences([seed_text])[0])
generated_words = []

for _ in range(num_words_to_generate):
    # Left-pad (and, for long contexts, keep only the most recent tokens) to
    # match the model's fixed input length.
    padded_sequence = pad_sequences([context_ids], maxlen=max_sequence_length_text, padding='pre')

    # Next-word distribution over the vocabulary; copied to float64 so it can
    # be edited and renormalized safely.
    next_word_probabilities = np.array(
        model_text.predict(padded_sequence, verbose=0)[0], dtype=np.float64
    )

    # Id 0 is the padding slot and maps to no word; never sample it.
    next_word_probabilities[0] = 0.0

    # float32 softmax outputs often miss summing to exactly 1, which makes
    # `choice(p=...)` raise "probabilities do not sum to 1"; renormalize.
    total_probability = next_word_probabilities.sum()
    if total_probability <= 0:
        break  # Degenerate distribution: nothing valid left to sample.
    next_word_probabilities /= total_probability

    predicted_word_index = int(rng.choice(len(next_word_probabilities), p=next_word_probabilities))
    predicted_word = index_to_word.get(predicted_word_index, "")
    if not predicted_word:
        continue  # Defensive: skip ids that have no vocabulary entry.

    generated_words.append(predicted_word)
    context_ids.append(predicted_word_index)

# Seed followed by the generated words, separated by single spaces.
generated_poem = " ".join([seed_text] + generated_words)

# Print the generated poem
print(generated_poem)


البركة في قلبي فك عينك لا انس كل احد الا اوفي من مختفيا فاذا ان لم تكن تجلدي يصلي ورافقني جسدي
