<a href="https://colab.research.google.com/github/Ruqyai/MENADD-DL/blob/main/RNN/Arabic_Poems_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Arabic Poems Generator

### 1.0 Load the packages
<hr/>


In [None]:
!pip install tensorflow==2.1.0

Check the installed TensorFlow version.

In [None]:
import tensorflow as tf

# Show which TensorFlow release the kernel actually loaded.
print(tf.version.VERSION)

2.1.0


In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers 
import tensorflow.keras.utils as ku 
import numpy as np 

### 2.0 Loading the data
<hr/>


In [None]:
!wget https://raw.githubusercontent.com/Ruqyai/MENADD-DL/main/Data/arabic_poem_generator.txt

In [None]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8, so the
# Arabic text does not depend on the platform's default encoding.
# The context manager closes the file handle as soon as the read is done.
with open('arabic_poem_generator.txt', 'rb') as poem_file:
    data = poem_file.read().decode(encoding='utf-8')

# Preview the first 300 characters of the corpus.
data[0:300]

'لقينا يوم صهباء سريّه\nحناظلة لهم في الحرب نيّه\nلقيناهم بأسياف حداد\nوأسد لا تفرّ من المنيّه\nوكان زعيمهم إذ ذاك ليث\nهزبرا لا يبالي بالرزيّه\nفخلّفناه وسط القاع ملقى\nوها أنا طالب قتل البقيّه\nورحنا بالسيوف نسوق فيهم\nإلى ربوات معضلة خفيّه\nوكم من فارس منهم تركنا\nعليه من صوارمنا قضيّه\nفوارسنا بنو عبس وإنّا\n'

### 3.0 Tokenizing the training data
<hr/>

In [None]:
# One corpus entry per line of the text file (lower-cased before splitting).
corpus = data.lower().split("\n")

# Learn the word -> integer id vocabulary from every line.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

# Vocabulary size plus one extra slot: Tokenizer ids start at 1, and index 0
# is the value pad_sequences fills with.
total_words = 1 + len(tokenizer.word_index)
print('Total number of words in corpus:', total_words)

Total number of words in corpus: 8212


### 4.0 Preparing the data for training
<hr/>


In [None]:
# Turn every line into all of its token prefixes of length >= 2:
# [w1, w2], [w1, w2, w3], ... Each prefix is one training example whose
# last token is the word to predict.
input_sequences = [
    token_ids[:end]
    for token_ids in tokenizer.texts_to_sequences(corpus)
    for end in range(2, len(token_ids) + 1)
]

# Left-pad ('pre') every prefix to the longest one, so the target word is
# always in the final column.
max_sequence_len = max(len(sequence) for sequence in input_sequences)
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)

# Everything except the last column is the model input; the last column is
# the target, one-hot encoded over the whole vocabulary.
predictors = input_sequences[:, :-1]
label = ku.to_categorical(input_sequences[:, -1], num_classes=total_words)

### 5.0 Defining the model
<hr/>


In [None]:
# Next-word prediction network: given max_sequence_len - 1 token ids, output
# a probability for every entry of the vocabulary.
model = Sequential()

# 100-dimensional embedding for each of the total_words token ids.
model.add(Embedding(total_words, 100, input_length=max_sequence_len - 1))
# First recurrent layer keeps the full sequence so a second one can be stacked.
model.add(Bidirectional(LSTM(150, return_sequences=True)))
model.add(Dropout(0.18))
# Second recurrent layer collapses the sequence to a single vector.
model.add(Bidirectional(LSTM(100)))
# Hidden layer with half the vocabulary size. Floor division keeps the unit
# count an int; `total_words/2` would hand Dense a float.
model.add(Dense(total_words // 2, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
# Softmax over the vocabulary; pairs with the one-hot labels and
# categorical cross-entropy below.
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would add a stray "None" line to the output).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 100)            821200    
_________________________________________________________________
bidirectional (Bidirectional (None, 8, 300)            301200    
_________________________________________________________________
dropout (Dropout)            (None, 8, 300)            0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 200)               320800    
_________________________________________________________________
dense (Dense)                (None, 4106)              825306    
_________________________________________________________________
dense_1 (Dense)              (None, 8212)              33726684  
Total params: 35,995,190
Trainable params: 35,995,190
Non-trainable params: 0
____________________________________________

### 6.0 Training the model
<hr/>


In [None]:
 # Fit on the padded prefixes and their one-hot next-word targets for
 # 20 epochs; the value returned by fit() is kept in `history`.
 history = model.fit(
     x=predictors,
     y=label,
     epochs=20,
     verbose=1,
 )

Train on 13043 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### 7.0 Testing the model
<hr/>
To test the model we have to give 2 inputs:

1. The input (seed) text that the network starts predicting from, and
2. The number of words you want the network to predict.

In [None]:
seed_text = "كيف"   # text the generation starts from
next_words = 8       # how many words to append to the seed

for _ in range(next_words):
    # Encode the text generated so far and left-pad it to the model's input
    # length (pad_sequences also trims it if it grows longer than that).
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')

    # Pick the most probable next token: argmax over the softmax output.
    # This gives the same result as the deprecated Sequential.predict_classes
    # without depending on that helper.
    probabilities = model.predict(token_list, verbose=0)
    predicted = int(np.argmax(probabilities, axis=-1)[0])

    # Direct id -> word lookup instead of scanning word_index on every step.
    # Index 0 (padding) has no word, so it falls back to an empty string,
    # as the original scan did.
    output_word = tokenizer.index_word.get(predicted, "")
    seed_text += " " + output_word
print(seed_text)

كيف السلوّ عليه تضرّم مدام البرق لقحت كان فخري
