# Arabic Name Generator with RNNs in Keras

This kernel is just for fun: I wanted to try out an idea I had in mind. Most of the code is adapted from these two repositories:
https://github.com/antonio-f/Generating-names-with-RNN/blob/master/Generating%20names%20with%20recurrent%20neural%20networks/RNN-task.ipynb <br>
https://github.com/simon-larsson/pokemon-name-generator

In [1]:
import pandas as pd
import numpy as np
import keras
import time
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop,Adam
import numpy as np
import random
import os

Using TensorFlow backend.


In [2]:
# Load the names table from the Kaggle input directory.
# NOTE(review): the absolute path only resolves inside a Kaggle kernel; adjust it when running elsewhere.
dataset = pd.read_csv("/kaggle/input/Arabic_Names.csv")

In [3]:
# Keep only the column holding the names; every later cell works from this Series.
names = dataset["Arabic_Name"]

In [4]:
# Hyperparameters and run settings used by the cells below.
step_length = 1   # stride between consecutive training windows
epochs = 50       # number of passes over the data given to model.fit
batch_size = 64    # mini-batch size given to model.fit
latent_dim = 128   # number of units in the LSTM layer
dropout_rate = 0.2 # passed to the LSTM as recurrent_dropout
verbosity = 0     # `verbose` argument of model.fit
gen_amount = 10    # NOTE(review): not referenced by any cell visible in this notebook

In [5]:
# Strip trailing whitespace from every name in the dataset column.
input_names = [raw_name.rstrip() for raw_name in names]

In [6]:
# Training corpus: all names joined by newlines and lower-cased.
concat_names = '\n'.join(input_names).lower()

# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(concat_names))
num_chars = len(chars)

# Lookup tables between characters and their integer indices.
char2idx = {char: index for index, char in enumerate(chars)}
idx2char = dict(enumerate(chars))

max_sequence_length = max(map(len, input_names))

print('Total chars: {}'.format(num_chars))
print('Corpus length:', len(concat_names))
print('Number of names: ', len(input_names))
print('Longest name: ', max_sequence_length)

Total chars: 30
Corpus length: 32675
Number of names:  4511
Longest name:  18


In [7]:
# Replaces the longest-name length assigned to this variable earlier: from here on,
# max_sequence_length is the size (in characters) of the sliding window over the corpus.
max_sequence_length = 50

In [8]:
# Slide a window of max_sequence_length characters over the corpus in steps of
# step_length: each window is one input sequence and the character that follows
# it is the prediction target.
sequences = []
next_chars = []
for i in range(0, len(concat_names) - max_sequence_length, step_length):
    sequences.append(concat_names[i: i + max_sequence_length])
    next_chars.append(concat_names[i + max_sequence_length])

num_sequences = len(sequences)

# Preview at most 20 (input, target) pairs. Slicing instead of indexing with
# range(20) keeps the cell from raising IndexError when the corpus yields fewer
# than 20 windows. Newlines are shown as spaces so each pair stays on one line.
for preview_sequence, preview_target in zip(sequences[:20], next_chars[:20]):
    print('X=[{}]   y=[{}]'.format(preview_sequence, preview_target).replace('\n', ' '))

X=[aaban aabid aadil aahil aalam aalee aalim aamil aa]   y=[m]
X=[aban aabid aadil aahil aalam aalee aalim aamil aam]   y=[i]
X=[ban aabid aadil aahil aalam aalee aalim aamil aami]   y=[r]
X=[an aabid aadil aahil aalam aalee aalim aamil aamir]   y=[ ]
X=[n aabid aadil aahil aalam aalee aalim aamil aamir ]   y=[a]
X=[ aabid aadil aahil aalam aalee aalim aamil aamir a]   y=[a]
X=[aabid aadil aahil aalam aalee aalim aamil aamir aa]   y=[q]
X=[abid aadil aahil aalam aalee aalim aamil aamir aaq]   y=[i]
X=[bid aadil aahil aalam aalee aalim aamil aamir aaqi]   y=[b]
X=[id aadil aahil aalam aalee aalim aamil aamir aaqib]   y=[ ]
X=[d aadil aahil aalam aalee aalim aamil aamir aaqib ]   y=[a]
X=[ aadil aahil aalam aalee aalim aamil aamir aaqib a]   y=[a]
X=[aadil aahil aalam aalee aalim aamil aamir aaqib aa]   y=[q]
X=[adil aahil aalam aalee aalim aamil aamir aaqib aaq]   y=[i]
X=[dil aahil aalam aalee aalim aamil aamir aaqib aaqi]   y=[l]
X=[il aahil aalam aalee aalim aamil aamir aaqib aaqil] 

In [9]:
# One-hot encode the training windows (X) and their next-character targets (Y).
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X = np.zeros((num_sequences, max_sequence_length, num_chars), dtype=bool)
Y = np.zeros((num_sequences, num_chars), dtype=bool)

for i, sequence in enumerate(sequences):
    # X[i, j, k] is True when position j of window i holds character k.
    for j, char in enumerate(sequence):
        X[i, j, char2idx[char]] = 1
    # Y[i, k] is True when character k follows window i.
    Y[i, char2idx[next_chars[i]]] = 1
    
print('X shape: {}'.format(X.shape))
print('Y shape: {}'.format(Y.shape))
print(X[0])
print(Y[0])

X shape: (32625, 50, 30)
Y shape: (32625, 30)
[[False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [ True False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
[False False False False False False False False False False False False
 False False False False False  True False False False False False False
 False False False False False False]


In [10]:
# Character-level network: one LSTM layer reading a window of one-hot encoded
# characters, followed by a softmax over the vocabulary for the next character.
model = Sequential([
    LSTM(latent_dim,
         input_shape=(max_sequence_length, num_chars),
         recurrent_dropout=dropout_rate),
    Dense(units=num_chars, activation='softmax'),
])

# NOTE(review): newer Keras releases name this argument `learning_rate`; `lr` is
# kept so the cell still runs on the Keras version this notebook was executed with.
optimizer = Adam(lr=0.01)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy')

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               81408     
_________________________________________________________________
dense_1 (Dense)              (None, 30)                3870      
Total params: 85,278
Trainable params: 85,278
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Fit the model on the one-hot encoded windows and report the wall-clock duration.
start = time.time()
print('Start training for {} epochs'.format(epochs))
history = model.fit(
    X,
    Y,
    batch_size=batch_size,
    epochs=epochs,
    verbose=verbosity,
)
end = time.time()
elapsed_minutes = (end - start)/60
print('Finished training - time elapsed:', elapsed_minutes, 'min')

Start training for 50 epochs
Finished training - time elapsed: 33.33084577719371 min


In [12]:
def generate_names(max_steps=10000):
    """Sample one new name from the trained character-level model.

    A seed window is cut from a random position of ``concat_names`` and
    terminated with a newline so that the model starts a fresh name. Characters
    are then sampled one at a time from the model's predicted distribution.
    Every time the model emits a newline, the characters produced since the
    previous newline form a candidate name.

    A candidate is accepted when, after dropping a doubled leading letter, it
    is 5 to 7 characters long and does not match any name of ``input_names``
    (compared case-insensitively, because the corpus is lower-cased).

    Relies on the notebook globals ``concat_names``, ``input_names``,
    ``max_sequence_length``, ``num_chars``, ``char2idx``, ``idx2char`` and
    ``model``.

    Args:
        max_steps: Maximum number of characters to sample before giving up.

    Returns:
        The accepted name, capitalized.

    Raises:
        ValueError: If the corpus is too short to cut a full seed window.
        RuntimeError: If no acceptable name is produced within ``max_steps``
            sampled characters.
    """
    window = max_sequence_length
    corpus_length = len(concat_names)
    if corpus_length < window - 1:
        raise ValueError(
            'Corpus has {} characters but a seed needs {}.'.format(
                corpus_length, window - 1))

    # The seed's end offset is drawn from [window - 1, corpus_length] so the
    # slice start is never negative; a negative start would silently yield an
    # empty or misaligned seed.
    seed_end = np.random.randint(window - 1, corpus_length + 1)
    sequence = concat_names[seed_end - (window - 1):seed_end] + '\n'

    # Lower-cased lookup set: generated text is lower-case, while input_names
    # may be stored with their original capitalisation.
    known_names = {name.lower() for name in input_names}

    current_chars = []  # characters generated since the last newline
    for _ in range(max_steps):
        x = np.zeros((1, window, num_chars))
        for i, char in enumerate(sequence):
            x[0, i, char2idx[char]] = 1

        # Renormalise in float64: float32 softmax output can miss the
        # sum-to-one tolerance that np.random.choice enforces.
        probs = np.asarray(model.predict(x, verbose=0)[0], dtype=np.float64)
        probs /= probs.sum()
        next_idx = np.random.choice(len(probs), p=probs)
        next_char = idx2char[next_idx]
        sequence = sequence[1:] + next_char

        if next_char != '\n':
            current_chars.append(next_char)
            continue

        # A newline closes the name that was just generated.
        gen_name = ''.join(current_chars)
        current_chars = []

        # Drop a doubled leading letter (e.g. "aamira" -> "amira").
        if len(gen_name) > 4 and gen_name[0] == gen_name[1]:
            gen_name = gen_name[1:]

        if 4 < len(gen_name) <= 7 and gen_name not in known_names:
            return gen_name.capitalize()

    raise RuntimeError(
        'No acceptable name generated within {} sampled characters.'.format(max_steps))


In [13]:
# Print 20 generated names, one generate_names() call per line.
# NOTE(review): the count is hard-coded here; the gen_amount setting defined earlier is not used.
for _ in range(20):
    print(generate_names())


Safdah
Khujran
Walifan
Muthan
Muttami
Layah
Shaurah
Hubays
Yayeer
Nehzaat
Rohaid
Mukeera
Asbath
Shurrah
Subara
Reyan
Marbad
Ghaniya
Ilwaz
Shiyad
