In [1]:
import sys
import re
import requests
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense,Activation,Dropout,LSTM

In [2]:
r = requests.get("https://cs.stanford.edu/people/karpathy/namesGenUnique.txt")

In [3]:
r

<Response [200]>

In [4]:
raw_text = r.text
raw_text[:100]

'jka\nDillie\nRyine\nCherita\nDasher\nChailine\nFrennide\nGremaley\nPatj\nHandi\nGully\nWennie\nFerentra\nJixandli'

In [5]:
len(raw_text)

52127

In [6]:
print(raw_text[:100])

jka
Dillie
Ryine
Cherita
Dasher
Chailine
Frennide
Gremaley
Patj
Handi
Gully
Wennie
Ferentra
Jixandli


### Processing Text

In [7]:
raw_text = raw_text.replace('\n',' ')

In [8]:
raw_text

'jka Dillie Ryine Cherita Dasher Chailine Frennide Gremaley Patj Handi Gully Wennie Ferentra Jixandlia Slimele Elispor Kathine Masdadina Chilcina Tiz Jolina Corbenton Haustina Delimeet Eppal Molenia Frecki Senny Helphon Saminda Ka Wynther Helsey Misa Lynetta Halonie Bartta Anathea Helricka Enezela Sherlees Randon Nokminia Belli RoWindon Jaena Meralin Orgen Whad Meyne Luchine Alogge Gren Kollette Leath Corth Mariedia Rhul Genylena Akepy Gabdie Jehn Rocelle Joeno Meruann Kainy Diliar Olell Welke Lileen Arti Valira Paynir Juvinge Krady Mogna Shabfoe Mandelina Rosatha Dacine Ull Vonell Elword Angrelly Kodilla Louge Temmie Gerthe Fildhine Olyna Dibfie Harmin Mellina Edongia Lorant Baires Dasher Quengy Kens Paac Jeencia Jaiep Deua Analine Mifalia Conette Dovanice Tyberr Irda Gerfand Auberro Thamosa Babrield Kait Stephepoa Abi Chrristen Stephanna Racky Dones Lys Vira Phela Deney Dorah Avet Ca iunia Jasqquosius Esty Solumote Hilling Gredie Dynae Berdiah Suynn Tile Blian Mancie Tovey JotaLine M

In [9]:
raw_text[:100]

'jka Dillie Ryine Cherita Dasher Chailine Frennide Gremaley Patj Handi Gully Wennie Ferentra Jixandli'

In [10]:
len(set(raw_text))

66

In [11]:
raw_text = re.sub('[-.0-9:]','',raw_text)

In [12]:
raw_text = raw_text.lower()

In [13]:
set(raw_text)

{' ',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z'}

In [14]:
len1 = len(set(raw_text))

In [15]:
len1 

27

In [16]:
chars = sorted(list(set(raw_text)))
arr  =np.arange(0,len1)

char_to_idx = {}
idx_to_char = {}

for i in range(len1):
    char_to_idx[chars[i]] = arr[i]
    idx_to_char[arr[i]] = chars[i]
char_to_idx

{' ': 0,
 'a': 1,
 'b': 2,
 'c': 3,
 'd': 4,
 'e': 5,
 'f': 6,
 'g': 7,
 'h': 8,
 'i': 9,
 'j': 10,
 'k': 11,
 'l': 12,
 'm': 13,
 'n': 14,
 'o': 15,
 'p': 16,
 'q': 17,
 'r': 18,
 's': 19,
 't': 20,
 'u': 21,
 'v': 22,
 'w': 23,
 'x': 24,
 'y': 25,
 'z': 26}

In [17]:
idx_to_char

{0: ' ',
 1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z'}

In [18]:
maxlen = 5
x_data = []
y_data = []
for i in range(0,len(raw_text) - maxlen, 1):
    in_seq = raw_text[i:i + maxlen]
    out_seq = raw_text[i+ maxlen]
    x_data.append([char_to_idx[char]for char in in_seq])
    y_data.append([char_to_idx[out_seq]])


nb_chars = len(x_data)
print(f"Text corpus: {nb_chars}")
print(f"Sequences # ",int(len(x_data)/maxlen))

Text corpus: 52038
Sequences #  10407


In [19]:
x = np.reshape(x_data,(nb_chars,maxlen,1))
x = x/float(len(chars))

In [20]:
x

array([[[0.37037037],
        [0.40740741],
        [0.03703704],
        [0.        ],
        [0.14814815]],

       [[0.40740741],
        [0.03703704],
        [0.        ],
        [0.14814815],
        [0.33333333]],

       [[0.03703704],
        [0.        ],
        [0.14814815],
        [0.33333333],
        [0.44444444]],

       ...,

       [[0.33333333],
        [0.62962963],
        [0.77777778],
        [0.18518519],
        [0.        ]],

       [[0.62962963],
        [0.77777778],
        [0.18518519],
        [0.        ],
        [0.        ]],

       [[0.77777778],
        [0.18518519],
        [0.        ],
        [0.        ],
        [0.        ]]])

In [21]:
np.unique(y_data)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26])

In [22]:
y = tf.keras.utils.to_categorical(y_data)

In [23]:
y[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [24]:
x[0]

array([[0.37037037],
       [0.40740741],
       [0.03703704],
       [0.        ],
       [0.14814815]])

In [25]:
y.shape

(52038, 27)

In [26]:
len(y[1])

27

### Define The model

In [27]:
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(256,input_shape=(maxlen,1),return_sequences=True),
    tf.keras.layers.LSTM(256,return_sequences=True),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(len(y[1]),activation='softmax')
])

In [28]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 5, 256)            264192    
                                                                 
 lstm_1 (LSTM)               (None, 5, 256)            525312    
                                                                 
 dropout (Dropout)           (None, 5, 256)            0         
                                                                 
 lstm_2 (LSTM)               (None, 64)                82176     
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense (Dense)               (None, 27)                1755      
                                                                 
Total params: 873,435
Trainable params: 873,435
Non-trai

### Compiling

In [29]:
model.compile(loss='categorical_crossentropy',optimizer='adam')

In [31]:
filepath = "model_weights_babynames.hdf5"
checkpoint = ModelCheckpoint(filepath,monitor = 'loss', verbose = 1,save_best_only = True, mode = 'min')
model_callbacks = [checkpoint]


model.fit(x,y, epochs = 300, batch_size = 62 ,
callbacks = model_callbacks)

Epoch 1/300
Epoch 1: loss improved from inf to 2.46182, saving model to model_weights_babynames.hdf5
Epoch 2/300
Epoch 2: loss improved from 2.46182 to 2.44376, saving model to model_weights_babynames.hdf5
Epoch 3/300
Epoch 3: loss improved from 2.44376 to 2.42377, saving model to model_weights_babynames.hdf5
Epoch 4/300
Epoch 4: loss improved from 2.42377 to 2.40527, saving model to model_weights_babynames.hdf5
Epoch 5/300
Epoch 5: loss improved from 2.40527 to 2.39260, saving model to model_weights_babynames.hdf5
Epoch 6/300
Epoch 6: loss improved from 2.39260 to 2.37678, saving model to model_weights_babynames.hdf5
Epoch 7/300
Epoch 7: loss improved from 2.37678 to 2.35684, saving model to model_weights_babynames.hdf5
Epoch 8/300
Epoch 8: loss improved from 2.35684 to 2.34753, saving model to model_weights_babynames.hdf5
Epoch 9/300
Epoch 9: loss improved from 2.34753 to 2.32979, saving model to model_weights_babynames.hdf5
Epoch 10/300
Epoch 10: loss improved from 2.32979 to 2.3180

<keras.callbacks.History at 0x1a2090b4550>

In [32]:
pattern = []

seed = 'handi'
for i in seed:
    value = char_to_idx[i]
    pattern.append(value)

In [33]:
print(seed)
n_vocab = len(chars)

handi


In [34]:
for i in range(100):
    X  =np.reshape(pattern,(1,len(pattern),1))
    X = X/float(n_vocab)

# call the already saved model
# model = tf.keras.models.load_model('model_weights_babynames.tf')
model = tf.keras.models.load_model('model_weights_babynames.hdf5')


In [35]:
int_prediction = model.predict(X,verbose=1)



In [36]:
int_prediction

array([[2.5730628e-01, 3.8608679e-01, 6.7312926e-06, 5.7272683e-04,
        1.9956575e-04, 2.3658171e-01, 4.8110915e-06, 4.4448399e-05,
        5.3766718e-07, 3.8764337e-03, 6.1030988e-07, 8.7321159e-06,
        5.9460159e-03, 1.9634504e-05, 8.7677002e-02, 1.1587485e-03,
        1.8109053e-07, 1.0219753e-08, 5.8472808e-04, 4.5799250e-03,
        8.0643123e-04, 2.0192103e-06, 8.1492806e-07, 5.2802779e-08,
        2.6486279e-07, 1.4534505e-02, 3.1118768e-07]], dtype=float32)

In [37]:
index = np.argmax(int_prediction)
prediction =idx_to_char[index]

In [38]:
prediction

'a'

In [39]:
pattern.append(index)
pattern  = pattern[1:len(pattern)]

In [40]:
seed = 'Gully'
print(seed)
n_vocab = len(chars)
for i in range(100):
    X = np.reshape(pattern , (1, len(pattern) , 1))
    X = X/float(n_vocab)
    int_prediction = model.predict(X , verbose = 0)
    index = np.argmax(int_prediction)
    prediction = idx_to_char[index]
    sys.stdout.write(prediction)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]

Gully
 margorie patrie tanda eissa carlette anremare sigerna brecka derenisa jenes galley glennilla carlet

In [41]:
seed = 'stepher '
print(seed)
n_vocab = len(chars)
for i in range(100):
    X = np.reshape(pattern , (1, len(pattern) , 1))
    X = X/float(n_vocab)
    int_prediction = model.predict(X , verbose = 0)
    index = np.argmax(int_prediction)
    prediction = idx_to_char[index]
    sys.stdout.write(prediction)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]

stepher 
te anremare sigerna brecka derenisa jenes galley glennilla carlette anremare sigerna brecka derenisa

In [42]:
seed = 'glennilla  '
print(seed)
n_vocab = len(chars)
for i in range(100):
    X = np.reshape(pattern , (1, len(pattern) , 1))
    X = X/float(n_vocab)
    int_prediction = model.predict(X , verbose = 0)
    index = np.argmax(int_prediction)
    prediction = idx_to_char[index]
    sys.stdout.write(prediction)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]

glennilla  
 jenes galley glennilla carlette anremare sigerna brecka derenisa jenes galley glennilla carlette an