<a href="https://colab.research.google.com/github/Xynnect/MashineLearningGoogleColabs/blob/master/char_rnn_tensorflow_js.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using a neural network to generate startup names

Author: Eliot Andres (http://twitter.com/eliotandres)

Original notebooks: https://github.com/CSCfi/machine-learning-scripts/blob/master/slurm/keras-titles-rnn.py and https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py

In [0]:
import pandas as pd
import numpy as np
from IPython.display import HTML

from __future__ import print_function
import keras
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM, RNN, SimpleRNNCell, SimpleRNN
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io


In [36]:
# Load the company names. The CSV is read without a header row, so pandas
# labels the columns with integers and the names are addressed as column 0.
companies = pd.read_csv('./companies.csv', header=None)
# Show the first five rows as the cell output.
companies.head(n=5)


Unnamed: 0,0
0,Hashplay Inc.
1,New Incentives
2,GrabJobs
3,MediBookr
4,MelissaWithLove.co


In [37]:

# Join every company name into one newline-separated training corpus.
names = companies[0].values
text = '\n'.join(names)

# Build the sorted character vocabulary and the two lookup tables used to
# encode (char -> index) and decode (index -> char) one-hot vectors.
chars = sorted(set(text))
print('total chars: {}'.format(len(chars)))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = dict(enumerate(chars))



total chars: 150


In [38]:
# Sanity-check the corpus: overall size, a peek at the data, vocabulary size.
corpus_stats = [
    ('Corpus length:', len(text), 'lines:', len(names)),
    ('First 10 lines:', names[:10]),
    ('Number of unique chars:', len(chars)),
]
for fields in corpus_stats:
    print(*fields)


Corpus length: 2560407 lines: 172488
First 10 lines: ['Hashplay Inc.' 'New Incentives' 'GrabJobs' 'MediBookr'
 'MelissaWithLove.co' 'Starting 11' 'The CarShare Guy' 'Allahabad Bank'
 'Anlaiye' 'Any Time Loan']
Number of unique chars: 150


In [39]:
# Slice the corpus into overlapping windows of `maxlen` characters, taking a
# new window every `step` characters. Each window is paired with the single
# character that follows it, which is the prediction target.
maxlen = 10
step = 3

window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]

print('Number of sequences:', len(sentences))
print('First 10 sequences and next chars:')
for idx in range(10):
    print('[{}]:[{}]'.format(sentences[idx], next_chars[idx]))



Number of sequences: 853466
First 10 sequences and next chars:
[Hashplay I]:[n]
[hplay Inc.]:[
]
[ay Inc.
Ne]:[w]
[Inc.
New I]:[n]
[.
New Ince]:[n]
[ew Incenti]:[v]
[Incentives]:[
]
[entives
Gr]:[a]
[ives
GrabJ]:[o]
[s
GrabJobs]:[
]


In [40]:
# One-hot encode the training data:
#   X[i, t, k] is set when character t of sequence i is chars[k];
#   y[i, k] is set when the character following sequence i is chars[k].
print('Vectorization...')
# Use the builtin `bool` as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24, where it raises AttributeError.
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1
# Report the memory footprint of both arrays in MiB.
print('Size of X: {:.2f} MB'.format(X.nbytes/1024/1024))
print('Size of y: {:.2f} MB'.format(y.nbytes/1024/1024))



Vectorization...
Size of X: 1220.89 MB
Size of y: 122.09 MB


In [41]:
# ### Initialization
# 
# Now we are ready to create a recurrent model.  Keras contains three types of recurrent layers:
# 
#  * `SimpleRNN`, a fully-connected RNN where the output is fed back to input.
#  * `LSTM`, the Long Short-Term Memory unit layer.
#  * `GRU`, the Gated Recurrent Unit layer.
# 
# See https://keras.io/layers/recurrent/ for more information.

# Number of hidden units to use:
nb_units = 64

model = Sequential()

# Recurrent layers supported: SimpleRNN, LSTM, GRU:
# The input is one window of `maxlen` one-hot encoded characters.
model.add(LSTM(nb_units, input_shape=(maxlen, len(chars))))

# To stack multiple RNN layers, all RNN layers except the last one need
# to have "return_sequences=True".  An example of using two RNN layers:
#model.add(SimpleRNN(16,
#                    input_shape=(maxlen, len(chars)),
#                    return_sequences=True))
#model.add(SimpleRNN(32))

# Project the recurrent state onto the vocabulary and normalise it into a
# probability distribution over the next character.
model.add(Dense(units=len(chars)))
model.add(Activation('softmax'))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer)

# `summary()` prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 64)                55040     
_________________________________________________________________
dense_2 (Dense)              (None, 150)               9750      
_________________________________________________________________
activation_2 (Activation)    (None, 150)               0         
Total params: 64,790
Trainable params: 64,790
Non-trainable params: 0
_________________________________________________________________
None


In [42]:
!pip install tensorflowjs



In [0]:
def sample(preds, temperature=1.0):
    """Draw one index at random from a probability array.

    The probabilities are re-weighted by `temperature` before sampling:
    their logarithms are divided by `temperature`, exponentiated and
    renormalised so they sum to one.

    Args:
        preds: array-like of probabilities (converted to float64).
        temperature: divisor applied to the log-probabilities.

    Returns:
        The index selected by a single multinomial draw.
    """
    scaled_logits = np.log(np.asarray(preds).astype('float64')) / temperature
    weights = np.exp(scaled_logits)
    distribution = weights / np.sum(weights)
    # One trial of one draw yields a one-hot row; argmax recovers its index.
    draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(draw)

In [0]:
class SampleResult(keras.callbacks.Callback):
    """Keras callback that prints generated text at the end of every epoch.

    A random `maxlen`-character seed is taken from the corpus and, for each
    diversity (sampling temperature) value, 100 further characters are
    generated one at a time from the model's predictions.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Generate and print sample text for several diversity values.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: metrics passed by Keras (unused). Defaults to None rather
                than a shared mutable `{}`.
        """
        # The same seed is reused for every diversity so results are comparable.
        start_index = random.randint(0, len(text) - maxlen - 1)
        seed = text[start_index: start_index + maxlen]

        for diversity in [0.2, 0.5, 1.0, 1.2]:
            sentence = seed
            print()
            print('----- Generating with diversity',
                  diversity, 'seed: "' + sentence + '"')
            sys.stdout.write(sentence)

            for _ in range(100):
                # One-hot encode the current window as a batch of size 1.
                x = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(sentence):
                    x[0, t, char_indices[char]] = 1.

                preds = self.model.predict(x, verbose=0)[0]
                next_index = sample(preds, diversity)
                next_char = indices_char[next_index]

                # Slide the window: drop the oldest char, append the new one.
                sentence = sentence[1:] + next_char

                sys.stdout.write(next_char)
                sys.stdout.flush()
        print('\n\n')


sample_callback = SampleResult()

In [0]:
# Train the model; `sample_callback` prints generated text after each epoch.
history = model.fit(
    X,
    y,
    batch_size=512,
    epochs=10,
    verbose=2,
    callbacks=[sample_callback],
)

Epoch 1/10
 - 123s - loss: 2.2713

----- Generating with diversity 0.2 seed: "enewable R"
enewable Resources
Compan Medical
Prostate Software
Start Communications
Conter Services
Contral Services
Seal
----- Generating with diversity 0.5 seed: "enewable R"
enewable Resources International
Accourter Technologies
Starestriand Technologies
Amplica
Stark Medical Labs
T
----- Generating with diversity 1.0 seed: "enewable R"
enewable Ramed Technologies
IQ
Timplon
MEI
LoFR
Kot
Quarami Technologies
Aft Dosty Resayme
ZiveBTART
CodrySal

----- Generating with diversity 1.2 seed: "enewable R"
enewable RudenVeass
Gaser NCCROV (NCEDISFVAB)
DiCabremitia Sucwor.
LendFrapTinbley
NerEdecIRR Remicative
Scher


Epoch 2/10
 - 121s - loss: 2.0159

----- Generating with diversity 0.2 seed: "nd Urethan"
nd Urethan Corp.
Active Technologies
Complist Corp
Compact
Start Corporation
Compacion Company
Caster Technolo
----- Generating with diversity 0.5 seed: "nd Urethan"
nd Urethan Angage
Netcorport
Carevalit
Soci

In [0]:
# Persist the trained model to an HDF5 file next to the notebook.
model.save(filepath='./model-startup.h5')

In [0]:
# Export the trained Keras model in TensorFlow.js format into ./jsmodel/.
# The import stays in this cell because `tensorflowjs` is only installed by
# an earlier cell of this notebook, after the main import cell has run.
import tensorflowjs as tfjs

tfjs.converters.save_keras_model(model, './jsmodel/')