# RNN for Character-Level Text Generation

## Libraries and Utilities

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import random
import sys

In [None]:
def sample(preds, temperature =1.0):
  """Draw one class index from a probability vector after temperature scaling.

  The probabilities are re-weighted as p ** (1 / temperature) and
  renormalised, so temperatures below 1 sharpen the distribution and
  temperatures above 1 flatten it. A single multinomial draw is then made.

  Args:
    preds: 1-D array-like of class probabilities (converted to float64).
    temperature: positive scaling factor applied to the log-probabilities.

  Returns:
    The index of the sampled class (NumPy integer).
  """
  preds = np.asarray(preds).astype('float64')
  scaled_logs = np.log(preds)/temperature
  #shift so the largest term becomes exp(0) = 1: without this, at low
  #temperatures every exp() can underflow to 0 and the normalisation below
  #turns into 0/0 = NaN. The shift cancels out in the ratio.
  scaled_logs = scaled_logs - np.max(scaled_logs)
  exp_preds = np.exp(scaled_logs)
  preds = exp_preds / np.sum(exp_preds)
  #one draw of a single trial -> one-hot row; argmax recovers the drawn index
  probas = np.random.multinomial(1,preds,1)
  return np.argmax(probas)

def generate_text(model, text, maxlen, generationlen):
  """Print text generated by `model` at several sampling temperatures.

  A random window of `maxlen` characters is taken from `text` as the seed.
  For each temperature in [0.2, 0.5, 1.0, 1.2], generation restarts from
  that same seed and `generationlen` characters are sampled one at a time
  and written to stdout.

  Relies on the module-level `chars` (index -> character list) and
  `char_indices` (character -> index dict), and on `sample`.

  Args:
    model: object whose `predict(batch, verbose=0)` returns one row of
      next-character probabilities per input window.
    text: corpus string the seed is drawn from; must be longer than `maxlen`.
    maxlen: number of characters in the model's input window.
    generationlen: number of characters to generate per temperature.
  """
  #select a text seed at random
  start_index = random.randint(0,len(text)-maxlen -1)
  seed_text = text[start_index:start_index+maxlen]
  print(' --- Generating text with seed:"' + seed_text + '"')

  #tries a range of different temperatures
  for temperature in [0.2, 0.5, 1.0, 1.2]:
    print('----temperature:', temperature)
    #restart from the announced seed so every temperature is conditioned on
    #the same context (previously later temperatures continued from the
    #tail of the preceding temperature's output)
    generated_text = seed_text
    sys.stdout.write(generated_text)

    #generates characters starting from the seed text
    for i in range(generationlen):

      #one-hot encodes the current window of characters
      sampled = np.zeros((1,maxlen,len(chars)))
      for t, char in enumerate(generated_text):
        sampled[0, t, char_indices[char]] = 1.

      #samples the next character
      preds = model.predict(sampled, verbose = 0)[0]
      next_index = sample(preds, temperature)
      next_char = chars[next_index]

      #slides the window: drop the oldest character, append the new one
      generated_text = generated_text[1:] + next_char

      sys.stdout.write(next_char)
    sys.stdout.write('\n\n')


## Load the data

Here I chose **Anarchism and Socialism**, by George Plechanoff, as the source text:


https://www.gutenberg.org/files/30506/30506.txt

In [None]:
#fetch the book through the Keras download helper; `path` is the local file
path = keras.utils.get_file(
    '305068.txt',
    origin = 'https://www.gutenberg.org/files/30506/30506.txt'
)

#read inside a context manager so the file handle is closed right away
with open(path) as corpus_file:
  text = corpus_file.read().lower()

#skip the front matter: these hard-coded character offsets are tied to this
#exact file and make the corpus start at the book title
#NOTE(review): the 500000 upper bound is larger than the resulting corpus, so
#nothing is trimmed from the end; the generated samples contain
#"project gutenberg-tm license", which suggests the trailing license text is
#still part of the training data - confirm and trim if unwanted
text = text[12425 + 270 + 13:500000]

print('corpus length:', len(text))

Downloading data from https://www.gutenberg.org/files/30506/30506.txt
corpus length: 210387


In [None]:
# preview the opening 100 characters of the manuscript
print(text[0:100])

anarchism and socialism




chapter i

the point of view of the utopian socialists


the french mate


In [None]:
maxlen = 100 #length of the extracted sequences for training
step = 1 #we sample a sequence every `step` characters

#start offset of every training window; the range stops early enough that
#each window still has one follow-up character after it
window_starts = range(0, len(text)-maxlen, step)
sentences = [text[i:i+maxlen] for i in window_starts] #the extracted sequences
next_chars = [text[i+maxlen] for i in window_starts] #the target characters (the follow-up characters)

print('Number of sequences:', len(sentences))

chars = sorted(set(text)) #list of unique characters in the corpus
print('Unique characters:', len(chars))
#dictionary that maps each unique character to its index in the list "chars"
char_indices = {char: index for index, char in enumerate(chars)}

print('Vectorization...', end = '')
#we (one-hot-)encode the characters into binary arrays
x = np.zeros((len(sentences), maxlen, len(chars)), dtype = bool)
y = np.zeros((len(sentences), len(chars)), dtype = bool)
#encode the corpus once as integer ids, then set all one-hot bits with array
#indexing instead of a Python loop over every (sequence, position) pair
text_ids = np.fromiter((char_indices[char] for char in text), dtype = np.intp, count = len(text))
starts = np.arange(0, len(text)-maxlen, step)
rows = np.arange(len(starts))
positions = np.arange(maxlen)
#window_ids[i, t] is the id of the t-th character of sentences[i]
window_ids = text_ids[starts[:, None] + positions]
x[rows[:, None], positions, window_ids] = True
#the target of sequence i is the character right after its window
y[rows, text_ids[starts + maxlen]] = True
print('completed')

Number of sequences: 210287
Unique characters: 58
Vectorization...completed


In [None]:
x.shape

(210287, 100, 58)

In [None]:
y.shape

(210287, 58)

## LSTM Model


In [None]:
#single LSTM layer (128 units) over the one-hot windows, followed by a
#softmax over the character vocabulary
model = keras.models.Sequential([
    keras.layers.LSTM(128, input_shape = (maxlen, len(chars))),
    keras.layers.Dense(len(chars), activation = 'softmax'),
])

#the targets are one-hot encoded, hence categorical crossentropy
optimizer = keras.optimizers.Adam(learning_rate = 0.01)
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy')

### Training

In [None]:
#train the LSTM for 30 epochs with batches of 1024 sequences
model.fit(x, y, epochs = 30, batch_size = 1024)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f09d7362d70>

## LSTM Generate Text

In [None]:
#print 500 generated characters per temperature with the LSTM
generate_text(model, text, maxlen, generationlen = 500)

 --- Generating text with seed:"premely
ridiculous.

"anarchy means the negation of authority. now, government claims to base
the le"
----temperature: 0.2
premely
ridiculous.

"anarchy means the negation of authority. now, government claims to base
the least and the property of the "propaganda of the results of the production of the "contract" price. the property of the production of the production of the proletariat is the product of the production of the production of the production of the anarchists all the property of the property of the works of the production of the property of the production of the property of the product of the property of the product of society of the property of the production of the reader of the production of the wor

----temperature: 0.5
 the product of society of the property of the production of the reader of the production of the working organised in the especially them as they de l'allions and the proletariat is the production of his property in the so

## GRU Model

In [None]:
#single GRU layer (128 units) over the one-hot windows, followed by a
#softmax over the character vocabulary
gru_model = keras.models.Sequential([
    keras.layers.GRU(128, input_shape = (maxlen, len(chars))),
    keras.layers.Dense(len(chars), activation = 'softmax'),
])

#the targets are one-hot encoded, hence categorical crossentropy
optimizer = keras.optimizers.Adam(learning_rate = 0.01)
gru_model.compile(optimizer = optimizer, loss = 'categorical_crossentropy')

### Training

In [None]:
#train the GRU for 30 epochs with batches of 1024 sequences
gru_model.fit(x, y, epochs = 30, batch_size = 1024)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f09d64b8310>

## GRU Generate Text

In [None]:
#print 200 generated characters per temperature with the GRU
generate_text(gru_model, text, maxlen, generationlen = 200)

 --- Generating text with seed:"mpetition with every possible system of fines,
stoppages from wages, espionage, etc.; the workman ma"
----temperature: 0.2
mpetition with every possible system of fines,
stoppages from wages, espionage, etc.; the workman may contracting its even the state with the property, and it is the state and the state is the "communist" and the property of the social organisation of the property of the "communist" and the property

----temperature: 0.5
ist" and the property of the social organisation of the property of the "communist" and the property in order to do with the proletarians in the production in the nature. the state, the property, a project gutenberg-tm license society the point of the learned the revolutions of the paper of the word

----temperature: 1.0
oject gutenberg-tm license society the point of the learned the revolutions of the paper of the word
human nature. for anaties, "the state. and abllen itselfing exchanications, stirner. but
one menia

## Not Shallow RNN Model

In [None]:
#two stacked SimpleRNN layers: the first (32 units) returns its full output
#sequence so the second (64 units) can consume it; a softmax over the
#character vocabulary comes last
rnn_model = keras.models.Sequential([
    keras.layers.SimpleRNN(32, input_shape = (maxlen, len(chars)), return_sequences=True),
    keras.layers.SimpleRNN(64),
    keras.layers.Dense(len(chars), activation = 'softmax'),
])

#the targets are one-hot encoded, hence categorical crossentropy
optimizer = keras.optimizers.Adam(learning_rate = 0.01)
rnn_model.compile(optimizer = optimizer, loss = 'categorical_crossentropy')

### Training

In [None]:
#train the stacked SimpleRNN for 30 epochs with batches of 1024 sequences
rnn_model.fit(x, y, epochs = 30, batch_size = 1024)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f09d65c4fa0>

## Not Shallow RNN Generate Text

In [None]:
#print 200 generated characters per temperature with the stacked SimpleRNN
generate_text(rnn_model, text, maxlen, generationlen = 200)

 --- Generating text with seed:"
november, 1879. "in 1881, in the month of september, when the dyers'
strike broke out at villefranc"
----temperature: 0.2

november, 1879. "in 1881, in the month of september, when the dyers'
strike broke out at villefrance and it is the state the social social common the socialist of the struggle of the social more a social common of the individualism of property of the struggle and the social social socialism of the 

----temperature: 0.5
cial common of the individualism of property of the struggle and the social social socialism of the all the state is the person and the adrotes of the
theorisation of society of the social organisation of society of the working minders site to the same in marx, of the peast of the enconotist and all

----temperature: 1.0
n of society of the working minders site to the same in marx, of the peast of the enconotist and all is ond the compantizin amnenks,
fid the acios to
work,! to babows; "a mistiolonge. 
"agek to probl