In [3]:
from models.arcs import load_alt_model_a, load_alt_model_b, load_inf_model, GenPhiloText
from utilities.loaders import load_file
from utilities.preprocessors import preprocess, map_value_to_index, init_sequences_a, init_sequences_b
from utilities.visualizers import export_results

from tensorflow.keras.losses import CategoricalCrossentropy as cce_loss
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import CategoricalAccuracy, CategoricalCrossentropy as cce_metric

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model
import tensorflow as tf

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Read the raw notes file through the project's load_file helper.
# The preview in the next cell shows the result is one long str.
corpus = load_file('./data/notes.txt')

In [5]:
# Peek at the first 500 characters of the raw text.
corpus[:500]

'A simple idea “What is the meaning of life?”\nI asked as I learned through the works of Camus? \nOne step down, I felt a yearning of meaning in this world.\nIn this yearning I stumbled upon eastern philosophy;\nIkigai as the Japanese philosophers called it was a considerable way for me to find meaning at that certain point in my life. \nFollowed then another idea, a leap of faith as Kierkegaard would call it, yet I had no idea this was his idea. \nCalm followed after the storm, and then I took another'

In [6]:
# Length of the raw corpus, in characters.
len(corpus)

226750

In [7]:
# Distinct characters of the raw corpus in sorted order; sorted() accepts the
# set directly and always returns a new list.
chars = sorted(set(corpus))
chars

['\n',
 ' ',
 '!',
 '"',
 '&',
 "'",
 '(',
 ')',
 ',',
 '-',
 '.',
 '/',
 '0',
 '1',
 '2',
 '3',
 '4',
 '6',
 '7',
 '8',
 '9',
 ':',
 ';',
 '?',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'Y',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '´',
 'ç',
 'é',
 'ï',
 '–',
 '—',
 '‘',
 '’',
 '“',
 '”',
 '…']

# Preprocessing corpus
* replace curly double quotation marks '“'/'”' with the straight double quote '"'
* replace curly single quotation marks '‘'/'’' with the straight single quote "'"
* replace the en dash '–' with the em dash '—'
* lowercase all text (for now)
* replace 3 consecutive '.' with the single ellipsis character '…'

In [8]:
# Normalise the raw text with the project's preprocess helper and preview it.
# NOTE(review): `corpus` is rebound to the processed text, so re-running this
# cell on its own feeds already-processed text back into preprocess; re-run the
# load_file cell first if this cell has to be executed again.
corpus = preprocess(corpus)
corpus[:500]

'a simple idea "what is the meaning of life?"\ni asked as i learned through the works of camus? \none step down, i felt a yearning of meaning in this world.\nin this yearning i stumbled upon eastern philosophy;\nikigai as the japanese philosophers called it was a considerable way for me to find meaning at that certain point in my life. \nfollowed then another idea, a leap of faith as kierkegaard would call it, yet i had no idea this was his idea. \ncalm followed after the storm, and then i took another'

In [9]:
# Length after preprocessing; it differs from the raw length because
# preprocess rewrites some characters/sequences.
len(corpus)

226961

In [10]:
# Vocabulary of the preprocessed corpus, with the '[UNK]' placeholder placed
# first so that it occupies position 0 of the list.
chars = ['[UNK]'] + sorted(set(corpus))
chars

['[UNK]',
 '\n',
 ' ',
 '!',
 '"',
 '&',
 "'",
 '(',
 ')',
 ',',
 '-',
 '.',
 '/',
 '0',
 '1',
 '2',
 '3',
 '4',
 '6',
 '7',
 '8',
 '9',
 ':',
 ';',
 '?',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '´',
 'ç',
 'é',
 'ï',
 '—',
 '…']

In [11]:
# Vocabulary size, counting the '[UNK]' entry prepended to `chars` above.
n_unique = len(chars)

# Create the mappings between each unique character and its index (in both directions)

In [12]:
# Forward (char -> index) and inverse (index -> char) lookup tables over `chars`,
# as confirmed by the two displays that follow.
# NOTE(review): the third argument (0) presumably sets the first index — confirm
# against map_value_to_index; the displayed mappings do start at 0.
char_to_idx = map_value_to_index(chars, len(chars), 0)
idx_to_char = map_value_to_index(chars, len(chars), 0, inverted=True)

In [13]:
# Inspect the character -> index mapping.
char_to_idx

{'[UNK]': 0,
 '\n': 1,
 ' ': 2,
 '!': 3,
 '"': 4,
 '&': 5,
 "'": 6,
 '(': 7,
 ')': 8,
 ',': 9,
 '-': 10,
 '.': 11,
 '/': 12,
 '0': 13,
 '1': 14,
 '2': 15,
 '3': 16,
 '4': 17,
 '6': 18,
 '7': 19,
 '8': 20,
 '9': 21,
 ':': 22,
 ';': 23,
 '?': 24,
 'a': 25,
 'b': 26,
 'c': 27,
 'd': 28,
 'e': 29,
 'f': 30,
 'g': 31,
 'h': 32,
 'i': 33,
 'j': 34,
 'k': 35,
 'l': 36,
 'm': 37,
 'n': 38,
 'o': 39,
 'p': 40,
 'q': 41,
 'r': 42,
 's': 43,
 't': 44,
 'u': 45,
 'v': 46,
 'w': 47,
 'x': 48,
 'y': 49,
 'z': 50,
 '´': 51,
 'ç': 52,
 'é': 53,
 'ï': 54,
 '—': 55,
 '…': 56}

In [14]:
# Inspect the index -> character mapping (the inverse of char_to_idx).
idx_to_char

{0: '[UNK]',
 1: '\n',
 2: ' ',
 3: '!',
 4: '"',
 5: '&',
 6: "'",
 7: '(',
 8: ')',
 9: ',',
 10: '-',
 11: '.',
 12: '/',
 13: '0',
 14: '1',
 15: '2',
 16: '3',
 17: '4',
 18: '6',
 19: '7',
 20: '8',
 21: '9',
 22: ':',
 23: ';',
 24: '?',
 25: 'a',
 26: 'b',
 27: 'c',
 28: 'd',
 29: 'e',
 30: 'f',
 31: 'g',
 32: 'h',
 33: 'i',
 34: 'j',
 35: 'k',
 36: 'l',
 37: 'm',
 38: 'n',
 39: 'o',
 40: 'p',
 41: 'q',
 42: 'r',
 43: 's',
 44: 't',
 45: 'u',
 46: 'v',
 47: 'w',
 48: 'x',
 49: 'y',
 50: 'z',
 51: '´',
 52: 'ç',
 53: 'é',
 54: 'ï',
 55: '—',
 56: '…'}

In [15]:
# Window length (T_x) used when slicing the corpus into training sequences.
n_time_steps = 100
# Build the model-A training data. Per the outputs below, X is a 2-D integer
# array with n_time_steps columns and Y holds one integer target index per row.
# Presumably each Y entry is the character that follows its window — confirm in
# init_sequences_a.
X, Y = init_sequences_a(corpus, char_to_idx, T_x=n_time_steps)
X

array([[25,  2, 43, ..., 29,  2, 43],
       [ 2, 43, 33, ...,  2, 43, 44],
       [43, 33, 37, ..., 43, 44, 29],
       ...,
       [39, 42,  2, ..., 29, 36, 33],
       [42,  2, 44, ..., 36, 33, 29],
       [ 2, 44, 42, ..., 33, 29, 30]])

In [16]:
# (number of windows, n_time_steps)
X.shape

(226861, 100)

In [17]:
# Targets are still plain integer indices at this point.
Y

array([44, 29, 40, ..., 29, 30, 11])

# Convert Y's indices to their one-hot vector representation

In [18]:
# One-hot encode the integer targets; each row gets n_unique columns.
# NOTE(review): `Y` is rebound to the encoded tensor, so re-running this cell
# without re-running the init_sequences_a cell passes the already-encoded
# tensor back into tf.one_hot.
Y = tf.one_hot(Y, depth=n_unique)
Y

<tf.Tensor: shape=(226861, 57), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>

In [19]:
# Spot-check a single one-hot row.
Y[3]

<tf.Tensor: shape=(57,), dtype=float32, numpy=
array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0.], dtype=float32)>

In [20]:
# Number of training examples (rows of X).
len(X)

226861

In [21]:
# Length of the last window; expected to equal n_time_steps.
len(X[-1])

100

# Instantiate generative model A with set architecture

In [22]:
# Size of each character embedding vector (last dim of the embedding layer
# in the summary below).
emb_dim = 64
# Number of units in each LSTM layer (last dim of the LSTM outputs in the
# summary below).
n_a = 32

In [23]:
# Build alternative model A from the hyper-parameters defined above, then
# print its layer table.
model = load_alt_model_a(
    n_unique=n_unique,
    T_x=n_time_steps,
    emb_dim=emb_dim,
    n_a=n_a,
    keep_prob=0.8,
    lambda_=0.1,
)
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 64)           3648      
                                                                 
 lstm (LSTM)                 (None, 100, 32)           12416     
                                                                 
 lstm_1 (LSTM)               (None, 32)                8320      
                                                                 
 dense (Dense)               (None, 57)                1881      
                                                                 
 batch_normalization (Batch  (None, 57)                228       
 Normalization)                                                  
                                                                 
 activation (Activation)     (None, 57)                0         
                                                       

# Provide loss, optimizer, and metrics for both alternative models A and B

In [24]:
# Optimizer with its hyper-parameters spelled out explicitly.
opt = Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
)

# Loss and the metrics tracked during training.
loss = cce_loss()
metrics = [
    CategoricalAccuracy(),
    cce_metric(),
]

# Compilation is currently disabled in this notebook:
# model.compile(loss=loss, optimizer=opt, metrics=metrics)

# Train alternative model A and checkpoint weights

In [25]:
# weights_path = "./weights/weights-improvement-{epoch:02d}-{categorical_accuracy:.4f}.hdf5"
# checkpoint = ModelCheckpoint(weights_path, monitor='categorical_accuracy', verbose=1, save_best_only=True, mode='max')
# callbacks_list = [checkpoint]

In [26]:
# history = model.fit(X, Y, epochs=20, batch_size=2048, callbacks=callbacks_list)

In [27]:
# export_results(history, ['loss'], image_only=False)
# export_results(history, ['categorical_accuracy'], image_only=False)

# Preprocessing for alternative model B

In [28]:
# Build the sequences for alternative model B.
# NOTE(review): this rebinds X and Y, replacing the model-A data created
# earlier in the notebook. Per the outputs below, X is now a plain nested list
# (2248 sequences of length 100), so displaying it bare prints a very long
# listing — consider previewing a slice instead.
X, Y = init_sequences_b(corpus, char_to_idx, T_x=n_time_steps)
X

[[25,
  2,
  43,
  33,
  37,
  40,
  36,
  29,
  2,
  33,
  28,
  29,
  25,
  2,
  4,
  47,
  32,
  25,
  44,
  2,
  33,
  43,
  2,
  44,
  32,
  29,
  2,
  37,
  29,
  25,
  38,
  33,
  38,
  31,
  2,
  39,
  30,
  2,
  36,
  33,
  30,
  29,
  24,
  4,
  1,
  33,
  2,
  25,
  43,
  35,
  29,
  28,
  2,
  25,
  43,
  2,
  33,
  2,
  36,
  29,
  25,
  42,
  38,
  29,
  28,
  2,
  44,
  32,
  42,
  39,
  45,
  31,
  32,
  2,
  44,
  32,
  29,
  2,
  47,
  39,
  42,
  35,
  43,
  2,
  39,
  30,
  2,
  27,
  25,
  37,
  45,
  43,
  24,
  2,
  1,
  39,
  38,
  29,
  2,
  43],
 [29,
  40,
  2,
  28,
  39,
  47,
  38,
  9,
  2,
  33,
  2,
  30,
  29,
  36,
  44,
  2,
  25,
  2,
  49,
  29,
  25,
  42,
  38,
  33,
  38,
  31,
  2,
  39,
  30,
  2,
  37,
  29,
  25,
  38,
  33,
  38,
  31,
  2,
  33,
  38,
  2,
  44,
  32,
  33,
  43,
  2,
  47,
  39,
  42,
  36,
  28,
  11,
  1,
  33,
  38,
  2,
  44,
  32,
  33,
  43,
  2,
  49,
  29,
  25,
  42,
  38,
  33,
  38,
  31,
  2,
  33,
  2,
  43,


In [29]:
# Number of sequences produced by init_sequences_b.
len(X)

2248

In [30]:
# Length of the second-to-last sequence.
len(X[-2])

100

In [31]:
# Length of the last sequence; it matches the others, see its contents below.
len(X[-1])

100

In [32]:
# The last target sequence has the same length as its input sequence.
len(Y[-1])

100

In [33]:
# Inspect the final sequence: only its first few positions carry characters and
# the remainder is filled with 0.
# NOTE(review): 0 is also the index of '[UNK]' in char_to_idx, so this fill value
# and unknown characters share one id — confirm that is intended.
X[-1]

[25,
 35,
 43,
 2,
 37,
 49,
 2,
 26,
 29,
 36,
 33,
 29,
 30,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

# Load a saved model
* see the architecture
* see if prediction works properly on dummy data

In [34]:
# Load a previously saved Keras model (HDF5 file) for inspection.
saved_model = load_model('./saved/models/test_model.h5')

In [37]:
# Print the layer table of the loaded model.
# NOTE(review): this cell's execution count (37) is out of order relative to
# the cells below it (35, 36); re-run the notebook top to bottom before sharing.
saved_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 50)]                 0         []                            
                                                                                                  
 character_lookup (Embeddin  (None, 50, 32)               832       ['input_1[0][0]']             
 g)                                                                                               
                                                                                                  
 tf.__operators__.getitem (  (None, 32)                   0         ['character_lookup[0][0]']    
 SlicingOpLambda)                                                                                 
                                                                                              

#### Recall that our model needs 3 inputs: X, the hidden state, and the cell state. Because we are generating novel sequences with the trained model, we pass in a single example of shape $(m, T_x) = (1, 50)$ (the saved model's input layer expects $T_x = 50$). The hidden and cell states keep their usual shape $(m, n_a)$, which becomes $(1, n_a) = (1, 128)$ here because we pass only one input example to the model.

In [35]:
# Dummy inputs for a smoke test of the loaded model.

# Token ids: one example of 50 time steps. For integer dtypes `maxval` is an
# exclusive upper bound, so it must be 26 for every id 0..25 of the model's
# 26-entry vocabulary (embedding params 832 = 26 * 32) to be reachable.
sample_input = tf.random.uniform(shape=(1, 50), minval=0, maxval=26, dtype=tf.int32)

# Zero-initialised hidden and cell states for a single example.
sample_h = tf.zeros(shape=(1, 128))
sample_c = tf.zeros(shape=(1, 128))

#### Use the model to predict an output Y, which we know will have shape $(T_y, m, n_{unique})$ — or, in this case, $(T_y, 1, 26)$, since we pass in only one example

In [36]:
# Run inference with the three inputs: token ids, hidden state, cell state.
# The output shown below is a list of arrays, each holding 26 values for the
# single input example.
saved_model.predict([sample_input, sample_h, sample_c])



[array([[1.29244705e-12, 3.73234466e-09, 3.42113228e-04, 5.56989512e-08,
         1.71724755e-12, 2.14259652e-15, 3.09830757e-05, 1.95669812e-14,
         9.99595463e-01, 3.33764787e-11, 1.03490506e-13, 3.94577285e-07,
         4.10664003e-10, 1.63977526e-10, 4.47127224e-10, 6.86967150e-12,
         9.49941580e-07, 2.81049615e-05, 2.20930563e-09, 8.07502069e-12,
         2.78912871e-09, 4.65281886e-07, 5.76804515e-09, 3.58712435e-11,
         1.41500061e-06, 1.53175949e-17]], dtype=float32),
 array([[5.2570941e-14, 7.4080730e-10, 2.2354507e-05, 7.3851125e-09,
         2.3144805e-13, 2.8446710e-16, 8.7719000e-06, 7.3597930e-15,
         9.9996328e-01, 6.3869643e-12, 6.7312459e-15, 8.0474493e-08,
         8.3880528e-11, 2.5585405e-11, 8.7025859e-11, 1.2640982e-12,
         1.5261787e-07, 4.9825421e-06, 6.2219707e-10, 8.5380758e-13,
         1.2614544e-09, 1.8329703e-07, 4.6927395e-10, 2.4469987e-11,
         2.4569502e-07, 4.8417272e-18]], dtype=float32),
 array([[9.4512673e-15, 1.209689