# This notebook loads the trained model's weights into the inference model and uses it to generate predictions and novel sequences

In [79]:
from models.arcs import generate, GenPhiloTextA
from utilities.loaders import load_file
from utilities.preprocessors import preprocess, get_chars, map_value_to_index, decode_predictions
import tensorflow as tf

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load raw data

In [80]:
# Load the notes corpus and run it through the project's preprocessing step.
corpus = preprocess(load_file('./data/notes.txt'))

# Build the character inventory, then the two lookups derived from it:
# the forward one (passed to `generate` for the prompts) and its inverse
# (passed to `decode_predictions` to turn predicted ids back into text).
chars = get_chars(corpus)
char_to_idx = map_value_to_index(chars)
idx_to_char = map_value_to_index(chars, inverted=True)

In [81]:
# Display the lookup's vocabulary to confirm which characters (and the '[UNK]' entry) are indexed.
char_to_idx.get_vocabulary()

['[UNK]',
 '\n',
 ' ',
 '!',
 '"',
 '&',
 "'",
 '(',
 ')',
 ',',
 '-',
 '.',
 '/',
 '0',
 '1',
 '2',
 '3',
 '4',
 '6',
 '7',
 '8',
 '9',
 ':',
 ';',
 '?',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '´',
 'ç',
 'é',
 'ï',
 '—',
 '…']

In [82]:
# Vocabulary size, counting every entry returned by the lookup (including '[UNK]');
# it sizes both the embedding table and the final dense layer of the model.
n_unique = len(char_to_idx.get_vocabulary())

# Declare same hyper params used in training model

In [83]:
# Architecture settings: these are passed to the model constructor and must
# match the values used when the saved weights were trained.
emb_dim, n_a = 256, 512
dense_layers_dims = [n_unique]
drop_prob, normalize = 0.4, False

# Sequence length used for the sample input that builds the model.
T_x = 100

# Settings carried over from the training run; they are not passed to the
# model constructor in this notebook.
batch_size, n_epochs = 128, 100
alpha, lambda_ = 1e-3, 0.8

# Declare sample input to build inference model and access .summary()

In [84]:
# Dummy batch holding one sequence of T_x integer ids, used only to call the
# model once so its variables get created before the weights are loaded.
# For integer dtypes `maxval` is exclusive, so it is set to n_unique to let
# every id in [0, n_unique - 1] be drawn (n_unique - 1 excluded the last id).
sample_input = tf.random.uniform(shape=(1, T_x), minval=0, maxval=n_unique, dtype=tf.int32)

# Zero-filled (1, n_a) tensors.
# NOTE(review): presumably initial LSTM hidden/cell states; the cells in this
# notebook never pass them to the model — confirm whether they are still needed.
sample_h = tf.zeros(shape=(1, n_a))
sample_c = tf.zeros(shape=(1, n_a))

# Redeclare architecture by passing the same hyper params used in training and then load weights

In [85]:
# Re-create the architecture with the same hyperparameters used in training.
saved_model = GenPhiloTextA(
    emb_dim=emb_dim,
    n_a=n_a,
    n_unique=n_unique,
    dense_layers_dims=dense_layers_dims,
    drop_prob=drop_prob,
    normalize=normalize,
)

# Call the model once on the sample batch so its variables are created;
# the returned tensor has one row of n_unique values per input position.
saved_model(sample_input)

<tf.Tensor: shape=(1, 100, 57), dtype=float32, numpy=
array([[[-1.33619842e-03,  3.13621992e-03, -2.21148343e-03, ...,
         -2.92795245e-03,  2.27503013e-04,  3.00831767e-03],
        [-6.20965101e-03,  4.55288403e-03, -6.15663640e-03, ...,
         -2.13051680e-05, -8.38564010e-04,  6.84343791e-03],
        [-4.63428348e-03,  1.04413843e-02, -3.23848473e-03, ...,
          1.03993174e-02, -5.64499479e-03,  1.47058303e-03],
        ...,
        [ 6.62382692e-04,  7.68435793e-03, -1.22926570e-03, ...,
         -1.35293845e-02, -1.68554820e-02, -3.80272116e-03],
        [-9.15273326e-04, -1.92942796e-04, -3.65006179e-03, ...,
         -5.86212613e-03, -1.55286584e-02, -3.13036493e-04],
        [-9.86047322e-04,  4.66224598e-03, -2.23039975e-03, ...,
         -1.96545944e-02, -1.87814608e-02, -6.25664787e-03]]],
      dtype=float32)>

In [86]:
# Call the model again with a longer sequence (T_x + 10 ids) to check that it
# is not tied to the sequence length it was first built with.
# `maxval` is exclusive for integer dtypes, so n_unique (not n_unique - 1) is
# needed for the last vocabulary id to be drawable.
sample_input = tf.random.uniform(shape=(1, T_x + 10), minval=0, maxval=n_unique, dtype=tf.int32)
saved_model(sample_input)

<tf.Tensor: shape=(1, 110, 57), dtype=float32, numpy=
array([[[ 0.0059837 , -0.0008248 ,  0.00543853, ..., -0.00291569,
          0.00311262,  0.00386178],
        [ 0.01022061, -0.00227587,  0.00012124, ..., -0.00649941,
          0.00553274, -0.00154827],
        [ 0.00604571, -0.00104487,  0.0028869 , ..., -0.0074162 ,
         -0.00280376, -0.00114323],
        ...,
        [-0.00193854,  0.0049914 , -0.00059364, ..., -0.00274165,
         -0.00729378,  0.00113984],
        [ 0.00280926,  0.00717192,  0.0055715 , ..., -0.0062622 ,
         -0.00608908,  0.00033527],
        [ 0.00753055,  0.00463874,  0.00291101, ..., -0.00241659,
          0.00227134, -0.00550668]]], dtype=float32)>

In [87]:
# Inspect the freshly initialised weights, before any checkpoint is loaded.
saved_model.trainable_weights

[<tf.Variable 'gen_philo_text_a_5/character-lookup/embeddings:0' shape=(57, 256) dtype=float32, numpy=
 array([[ 0.01257909,  0.02009906,  0.03329917, ...,  0.04545503,
          0.00037428, -0.04652026],
        [ 0.03750959, -0.03123593,  0.00890447, ...,  0.02366802,
          0.00490095, -0.01255263],
        [-0.026201  , -0.03308878, -0.00897201, ..., -0.03017755,
         -0.0072464 , -0.04433332],
        ...,
        [ 0.04677354, -0.01812103, -0.02034912, ..., -0.03995552,
          0.0014082 ,  0.00329578],
        [-0.02980897,  0.00070242, -0.00374442, ..., -0.01520568,
         -0.00380508, -0.01011614],
        [-0.04408456, -0.02732155,  0.02988838, ...,  0.04472924,
          0.00410337,  0.03873762]], dtype=float32)>,
 <tf.Variable 'gen_philo_text_a_5/lstm-layer/lstm_cell/kernel:0' shape=(256, 2048) dtype=float32, numpy=
 array([[-0.02581741, -0.04000294,  0.01973089, ...,  0.01416148,
          0.00453249,  0.01662399],
        [ 0.03728869,  0.04098679,  0.02973775,

In [88]:
# Print the layer-by-layer summary and parameter counts of the built model.
saved_model.summary()

Model: "gen_philo_text_a_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 character-lookup (Embeddin  multiple                  14592     
 g)                                                              
                                                                 
 lstm-layer (LSTM)           multiple                  1574912   
                                                                 
 dense-layer-0 (Dense)       multiple                  29241     
                                                                 
Total params: 1618745 (6.18 MB)
Trainable params: 1618745 (6.18 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [89]:
# Checkpoint holding the trained weights to restore into the rebuilt model.
weights_path = './saved/weights/notes_gen_philo_text_a_100_3.0299.h5'
saved_model.load_weights(filepath=weights_path)

In [90]:
# Inspect the weights again: the values should now differ from the freshly
# initialised ones shown before the checkpoint was loaded.
saved_model.trainable_weights

[<tf.Variable 'gen_philo_text_a_5/character-lookup/embeddings:0' shape=(57, 256) dtype=float32, numpy=
 array([[-2.27964771e-31, -1.04065716e-31,  6.76143058e-32, ...,
          5.78990514e-33, -6.32452695e-35,  1.94203334e-32],
        [-2.33196533e-06, -5.57359442e-07,  7.22897767e-07, ...,
          3.29896963e-08,  4.24313072e-07, -7.51945777e-08],
        [-6.22333800e-06, -8.55494363e-06,  1.20172226e-05, ...,
          3.50521004e-05,  2.60806992e-05, -1.78583414e-05],
        ...,
        [ 1.16941873e-11,  5.07066784e-11,  2.42132052e-12, ...,
          1.95506927e-11, -9.94128564e-12,  5.03835099e-11],
        [-1.29197097e-09, -5.15809404e-08,  6.50408394e-09, ...,
          1.08325215e-08,  1.27126455e-07, -2.77360197e-08],
        [ 6.44838792e-07, -1.91505521e-07,  9.48950856e-08, ...,
          1.13875785e-06,  8.59064301e-07, -3.50037581e-08]], dtype=float32)>,
 <tf.Variable 'gen_philo_text_a_5/lstm-layer/lstm_cell/kernel:0' shape=(256, 2048) dtype=float32, numpy=
 arra

# generate characters using prompt

In [91]:
# Generate from the same prompt at four sampling temperatures, highest first.
# The generator is consumed in order while unpacking, so `generate` runs once
# per temperature in the sequence 1.0, 0.8, 0.6, 0.4.
prompts = ["dostoevsky"]
pred_ids_a, pred_ids_b, pred_ids_c, pred_ids_d = (
    generate(saved_model, prompts=prompts, char_to_idx=char_to_idx, temperature=temperature)
    for temperature in (1.0, 0.8, 0.6, 0.4)
)

tf.Tensor([[28 39 43 44 39 29 46 43 35 49]], shape=(1, 10), dtype=int64)
tf.Tensor(
[[[-9.7952491e-01 -4.9550895e-02  7.8759319e-01 -9.4414169e-01
   -9.1001797e-01 -9.8696464e-01 -1.0040903e+00 -9.8399627e-01
   -9.6389097e-01 -1.4198892e-03 -9.9328011e-01 -3.3886018e-01
   -9.5995331e-01 -9.6912402e-01 -9.7228730e-01 -1.0030477e+00
   -9.4810551e-01 -1.0059488e+00 -9.9453092e-01 -9.9514341e-01
   -9.7884911e-01 -1.0189379e+00 -9.2216134e-01 -9.6369183e-01
   -9.5581561e-01  9.0897304e-01  3.4355658e-01  6.2924218e-01
    8.2376444e-01  9.0804207e-01  7.2464401e-01  5.3681123e-01
    9.0523499e-01  8.8704216e-01 -9.1242683e-01 -4.0618929e-01
    8.0495578e-01  6.1087185e-01  8.9835668e-01  9.1019070e-01
    2.1426600e-01 -9.6540904e-01  8.5857338e-01  9.0975851e-01
    9.2934954e-01  7.0334327e-01  2.4025586e-01  5.6966954e-01
   -9.3542063e-01  5.4357678e-01 -9.6410060e-01 -9.4257385e-01
   -9.8725039e-01 -9.6333325e-01 -1.0216758e+00 -9.2859179e-01
   -9.4579798e-01]
  [-1.1025426e+

In [92]:
pred_ids_a

<tf.Tensor: shape=(1, 260), dtype=int64, numpy=
array([[28, 39, 43, 44, 39, 29, 46, 43, 35, 49, 37, 29, 43,  2,  1, 38,
        39, 45, 45, 29, 29,  2, 29, 33, 44, 38, 45,  2, 28, 29, 39, 27,
        39,  2, 28, 29, 42, 40, 42, 38, 38,  2, 25, 49, 28, 33, 11, 44,
        22,  2, 36, 30,  2,  2,  9,  2, 32, 25, 47,  2, 43, 44, 35,  2,
         2,  4, 43,  1, 44, 36, 43,  2, 38, 31, 44,  2, 25,  9, 36, 38,
        44, 44, 32,  2, 38, 56,  2, 38, 38, 47, 19, 32, 27,  2, 24, 33,
        32, 31,  2, 43,  2, 38, 29, 43, 29, 29, 38,  1, 39, 27,  2, 29,
        35, 33, 44, 38, 33, 38, 42,  2, 40, 27, 47, 32, 38, 44,  2, 39,
        28, 44,  2, 29, 39, 32, 29, 12, 33, 25, 44, 32, 44,  2, 43, 29,
         1, 37, 44,  2, 25, 33, 33, 29,  2, 44, 28, 29, 31, 33, 29, 32,
        29,  2, 29, 38, 10, 29, 39, 33, 38, 29, 29,  2,  2, 39, 32,  2,
        32, 27,  2, 42, 33, 44, 44, 39, 33, 46,  2, 25, 39, 49,  2, 44,
        29, 29,  9, 36,  2, 43, 44, 49,  2, 33, 27, 39, 25,  8, 42, 54,
        29, 44, 

# Decode the predicted ids back into text

In [93]:
# Decode the ids generated at temperature 1.0 back into a string.
decode_predictions(pred_ids_a, idx_to_char)

'dostoevskymes \nnouuee eitnu deoco derprnn aydi.t: lf  , haw stk  "s\ntls ngt a,lntth n… nnw7hc ?ihg s neseen\noc ekitninr pcwhnt odt eohe/iatht se\nmt aiie tdegiehe en-eoinee  oh hc rittoiv aoy tee,l sty icoa)rïetfn6behodr anrer v s aatghfoastnms\n\nhb nnodge rieoe'

In [94]:
# Decode the ids generated at temperature 0.8 back into a string.
decode_predictions(pred_ids_b, idx_to_char)

'dostoevskyo c&d gesel  h  d  hfcet nneynain eleh   aiike nocti n   ettdewobca snhs o\nebauteaatosina i sna ilimo 6t nii ahebtre ain hiidhe na  ipa  a a srdeiayadenacy  rie c ahtto edn i a codnsan voerhd yde8nnioeenhsoweie ceh\nih saseosashyih   3ea te.ot os.—lrr'

In [95]:
# Decode the ids generated at temperature 0.6 back into a string.
decode_predictions(pred_ids_c, idx_to_char)

'dostoevskyh l e et e a  ib lo  e  e snea n n aaisigeh a  h noits oeof ionn o    dst  a ae   aeeit eaiale   eoeie a\n i n dn yie iet e hgeh ho iisseast    eetn th8thk bh o    ite  ehn  ha   ol e nnei iaietoh o oeehtieiseee i  r l   es doona nst e ein orhab   i e'

In [96]:
# Decode the ids generated at temperature 0.4 back into a string.
decode_predictions(pred_ids_d, idx_to_char)

'dostoevsky  h      on   eet ao          s   w  t  o i  t o e  t   ero   pe enail a  oe te  t   nee o oie rtinh eet t  i   oia     of     n taes     t i e      n t atdn ioeaa t e      tes a ee       s  a     it   e  ent str    i    e  a  ei      otoh  a e h   a'