# This notebook loads the trained model's weights into the inference model and uses it to generate predictions and novel sequences

In [7]:
from models.arcs import generate, GenPhiloTextA
from utilities.loaders import load_file, load_lookup_table, save_lookup_table
from utilities.preprocessors import preprocess, get_chars, map_value_to_index, decode_predictions
import tensorflow as tf

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load raw data

In [8]:
# Read the notes file and run it through the project's preprocess step,
# then collect the characters that make up the corpus.
corpus = preprocess(load_file('./data/notes.txt'))
chars = get_chars(corpus)

# Two lookups built from the same character set: the default mapping and its
# inverted counterpart (used later to turn predicted ids back into text).
char_to_idx = map_value_to_index(chars)
idx_to_char = map_value_to_index(chars, inverted=True)

In [9]:
# Persist the vocabulary of char_to_idx so it can be read back later
# with load_lookup_table from the same path.
save_lookup_table(
    './saved/misc/char_to_idx',
    char_to_idx.get_vocabulary(),
)

In [10]:
# Display the vocabulary exposed by char_to_idx (the same list that was saved above)
char_to_idx.get_vocabulary()

['[UNK]',
 '\n',
 ' ',
 '!',
 '"',
 '&',
 "'",
 '(',
 ')',
 ',',
 '-',
 '.',
 '/',
 '0',
 '1',
 '2',
 '3',
 '4',
 '6',
 '7',
 '8',
 '9',
 ':',
 ';',
 '?',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '´',
 'ç',
 'é',
 'ï',
 '—',
 '…']

In [11]:
# Vocabulary size, counting every entry returned by get_vocabulary()
# (the listing above includes the '[UNK]' entry); used for the model's output width.
n_unique = len(char_to_idx.get_vocabulary())

# Declare the same hyperparameters that were used to train the model

In [12]:
# Architecture settings: these are the values forwarded to GenPhiloTextA below.
emb_dim, n_a = 256, 512
dense_layers_dims = [n_unique]
drop_prob = 0.4
normalize = False

# Sequence length used when building the sample input.
T_x = 100

# Remaining training-time settings, kept so the notebook mirrors the training
# configuration; the inference cells in this notebook do not read them.
batch_size, n_epochs = 128, 100
alpha = 1e-3
lambda_ = 0.8

# Declare sample input to build inference model and access .summary()

In [13]:
# Random integer ids of shape (1, T_x), used only to call the model once so it gets built.
# tf.random.uniform excludes maxval from the sampled range, so pass n_unique
# (not n_unique - 1) to allow every vocabulary index, including the last one.
sample_input = tf.random.uniform(shape=(1, T_x), minval=0, maxval=n_unique, dtype=tf.int32)

# Zero tensors of shape (1, n_a); the other cells in this notebook do not reference them.
sample_h = tf.zeros(shape=(1, n_a))
sample_c = tf.zeros(shape=(1, n_a))

# Redeclare architecture by passing the same hyper params used in training and then load weights

In [14]:
# Rebuild the architecture from the hyper-parameters declared above.
model_config = dict(
    emb_dim=emb_dim,
    n_a=n_a,
    n_unique=n_unique,
    dense_layers_dims=dense_layers_dims,
    drop_prob=drop_prob,
    normalize=normalize,
)
saved_model = GenPhiloTextA(**model_config)

# Call the model once on the sample batch to build it; the result is displayed as the cell output.
saved_model(sample_input)

<tf.Tensor: shape=(1, 100, 57), dtype=float32, numpy=
array([[[ 0.00637347, -0.00194257,  0.0017184 , ...,  0.0024135 ,
          0.00158272,  0.00636447],
        [ 0.00016128, -0.00831537, -0.00094313, ..., -0.00443306,
          0.00453097,  0.0103037 ],
        [ 0.00189294, -0.00532009, -0.00043195, ..., -0.00377793,
          0.00409863,  0.00437624],
        ...,
        [-0.00359458,  0.00333639,  0.0024159 , ..., -0.00248924,
         -0.00579833, -0.00299237],
        [-0.00579094,  0.00034849,  0.00371646, ...,  0.00246411,
          0.00064153,  0.00654348],
        [-0.00735949, -0.00069961,  0.00440178, ...,  0.00676476,
          0.00478449,  0.01283024]]], dtype=float32)>

In [15]:
# Call the model on a sequence 10 steps longer than T_x to confirm it accepts a different length.
# maxval is exclusive in tf.random.uniform, so n_unique (not n_unique - 1) is needed
# for the last vocabulary index to be reachable.
sample_input = tf.random.uniform(shape=(1, T_x + 10), minval=0, maxval=n_unique, dtype=tf.int32)
saved_model(sample_input)

<tf.Tensor: shape=(1, 110, 57), dtype=float32, numpy=
array([[[ 5.2687586e-03,  3.1722626e-03,  1.1256072e-04, ...,
          2.1474226e-04,  1.6479901e-03, -6.2895697e-03],
        [-3.1717243e-03,  7.5293100e-03, -1.0402719e-02, ...,
         -4.3712957e-03,  8.5224174e-03, -8.8044908e-04],
        [-1.4263841e-02,  5.5891983e-03, -1.1998402e-02, ...,
         -3.7695202e-03,  1.3822104e-02,  1.9574470e-03],
        ...,
        [ 4.8929085e-03,  7.2982209e-04, -4.1304240e-03, ...,
         -6.0831662e-05, -2.4699990e-04,  2.8615142e-03],
        [ 4.0615827e-04,  7.0680259e-04, -1.1386813e-03, ...,
         -5.0054197e-03, -3.8970208e-03,  8.8206809e-03],
        [ 1.6974423e-03, -1.5690953e-03, -4.9510542e-03, ...,
          1.1603220e-04, -3.6719686e-03,  3.8912983e-03]]], dtype=float32)>

In [16]:
# Inspect the weights before any checkpoint is loaded (load_weights is only called in a later cell)
saved_model.trainable_weights

[<tf.Variable 'gen_philo_text_a/character-lookup/embeddings:0' shape=(57, 256) dtype=float32, numpy=
 array([[ 0.02606279,  0.00845665, -0.00700065, ..., -0.0207814 ,
          0.00040376, -0.00597534],
        [ 0.04545244, -0.04270212, -0.01619456, ..., -0.0393423 ,
          0.0122922 , -0.02106935],
        [ 0.03026423,  0.04640536, -0.04253275, ..., -0.02059147,
          0.04035181, -0.03732193],
        ...,
        [-0.04143448, -0.00850261, -0.03730037, ..., -0.04218527,
          0.00363863, -0.01516747],
        [-0.0362542 , -0.02035438,  0.02146741, ...,  0.03946495,
          0.0368053 , -0.04947501],
        [ 0.04419127, -0.01613493,  0.01007401, ...,  0.01508066,
          0.03857795,  0.04362081]], dtype=float32)>,
 <tf.Variable 'gen_philo_text_a/lstm-layer/lstm_cell/kernel:0' shape=(256, 2048) dtype=float32, numpy=
 array([[ 0.01566482, -0.02201293, -0.01178078, ..., -0.03892044,
          0.04017303, -0.04851724],
        [-0.03733806, -0.01297165, -0.00211552, ...

In [17]:
# Print the layer-by-layer summary; the model has already been called on sample inputs above
saved_model.summary()

Model: "gen_philo_text_a"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 character-lookup (Embeddin  multiple                  14592     
 g)                                                              
                                                                 
 lstm-layer (LSTM)           multiple                  1574912   
                                                                 
 dense-layer-0 (Dense)       multiple                  29241     
                                                                 
Total params: 1618745 (6.18 MB)
Trainable params: 1618745 (6.18 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [18]:
# Restore the trained parameters from the saved .h5 file into the model built above.
weights_path = './saved/weights/notes_gen_philo_text_a_100_3.0299.h5'
saved_model.load_weights(filepath=weights_path)

In [19]:
# Inspect the weights again after load_weights, for comparison with the earlier dump
saved_model.trainable_weights

[<tf.Variable 'gen_philo_text_a/character-lookup/embeddings:0' shape=(57, 256) dtype=float32, numpy=
 array([[-2.27964771e-31, -1.04065716e-31,  6.76143058e-32, ...,
          5.78990514e-33, -6.32452695e-35,  1.94203334e-32],
        [-2.33196533e-06, -5.57359442e-07,  7.22897767e-07, ...,
          3.29896963e-08,  4.24313072e-07, -7.51945777e-08],
        [-6.22333800e-06, -8.55494363e-06,  1.20172226e-05, ...,
          3.50521004e-05,  2.60806992e-05, -1.78583414e-05],
        ...,
        [ 1.16941873e-11,  5.07066784e-11,  2.42132052e-12, ...,
          1.95506927e-11, -9.94128564e-12,  5.03835099e-11],
        [-1.29197097e-09, -5.15809404e-08,  6.50408394e-09, ...,
          1.08325215e-08,  1.27126455e-07, -2.77360197e-08],
        [ 6.44838792e-07, -1.91505521e-07,  9.48950856e-08, ...,
          1.13875785e-06,  8.59064301e-07, -3.50037581e-08]], dtype=float32)>,
 <tf.Variable 'gen_philo_text_a/lstm-layer/lstm_cell/kernel:0' shape=(256, 2048) dtype=float32, numpy=
 array([[

# Generate characters from a prompt

In [20]:
prompts = ["hello"]

# One generation run per sampling temperature, in the order 1.0, 0.8, 0.6, 0.4;
# the results are bound to pred_ids_a .. pred_ids_d respectively.
pred_ids_a, pred_ids_b, pred_ids_c, pred_ids_d = [
    generate(saved_model, prompts=prompts, char_to_idx=char_to_idx, temperature=temperature)
    for temperature in (1.0, 0.8, 0.6, 0.4)
]


tf.Tensor([[32 29 36 36 39]], shape=(1, 5), dtype=int64)
tf.Tensor(
[[[-9.7952789e-01 -4.9550407e-02  7.8760695e-01 -9.4414473e-01
   -9.1001999e-01 -9.8696762e-01 -1.0040931e+00 -9.8399913e-01
   -9.6389389e-01 -1.4195003e-03 -9.9328291e-01 -3.3886015e-01
   -9.5995611e-01 -9.6912700e-01 -9.7229034e-01 -1.0030506e+00
   -9.4810855e-01 -1.0059516e+00 -9.9453384e-01 -9.9514633e-01
   -9.7885209e-01 -1.0189408e+00 -9.2216438e-01 -9.6369439e-01
   -9.5581770e-01  9.0898013e-01  3.4355694e-01  6.2924385e-01
    8.2376647e-01  9.0805137e-01  7.2464496e-01  5.3681183e-01
    9.0523934e-01  8.8704902e-01 -9.1242909e-01 -4.0618968e-01
    8.0495793e-01  6.1087257e-01  8.9836323e-01  9.1019720e-01
    2.1426669e-01 -9.6541142e-01  8.5857725e-01  9.0976363e-01
    9.2935693e-01  7.0334429e-01  2.4025665e-01  5.6967038e-01
   -9.3542272e-01  5.4357767e-01 -9.6410286e-01 -9.4257683e-01
   -9.8725337e-01 -9.6333629e-01 -1.0216787e+00 -9.2859411e-01
   -9.4580013e-01]
  [-1.1025574e+00 -2.9315751e-

In [21]:
# Show the raw id tensor returned by generate for temperature=1.0
pred_ids_a

<tf.Tensor: shape=(1, 255), dtype=int64, numpy=
array([[32, 29, 36, 36, 39, 28, 35,  2, 42,  2, 38, 32,  2, 37, 40, 32,
        37, 42, 25, 29, 39,  2,  2, 29, 29,  2, 38, 25, 39, 12,  2, 38,
        55,  2, 29, 36, 39, 25, 39, 38, 37, 39, 39, 38, 25, 44, 52, 53,
        29, 17, 39, 38, 43, 40,  2, 25, 39,  8,  2, 29, 27, 39, 29,  2,
        40, 38, 25, 33, 42, 32,  2,  2,  2, 44,  2, 33, 33,  2,  2, 29,
         1, 44, 29, 39,  2, 33, 39, 43,  2, 47, 29, 33, 29, 30,  2,  1,
        38, 36, 29, 38, 37,  2, 27,  2, 29, 29, 38, 39, 42, 25, 33, 37,
        29, 38, 36,  2, 42, 28, 42, 42,  2,  8, 33, 36, 39, 44, 29,  2,
        44, 39,  2,  2, 43, 25, 29, 21,  1, 33, 42, 47, 31, 27,  2, 20,
        38, 39, 26, 25, 27, 56, 43,  2, 38, 33, 32, 45, 29,  2, 51,  1,
         2, 44, 44,  2, 39,  7, 33,  2, 32, 32,  2, 29, 43, 32, 28, 42,
        28, 39, 42,  2,  2, 43, 32,  2, 32,  9, 42, 38, 25,  2, 39, 26,
        43,  2, 44, 29, 32, 38, 44, 32, 36, 39, 38, 44, 39, 39, 25,  5,
        44, 44, 

# Decode the generated ids back into text

In [22]:
# Decode the ids generated at temperature=1.0 using the idx_to_char lookup
decode_predictions(pred_ids_a, idx_to_char)

'hellodk r nh mphmraeo  ee nao/ n— eloaonmoonatçée4onsp ao) ecoe pnairh   t ii  e\nteo ios weief \nnlenm c eenoraimenl rdrr )ilote to  sae9\nirwgc 8nobac…s nihue ´\n tt o(i hh eshdrdor  sh h,rna obs tehnthlontooa&tt  rii amço a  alugtilen tn ss e t iegs.oevh e'

In [23]:
# Decode the ids generated at temperature=0.8 using the idx_to_char lookup
decode_predictions(pred_ids_b, idx_to_char)

'helloi aaep seaa,o,y deasnh can9eun in  nnnh…di toafwva i n heonr mo iipl 1ko  nyeneeeh  a   etttoanniuna h eeee,otsata,la dg dee0  h taa ensa  nlui otaomngi  e toeh eth g ri   atsdmevipt rbwhh— ar a" rp sbeiceennno sthuerrden erniontdib/aoa nai tiiaieit '

In [24]:
# Decode the ids generated at temperature=0.6 using the idx_to_char lookup
decode_predictions(pred_ids_c, idx_to_char)

'hellon    neiws hiad dt uee  l  des  it \naf  iuonwslaoe rans rthtateattel hte okttise de n si  hfe iio sliu   h    cua  sa i… hs  eliier  arre fbs oky  h   et obaaçadv s  s c nd  niw yi e sit t  s  a  onteit  en ean n st  m   h f et eut sn , aotste      i'

In [25]:
# Decode the ids generated at temperature=0.4 using the idx_to_char lookup
decode_predictions(pred_ids_d, idx_to_char)

'helloe a    t    roe   e   ii ia an   e  n n   a   a  th e     a     ie t e d h s  t    a i oh  h ee e    e n ti    e a n            n  a ro t sei o  eti   a n     e  esw ro atni  h ta i         a o e     eit  r ee nh ttna a     i  sia es    e n ee      a'

In [26]:
# Read back the lookup table from the same path that save_lookup_table wrote to earlier
vocab = load_lookup_table('./saved/misc/char_to_idx')

In [28]:
# Check which Python type load_lookup_table returned
type(vocab)

list