In [1]:
from models.arcs import load_alt_model_a, load_alt_model_b, load_inf_model, GenPhiloText
from utilities.loaders import load_file
from utilities.preprocessors import preprocess, map_value_to_index, init_sequences

from tensorflow.keras.losses import CategoricalCrossentropy as cce_loss
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import CategoricalAccuracy, CategoricalCrossentropy as cce_metric

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow import one_hot

%load_ext autoreload
%autoreload 2

In [2]:
# Read the raw notes file into memory; the slice displayed in the next cell shows it comes back as a single string.
corpus = load_file('./data/notes.txt')

In [3]:
# Peek at the first 500 characters of the raw text
corpus[:500]

'A simple idea “What is the meaning of life?”\nI asked as I learned through the works of Camus? \nOne step down, I felt a yearning of meaning in this world.\nIn this yearning I stumbled upon eastern philosophy;\nIkigai as the Japanese philosophers called it was a considerable way for me to find meaning at that certain point in my life. \nFollowed then another idea, a leap of faith as Kierkegaard would call it, yet I had no idea this was his idea. \nCalm followed after the storm, and then I took another'

In [4]:
# Total number of characters in the raw corpus
len(corpus)

226750

In [5]:
# Vocabulary of the raw corpus: every distinct character, in sorted order
chars = sorted(set(corpus))
chars

['\n',
 ' ',
 '!',
 '"',
 '&',
 "'",
 '(',
 ')',
 ',',
 '-',
 '.',
 '/',
 '0',
 '1',
 '2',
 '3',
 '4',
 '6',
 '7',
 '8',
 '9',
 ':',
 ';',
 '?',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'Y',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '´',
 'ç',
 'é',
 'ï',
 '–',
 '—',
 '‘',
 '’',
 '“',
 '”',
 '…']

# Preprocessing corpus
* replace curly double quotation marks '“'/'”' with the straight double quote '"'
* replace curly single quotation marks '‘'/'’' with the straight apostrophe "'"
* replace the en dash '–' with the em dash '—'
* lowercase all text (for now)
* replace three consecutive '.' characters with the single ellipsis character '…'

In [6]:
# Normalise the text according to the rules listed above.
# NOTE: `corpus` is overwritten here, so the raw text is only recoverable by re-running the load cell.
corpus = preprocess(corpus)
# Inspect the first 2000 characters of the cleaned text
corpus[:2000]

'a simple idea "what is the meaning of life?"\ni asked as i learned through the works of camus? \none step down, i felt a yearning of meaning in this world.\nin this yearning i stumbled upon eastern philosophy;\nikigai as the japanese philosophers called it was a considerable way for me to find meaning at that certain point in my life. \nfollowed then another idea, a leap of faith as kierkegaard would call it, yet i had no idea this was his idea. \ncalm followed after the storm, and then i took another step down. \nalthough i know the next idea i had was because of the idea of meaninglessness by camus, i was not sure where this exact idea started — the idea of right and wrong, good and evil. this one step down i would say was worse than the one before, as it forced me to think deeper than ever, thereby not knowingly using regression to find the answers i was seeking. if i was to be caveman, regress to a less developed state of man, a blank slate as i call it, how would i invent good an

In [7]:
# Rebuild the vocabulary from the cleaned corpus (uppercase letters and curly quotes are gone)
chars = sorted(set(corpus))
chars

['\n',
 ' ',
 '!',
 '"',
 '&',
 "'",
 '(',
 ')',
 ',',
 '-',
 '.',
 '/',
 '0',
 '1',
 '2',
 '3',
 '4',
 '6',
 '7',
 '8',
 '9',
 ':',
 ';',
 '?',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '´',
 'ç',
 'é',
 'ï',
 '—',
 '…']

In [8]:
# Vocabulary size after preprocessing; reused below as the one-hot depth and passed to the model builder
n_unique = len(chars)

# Create a mapping from each unique character to its index (and the inverse mapping)

In [9]:
# Forward lookup: character -> integer index
# NOTE(review): the meaning of the 2nd and 3rd positional arguments lives in utilities.preprocessors — confirm there
char_to_idx = map_value_to_index(chars, n_unique, 0)

# Reverse lookup: integer index -> character (same call with inverted=True)
idx_to_char = map_value_to_index(chars, n_unique, 0, inverted=True)

In [10]:
# Display the character -> index lookup
char_to_idx

{'\n': 0,
 ' ': 1,
 '!': 2,
 '"': 3,
 '&': 4,
 "'": 5,
 '(': 6,
 ')': 7,
 ',': 8,
 '-': 9,
 '.': 10,
 '/': 11,
 '0': 12,
 '1': 13,
 '2': 14,
 '3': 15,
 '4': 16,
 '6': 17,
 '7': 18,
 '8': 19,
 '9': 20,
 ':': 21,
 ';': 22,
 '?': 23,
 'a': 24,
 'b': 25,
 'c': 26,
 'd': 27,
 'e': 28,
 'f': 29,
 'g': 30,
 'h': 31,
 'i': 32,
 'j': 33,
 'k': 34,
 'l': 35,
 'm': 36,
 'n': 37,
 'o': 38,
 'p': 39,
 'q': 40,
 'r': 41,
 's': 42,
 't': 43,
 'u': 44,
 'v': 45,
 'w': 46,
 'x': 47,
 'y': 48,
 'z': 49,
 '´': 50,
 'ç': 51,
 'é': 52,
 'ï': 53,
 '—': 54,
 '…': 55}

In [11]:
# Display the index -> character lookup
idx_to_char

{0: '\n',
 1: ' ',
 2: '!',
 3: '"',
 4: '&',
 5: "'",
 6: '(',
 7: ')',
 8: ',',
 9: '-',
 10: '.',
 11: '/',
 12: '0',
 13: '1',
 14: '2',
 15: '3',
 16: '4',
 17: '6',
 18: '7',
 19: '8',
 20: '9',
 21: ':',
 22: ';',
 23: '?',
 24: 'a',
 25: 'b',
 26: 'c',
 27: 'd',
 28: 'e',
 29: 'f',
 30: 'g',
 31: 'h',
 32: 'i',
 33: 'j',
 34: 'k',
 35: 'l',
 36: 'm',
 37: 'n',
 38: 'o',
 39: 'p',
 40: 'q',
 41: 'r',
 42: 's',
 43: 't',
 44: 'u',
 45: 'v',
 46: 'w',
 47: 'x',
 48: 'y',
 49: 'z',
 50: '´',
 51: 'ç',
 52: 'é',
 53: 'ï',
 54: '—',
 55: '…'}

In [12]:
# Window length: number of character indices in each input sequence
n_time_steps = 100

# Turn the cleaned corpus into index-encoded input windows X and their targets Y
X, Y = init_sequences(
    corpus,
    char_to_idx,
    T_x=n_time_steps,
)
X

array([[24,  1, 42, ..., 28,  1, 42],
       [ 1, 42, 32, ...,  1, 42, 43],
       [42, 32, 36, ..., 42, 43, 28],
       ...,
       [38, 41,  1, ..., 28, 35, 32],
       [41,  1, 43, ..., 35, 32, 28],
       [ 1, 43, 41, ..., 32, 28, 29]])

In [13]:
# Integer targets, one per row of X (judging by the outputs, the character that follows each window — confirm in init_sequences)
Y

array([43, 28, 39, ..., 28, 29, 10])

# Convert Y's indices to their one-hot vector representation

In [14]:
# One-hot encode the integer targets so they can be used with the categorical cross-entropy loss.
# `Y` is overwritten in place, so the rank check keeps this cell idempotent: on a second run
# `Y` is already a rank-2 float tensor and must not be passed to one_hot again.
if len(Y.shape) == 1:
    Y = one_hot(Y, depth=n_unique)
Y

<tf.Tensor: shape=(226861, 56), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>

In [15]:
# Spot-check a single encoded target: exactly one position should be 1
Y[3]

<tf.Tensor: shape=(56,), dtype=float32, numpy=
array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.], dtype=float32)>

In [16]:
# Number of training examples (rows of X)
len(X)

226861

In [17]:
# Length of the last input window; expected to equal n_time_steps
len(X[-1])

100

# Instantiate the generative model with the chosen architecture

In [18]:
# Embedding width passed to the model builder (shows up as the Embedding output size in the summary below)
emb_dim = 64
# Hidden size passed to the model builder (shows up as the LSTM output size in the summary below)
n_a = 32

In [19]:
# Build the "alt B" architecture for the current vocabulary size and window length.
# NOTE(review): keep_prob and lambda_ presumably control dropout and weight regularisation — confirm in models.arcs
model = load_alt_model_b(
    n_unique=n_unique,
    T_x=n_time_steps,
    emb_dim=emb_dim,
    n_a=n_a,
    keep_prob=0.5,
    lambda_=0.7,
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 64)           3584      
                                                                 
 lstm (LSTM)                 (None, 100, 32)           12416     
                                                                 
 dropout (Dropout)           (None, 100, 32)           0         
                                                                 
 lstm_1 (LSTM)               (None, 32)                8320      
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense (Dense)               (None, 56)                1848      
                                                                 
 batch_normalization (BatchN  (None, 56)               2

# Provide loss, optimizer, and metrics for model

In [20]:
# Adam optimizer with its hyperparameters spelled out explicitly
opt = Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
)

# NOTE(review): built with default arguments (Keras defaults to from_logits=False) —
# confirm the model's final layer emits probabilities rather than raw logits
loss = cce_loss()

# Track accuracy and cross-entropy during training; the checkpoint filename template
# further down formats the `categorical_accuracy` value
metrics = [
    CategoricalAccuracy(),
    cce_metric(),
]

model.compile(optimizer=opt, loss=loss, metrics=metrics)

# Train model and checkpoint weights

In [21]:
# Filename template: the epoch number and the monitored training accuracy are filled in on save
weights_path = "./weights/weights-improvement-{epoch:02d}-{categorical_accuracy:.4f}.hdf5"

# Save only when training accuracy beats the best value seen so far
checkpoint = ModelCheckpoint(
    weights_path,
    monitor='categorical_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [22]:
# Train for 20 epochs in mini-batches of 2048 examples, running the checkpoint callback
model.fit(
    X,
    Y,
    epochs=20,
    batch_size=2048,
    callbacks=callbacks_list,
)

Epoch 1/20

KeyboardInterrupt: 