In [1]:
import numpy as np
import string
from rnn import forward_pass, backward_pass, create_datasets, optimize_sgd, init_rnn, one_hot_encode

### Parse text

In [2]:
with open('train.txt', 'r') as _file:
    train_text = _file.read()
train_words = []
strip_str = string.punctuation + string.whitespace
for line in train_text.split('\n'):
    for sentence in line.split('.'):
        words = [x.lower().strip(strip_str) for x in sentence.split() if x != '—']
        train_words.extend(words)

## Set hyperparams

In [3]:
# size of input/target sequences
window_size = 3
# size of word embedding
embedding_dim = 128
p_train = 0.8
# num of training epochs
num_epochs = 30
# size of hidden state
hidden_size = 256
# learning rate of SGD optimizer
lr = .2
# gradient norm clipping
clip_norm = .8
# print loss every n epochs
print_loss_n = 2
# wait n epochs if val loss doesn't improve
patience = 2

### Prepare input and target arrays, split to train, val, test datasets

In [4]:
word_to_idx = {}
idx_to_word = {}
word_to_idx = {word: i for i, word in enumerate(sorted(set(train_words)))}
idx_to_word = {i: word for word, i in word_to_idx.items()}
data_x = []
data_y = []
n = len(train_words)
vocab_size = len(word_to_idx)
for i in range(0, n - window_size * 2, 1):
    rb = i + window_size
    words_x = train_words[i:rb]
    # input array of indexes of words
    sample_x = np.array([word_to_idx[word] for word in words_x])
    words_y = train_words[rb:rb + window_size]
    # one-hot encoded target array
    sample_y = np.array([one_hot_encode(word_to_idx[word], vocab_size) for word in words_y])
    data_x.append(sample_x)
    data_y.append(sample_y)
data_x = np.array(data_x)
data_y = np.array(data_y)
p_val = .5 - p_train / 2
training_set, validation_set, test_set = create_datasets(data_x, data_y, p_train, p_val, p_val)

## Check dataset inputs and targets

In [16]:
data_x, data_y = list(zip(*test_set))
i = 10
print([idx_to_word[idx] for idx in data_x[i]])
print([idx_to_word[np.argmax(idx)] for idx in data_y[i]])

['наших', 'ограничений', 'пусть']
['позитив', 'будет', 'вашим']


## Train RNN

Я использовал алгоритм оптимизации SGD с gradient clipping, чтобы избежать проблему исчезающих градиентов. Обучение останавливается, если функция ошибки на валидационном датасете не улучшается `patience` эпох подряд

In [26]:
train_num = len(training_set)
val_num = len(validation_set)

# Initialize a network
params = init_rnn(hidden_size, vocab_size, embedding_dim)

# Initialize hidden state as zeros
hidden_state = np.zeros((hidden_size, 1))

# Track loss
training_loss, validation_loss = [], []
num_no_improving = 0
for i in range(num_epochs):
    epoch_training_loss = 0
    epoch_validation_loss = 0

    for inputs, targets in validation_set:
        # Re-initialize hidden state
        hidden_state = np.zeros_like(hidden_state)
        outputs, hidden_states = forward_pass(inputs, hidden_state, params)
        loss, _ = backward_pass(inputs, outputs, hidden_states, targets, params)
        # Update loss
        epoch_validation_loss += loss

    for inputs, targets in training_set:
        # Re-initialize hidden state
        hidden_state = np.zeros_like(hidden_state)
        outputs, hidden_states = forward_pass(inputs, hidden_state, params)
        loss, grads = backward_pass(inputs, outputs, hidden_states, targets, params)
        if np.isnan(loss):
            raise ValueError('Gradients have vanished!')
        # Update parameters
        params = optimize_sgd(params, grads, lr, clip_norm)
        # Update loss
        epoch_training_loss += loss
        
    # Save loss for plot
    training_loss.append(epoch_training_loss / train_num)
    validation_loss.append(epoch_validation_loss / val_num)
    # Print loss every N epochs
    if i % print_loss_n == 0:
        print(f'Epoch {i}, training loss: {round(training_loss[-1], 3)}, validation loss: {round(validation_loss[-1], 3)}')
    # stop if val loss didn't improve from previous epoch
    if len(validation_loss) > 2 and validation_loss[-2] < validation_loss[-1]:
        num_no_improving += 1
    else:
        num_no_improving = 0
    if num_no_improving >= patience:
        break

Epoch 0, training loss: 0.021, validation loss: 0.024
Epoch 2, training loss: 0.01, validation loss: 0.016
Epoch 4, training loss: 0.005, validation loss: 0.013
Epoch 6, training loss: 0.004, validation loss: 0.011
Epoch 8, training loss: 0.003, validation loss: 0.011


## Inference of the model on train dataset

In [27]:
def inference_model_and_print(inputs, targets):
    # Initialize hidden state as zeros
    hidden_state = np.zeros((hidden_size, 1))

    # Forward pass
    outputs, hidden_states = forward_pass(inputs, hidden_state, params)
    input_sentence = [idx_to_word[x] for x in inputs]
    output_sentence = [idx_to_word[np.argmax(x)] for x in outputs]
    target_sentence = [idx_to_word[np.argmax(x)] for x in targets]
    print('Input sequence:', input_sentence)
    print('Target sequence:', target_sentence)
    print('Predicted sequence:', output_sentence)

In [28]:
for i in range(15):
    inputs, targets = training_set[i]
    inference_model_and_print(inputs, targets)
    

Input sequence: ['привнести', 'позитивную', 'энергию']
Target sequence: ['в', 'нашу', 'повседневную']
Predicted sequence: ['в', 'нашу', 'повседневную']
Input sequence: ['но', 'вы', 'сможете']
Target sequence: ['сделать', 'это', 'чем']
Predicted sequence: ['раздвинуть', 'это', 'чем']
Input sequence: ['мой', 'опыт', 'никому']
Target sequence: ['не', 'нужен', 'вокруг']
Predicted sequence: ['не', 'нужен', 'вокруг']
Input sequence: ['защитит', 'вас', 'от']
Target sequence: ['стрелы', 'негатива', 'если']
Predicted sequence: ['стрелы', 'негатива', 'если']
Input sequence: ['на', 'своей', 'личности']
Target sequence: ['а', 'не', 'на']
Predicted sequence: ['наказуема', 'своих', 'на']
Input sequence: ['своего', 'сердца', 'верьте']
Target sequence: ['что', 'вам', 'суждено']
Predicted sequence: ['что', 'вам', 'суждено']
Input sequence: ['увидеть', 'как', 'ты']
Target sequence: ['становишься', 'сильнее', 'и']
Predicted sequence: ['становишься', 'сильнее', 'и']
Input sequence: ['конце', 'туннеля', 'е

## Inference of the model on val dataset

In [29]:
for i in range(15):
    inputs, targets = validation_set[i]
    inference_model_and_print(inputs, targets)


Input sequence: ['два', 'варианта', 'быть']
Target sequence: ['положительным', 'или', 'отрицательным']
Predicted sequence: ['день', 'кто', 'в']
Input sequence: ['успешным', 'тяжелые', 'времена']
Target sequence: ['часто', 'приводят', 'к']
Predicted sequence: ['часто', 'приводят', 'к']
Input sequence: ['не', 'всегда', 'приносят']
Target sequence: ['рост', 'но', 'без']
Predicted sequence: ['а', 'позитив', 'без']
Input sequence: ['из', 'тех', 'дней']
Target sequence: ['когда', 'не', 'чувствуете']
Predicted sequence: ['вещь', 'не', 'чувствуете']
Input sequence: ['всегда', 'напоминайте', 'себе']
Target sequence: ['что', 'вам', 'не']
Predicted sequence: ['самое', 'и', 'и']
Input sequence: ['день', 'либо', 'день']
Target sequence: ['бежит', 'за', 'тобой']
Predicted sequence: ['позитивным', 'не', 'только']
Input sequence: ['возможности', 'того', 'что']
Target sequence: ['вы', 'планируете', 'делать']
Predicted sequence: ['препятствия', 'потому', 'сделать']
Input sequence: ['нужна', 'мне', 'всег

## Inference of the model on test dataset

In [30]:
for i in range(15):
    inputs, targets = test_set[i]
    inference_model_and_print(inputs, targets)


Input sequence: ['новое', 'и', 'неизвестное']
Target sequence: ['–', 'это', 'опасно']
Predicted sequence: ['–', 'положительных', 'я']
Input sequence: ['подведут', 'этих', 'жизненных']
Target sequence: ['препятствий', 'никогда', 'не']
Predicted sequence: ['на', 'что', 'не']
Input sequence: ['помогай', 'это', 'к']
Target sequence: ['добру', 'не', 'приведет']
Predicted sequence: ['добру', 'опасно', 'приведет']
Input sequence: ['настроение', 'добавляет', 'годы']
Target sequence: ['к', 'вашей', 'жизни']
Predicted sequence: ['к', 'в', 'только']
Input sequence: ['вы', 'строите', 'в']
Target sequence: ['своем', 'уме', 'никогда']
Predicted sequence: ['вас', 'эту', 'позитивно']
Input sequence: ['нужно', 'подняться', 'это']
Target sequence: ['та', 'которую', 'вы']
Predicted sequence: ['по', 'значит', 'позитивным']
Input sequence: ['не', 'нужно', 'делать']
Target sequence: ['то', 'что', 'делают']
Predicted sequence: ['а', 'по', 'все']
Input sequence: ['того', 'чтобы', 'заработать']
Target sequence