In [1]:
from pathlib import Path
import pandas as pd
from collections import defaultdict
import copy
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from datetime import datetime

  from ._conv import register_converters as _register_converters


In [9]:
!curl -L -O http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 33.2M  100 33.2M    0     0  1581k      0  0:00:21  0:00:21 --:--:-- 2503k    0     0   604k      0  0:00:56  0:00:04  0:00:52  604k


In [11]:
# !tar xvf simple-examples.tgz

In [12]:
data_path = 'simple-examples/data/'

In [13]:
test_ptb = Path.cwd() / data_path / 'ptb.test.txt'
train_ptb = Path.cwd() / data_path / 'ptb.train.txt'
valid_ptb = Path.cwd() / data_path / 'ptb.valid.txt'

In [14]:
test_ptb.exists(), train_ptb.exists(), valid_ptb.exists() 

(True, True, True)

In [15]:
def read_ptb_file(ptb_file: Path):
    ptb_dict = defaultdict(int)
    one_line = []
    with ptb_file.open(mode='r') as f:
        for ind, sent in enumerate(f):
            sent_words = sent.strip().split()
            sent_words += ['<eos>']
            for word in sent_words:
                ptb_dict[word] += 1
            one_line += sent_words
    print(ind)
    return ptb_dict, one_line

In [16]:
test_dict, test_ol = read_ptb_file(test_ptb)
train_dict, train_ol = read_ptb_file(train_ptb)
valid_dict, valid_ol = read_ptb_file(valid_ptb)
len(test_ol), len(train_ol), len(valid_ol)

3760
42067
3369


(82430, 929589, 73760)

In [17]:
set(train_dict.keys()).issuperset(set(valid_dict.keys()))

True

In [18]:
word_to_ind = {k: ind for ind, (k, v) in enumerate(train_dict.items())}
ind_to_word = {v: k for k, v in word_to_ind.items()}

In [19]:
num_layers = 2 
emb_size = 650 
batch_size = 20 
num_steps = 35 
vocab_size = len(train_dict)
hidden_size = 650
max_grad_norm = 5

In [20]:
def indecise(one_line: list, word_to_ind: dict) -> list:
    return [word_to_ind[word] for word in one_line]

In [21]:
def batch_to_text(batch, ind_to_word):
    vect_func = np.vectorize(lambda x: ind_to_word[x])
    return vect_func(batch)

In [22]:
def generate_batches(ol, word_to_ind, batch_size=5, num_steps=3):
    ol = ol[0:(len(ol) // (batch_size * num_steps)) * batch_size * num_steps]
    ind_ol = np.array(indecise(ol, word_to_ind))
    ind_ol = ind_ol.reshape([batch_size, -1])
    num_batches = ind_ol.shape[1] // num_steps
    for i in range(num_batches):
        yield ind_ol[:, i * num_steps: (i+1) * num_steps]

In [23]:
# gen_batches_X = generate_batches(ol=train_ol, word_to_ind=word_to_ind, batch_size=batch_size, num_steps=num_steps)
# gen_batches_Y = generate_batches(ol=train_ol[1:], word_to_ind=word_to_ind, batch_size=batch_size, num_steps=num_steps)

In [24]:
# x0 = next(gen_batches_X)
# y0 = next(gen_batches_Y)

In [114]:
tf.reset_default_graph()

In [115]:
ph_input_seq = tf.placeholder(dtype=tf.int32, shape=(None, None), name='input_sequence') # bs, seq_len
ph_target_seq = tf.placeholder(dtype=tf.int32, shape=(None, None), name='target_sequence') # bs, seq_len
ph_lr = tf.placeholder(dtype=tf.float32, name='learning_rate')
ph_keep_prob = tf.placeholder(dtype=tf.float32, name='keep_probability')
ph_batch_size = tf.placeholder(tf.int32, [], name='batch_size')
ph_num_steps = tf.placeholder(tf.int32, [], name='num_steps_size')

In [116]:
word_embeddings = tf.get_variable(name='word_embs', 
                                  shape=[vocab_size, emb_size], 
                                  initializer=tf.random_uniform_initializer(minval=-0.05, maxval=0.05))

In [117]:
batch_mat = tf.nn.embedding_lookup(word_embeddings, ph_input_seq, name='word_emb_lookup')  # bs, seq_len, emb_size 

In [118]:
l_cell = tf.nn.rnn_cell.LSTMCell(num_units=hidden_size)
d_cell = tf.nn.rnn_cell.DropoutWrapper(cell=l_cell, input_keep_prob=ph_keep_prob)
lstm_layers = [d_cell for _ in range(2)]
multi_cell = tf.contrib.rnn.MultiRNNCell(lstm_layers)
initial_state_multi = multi_cell.zero_state(ph_batch_size, dtype=tf.float32)
outputs, state = tf.nn.dynamic_rnn(
                              cell=multi_cell,
                              inputs=batch_mat,
                              dtype=tf.float32,
                              initial_state=initial_state_multi)

In [119]:
logits = tf.contrib.layers.fully_connected(outputs,  # batch_size, num_steps, vocab_size
                                           num_outputs=vocab_size,
                                           activation_fn=None)

In [120]:
probabs = tf.nn.softmax(logits) # batch_size, num_steps, vocab_size

In [121]:
seq_mask = tf.ones(shape=[ph_batch_size, ph_num_steps], dtype=tf.float32)

In [122]:
losses = tf.contrib.seq2seq.sequence_loss(logits,
                                          ph_target_seq,
                                          weights=seq_mask,
                                          average_across_timesteps=True,
                                          average_across_batch=True)

In [123]:
tvars= tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(losses, tvars),
                                  max_grad_norm)
optimizer = tf.train.AdamOptimizer(learning_rate=ph_lr, )
train_op = optimizer.apply_gradients(zip(grads, tvars))

In [124]:

tvars

[<tf.Variable 'word_embs:0' shape=(10000, 200) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_0/lstm_cell/kernel:0' shape=(400, 800) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_0/lstm_cell/bias:0' shape=(800,) dtype=float32_ref>,
 <tf.Variable 'fully_connected/weights:0' shape=(200, 10000) dtype=float32_ref>,
 <tf.Variable 'fully_connected/biases:0' shape=(10000,) dtype=float32_ref>]

In [125]:
# traininig_op = optimizer.minimize(losses)

In [126]:
saver = tf.train.Saver()

In [127]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

In [128]:
summary_loss = tf.summary.scalar(name='scalar_loss', tensor=losses)

In [129]:
num_batches = len(train_ol) // (batch_size) // (num_steps)

In [48]:
def time_str():
    return datetime.now().replace(microsecond=0).isoformat().replace(':', '-')

In [112]:
n_epochs = 39
model_name = 'lm_ptb'
step = 0
lr = 1.0
total_dev_losses = []

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    train_writer = tf.summary.FileWriter(model_name + '/summaries%s' % time_str(), sess.graph)
    for i in range(n_epochs):
        gen_batches_X = generate_batches(ol=train_ol, word_to_ind=word_to_ind, batch_size=batch_size, num_steps=num_steps)
        gen_batches_Y = generate_batches(ol=train_ol[1:], word_to_ind=word_to_ind, batch_size=batch_size, num_steps=num_steps)
        if i > 5:
            lr /= 1.2
        with tqdm(total=num_batches) as p:
            last_state = None
            for x, y in zip(gen_batches_X, gen_batches_Y):
                feed_dict = {
                        ph_input_seq: x, 
                        ph_target_seq: y,
                        ph_lr: lr,
                        ph_batch_size: batch_size,
                        ph_num_steps: num_steps,
                        ph_keep_prob: 0.5
                    }
                if last_state is not None:
                    feed_dict[initial_state_multi] = last_state
                        
                    
                _, last_state = sess.run(
                    [train_op, state],
                    feed_dict=feed_dict)

                if step % 100 == 0:
                    perplex = losses.eval(
                        feed_dict=feed_dict
                    )
                    print(perplex)
                    summ = sess.run(summary_loss, 
                             feed_dict=feed_dict)
                    gen_batches_X = generate_batches(ol=valid_ol, 
                                                     word_to_ind=word_to_ind, 
                                                     batch_size=1, 
                                                     num_steps=1)
                    gen_batches_Y = generate_batches(ol=valid_ol[1:], 
                                                     word_to_ind=word_to_ind, 
                                                     batch_size=1, 
                                                     num_steps=1)
                    last_state = None
                    k, tot_loss = 0, 0
                    for x, y in zip(gen_batches_X, gen_batches_Y):
                        feed_dict = {
                            ph_input_seq: x, 
                            ph_target_seq: y,
                            ph_batch_size: 1,
                            ph_num_steps: 1,
                            ph_keep_prob: 1
                        }
                        if last_state is not None:
                            feed_dict[initial_state_multi] = last_state
                        
                    
                        loss, last_state = sess.run(
                            [losses, state],
                            feed_dict=feed_dict)
                        k += 1
                        tot_loss += loss
                    total_dev_losses.append(tot_loss / k)
                    print(tot_loss / k)
                        
                        
        if (i % 1 == 0):
            print('Epoch ', i)
            saver.save(sess,
                       model_name + '/model_nt/model',
                       global_step=i)

  0%|          | 0/1327 [00:00<?, ?it/s]

19.476135





KeyboardInterrupt: 

In [52]:
sess.close()
sess = tf.InteractiveSession()

In [0]:
tf.reset_default_graph()

In [0]:
tf.trainable_variables()

[<tf.Variable 'word_embs:0' shape=(10000, 200) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_0/lstm_cell/kernel:0' shape=(400, 800) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_0/lstm_cell/bias:0' shape=(800,) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_1/lstm_cell/kernel:0' shape=(400, 800) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_1/lstm_cell/bias:0' shape=(800,) dtype=float32_ref>,
 <tf.Variable 'fully_connected/weights:0' shape=(200, 10000) dtype=float32_ref>,
 <tf.Variable 'fully_connected/biases:0' shape=(10000,) dtype=float32_ref>]

In [0]:
new_saver = tf.train.import_meta_graph('./lm_ptb/model_nt/model-28.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./lm_ptb/model_nt'))

INFO:tensorflow:Restoring parameters from ./lm_ptb/model_nt/model-29


In [53]:
sess.run(tf.global_variables_initializer())

In [54]:
gen_batches_X = generate_batches(ol=train_ol, word_to_ind=word_to_ind, batch_size=batch_size, num_steps=num_steps)
gen_batches_Y = generate_batches(ol=train_ol[1:], word_to_ind=word_to_ind, batch_size=batch_size, num_steps=num_steps)
x = next(gen_batches_X)
y = next(gen_batches_Y)
feed_dict = {
    ph_input_seq: x, 
    ph_target_seq: y,
    ph_lr: lr,
    ph_batch_size: batch_size,
    ph_num_steps: num_steps
}

In [None]:
lengths=[ph_num_steps] * ph_batch_size

In [86]:
b, n = sess.run([ph_batch_size, tf.shape(ph_input_seq)], feed_dict=feed_dict)

In [87]:
n

array([20, 35], dtype=int32)

In [0]:
np.argwhere(prob[0, 1, :] > 0)

array([[  25],
       [  26],
       [  28],
       ...,
       [9954],
       [9956],
       [9968]])

In [0]:
0sess.run(tf.global_variables_initializer())

SyntaxError: ignored

In [0]:
word_embeddings.eval()

array([[-2.7464739e+01, -3.6920895e+01, -3.6771332e+01, ...,
         2.6273001e+01, -3.7348797e+01,  3.6746384e+01],
       [ 6.3345112e+01, -9.7019281e+00, -5.1374656e-01, ...,
        -1.0484942e+01,  1.3455794e+00,  6.3885632e+01],
       [ 3.1979662e+01, -3.2417019e+01,  2.2178928e+01, ...,
        -3.2884804e+01,  2.2129156e+01, -2.6629168e+01],
       ...,
       [-3.1723948e+01, -3.1731722e+01, -3.1821632e+01, ...,
        -3.1739698e+01, -3.1674166e+01, -3.1669527e+01],
       [ 3.0517769e+01,  3.0611116e+01,  3.0411850e+01, ...,
         3.0528505e+01, -3.0427708e+01, -3.0400440e+01],
       [ 7.7703960e-02, -5.8487877e-03,  3.9323784e-02, ...,
         5.2389242e-02, -7.4387550e-02,  1.6977310e-02]], dtype=float32)

In [57]:
stat = sess.run(ph_num_steps, feed_dict={ph_input_seq: x0, ph_target_seq: y0})

NameError: name 'x0' is not defined

In [58]:
stat[1][1].shape

NameError: name 'stat' is not defined

In [0]:
losses[0].shape

ValueError: ignored

In [0]:
sess.run(multi_cell.initial_state)

In [0]:
losses[0].shape

In [0]:
np.array_equal(out[:, 2, :], stat[1])

In [0]:
stat[1].shape

In [0]:
stat[0].shape

In [0]:
import tensorflow as tf
import numpy as np

In [0]:
n_inputs = 3
n_neurons = 5

In [0]:
X_0 = tf.placeholder(dtype=tf.float32, shape=[None, n_inputs])
X_1 = tf.placeholder(dtype=tf.float32, shape=[None, n_inputs])

In [0]:
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)

In [0]:
output, states = tf.contrib.rnn.static_rnn(basic_cell, [X_0, X_1], dtype=tf.float32)

In [0]:
Y_0, Y_1 = output

In [0]:
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) 
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])

In [0]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out, stat = sess.run([output, states], feed_dict={X_0: X0_batch, X_1: X1_batch})

In [0]:
out[0].shape, np.array_equal(out[1], stat)

In [0]:
X0_batch.shape