In [2]:
# Report the GPU attached to this runtime before doing any work.
!nvidia-smi

Sat May 16 14:19:44 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   55C    P8    29W / 149W |      0MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [3]:
# Mount Google Drive into the Colab filesystem at /content/gdrive.
# This prompts interactively for an authorization code.
from google.colab import drive
drive.mount("/content/gdrive")

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
# Work from the project folder on the mounted Drive so that the relative
# "./gen_data/..." paths used by later cells resolve.
import os
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Hashing")

In [0]:
import numpy as np
def load_dataset(train_x_path, train_y_path, test_x_path):
  """Load the training inputs, training targets and test inputs from text files.

  Each file is parsed with ``np.loadtxt`` using its default delimiter.

  Args:
    train_x_path: path of the text file holding the training inputs.
    train_y_path: path of the text file holding the training targets.
    test_x_path: path of the text file holding the test inputs.

  Returns:
    A ``(train_X, train_Y, test_X)`` tuple of ``float64`` NumPy arrays.
  """
  # Ask loadtxt for float64 directly. Assigning to ``ndarray.dtype`` afterwards
  # does not convert values: it reinterprets the existing buffer in place,
  # which would silently corrupt the data if the parsed dtype ever differed.
  train_X = np.loadtxt(train_x_path, dtype=np.float64)
  train_Y = np.loadtxt(train_y_path, dtype=np.float64)
  test_X = np.loadtxt(test_x_path, dtype=np.float64)

  return train_X, train_Y, test_X

In [0]:
# Locations of the pre-generated arrays, relative to the working directory.
train_x_path, train_y_path, test_x_path = (
    "./gen_data/train_X",
    "./gen_data/train_Y",
    "./gen_data/test_X",
)
train_X, train_Y, test_X = load_dataset(
    train_x_path=train_x_path,
    train_y_path=train_y_path,
    test_x_path=test_x_path,
)

In [0]:
def load_vocab(vocab_path):
  """Read a tab-separated vocabulary file into forward and reverse lookups.

  Every non-blank line must have the form ``<word><TAB><index>``; surrounding
  whitespace is stripped before splitting. Blank lines are skipped.

  Args:
    vocab_path: path of the UTF-8 encoded vocabulary file.

  Returns:
    A ``(vocab, reverse_vocab)`` tuple where ``vocab`` maps word -> int index
    and ``reverse_vocab`` maps int index -> word.

  Raises:
    ValueError: if a non-blank line does not split into exactly two
      tab-separated fields, or its index field is not an integer.
  """
  vocab = {}
  reverse_vocab = {}
  # `with` guarantees the handle is closed even when a line fails to parse;
  # iterating the file object streams lines instead of loading them all.
  with open(vocab_path, "r", encoding="utf-8") as vocab_file:
    for line in vocab_file:
      line = line.strip()
      if not line:
        # A stray blank line (e.g. at end of file) carries no entry.
        continue
      word, index = line.split("\t")
      index = int(index)
      vocab[word] = index
      reverse_vocab[index] = word

  return vocab, reverse_vocab

In [0]:
# Build the word -> index and index -> word tables from the vocabulary file.
vocab_path = "./gen_data/vocab.txt"
vocab, reverse_vocab = load_vocab(vocab_path=vocab_path)

In [0]:
# Embedding matrix parsed from a text file; later cells pass it as the initial
# weights of the Embedding(vocab_size, embedding_dim) layers.
# presumably shaped (vocab_size, embedding_dim) — TODO confirm
embedding_matrix = np.loadtxt("./gen_data/embedding_matrix")

In [10]:
# Number of columns in train_X, i.e. the length of each input sequence.
input_length = train_X.shape[1]
input_length

260

In [11]:
# Number of columns in train_Y, i.e. the length of each target sequence.
output_sequence_length = train_Y.shape[1]
output_sequence_length

33

In [0]:
# Vocabulary size = number of distinct words read from the vocabulary file.
vocab_size = len(vocab)

In [13]:
# Display the vocabulary size.
vocab_size

32909

In [0]:
# Keep only the first `sample_num` training examples (inputs and targets are
# sliced together so rows stay aligned).
sample_num = 640
train_X, train_Y = train_X[:sample_num], train_Y[:sample_num]

In [15]:
# Shuffle buffer size: the number of (truncated) training examples, so the
# Dataset.shuffle call in a later cell shuffles over the whole set.
BUFFER_SIZE = len(train_X)
BUFFER_SIZE

640

In [16]:
# Sequence lengths (column counts) of the input and target arrays.
max_length_inp, max_length_targ = train_X.shape[1], train_Y.shape[1]
print("input length: {0}, output length: {1}".format(max_length_inp, max_length_targ))

input length: 260, output length: 33


In [0]:
# Mini-batch size, and the number of full batches in one pass over train_X
# (integer division ignores any incomplete final batch).
BATCH_SIZE = 64
step_per_epoch = len(train_X) // BATCH_SIZE

In [0]:
# Model hyperparameters.
embedding_dim = 300  # width of each word-embedding vector
units = 1024  # GRU hidden size, passed to both the encoder and the decoder
vocab_size = len(vocab)  # same value as computed in the earlier cell

In [0]:
import tensorflow as tf
# Pair each input row with its target row, then shuffle with a buffer of
# BUFFER_SIZE elements.
dataset = tf.data.Dataset.from_tensor_slices((train_X, train_Y)).shuffle(BUFFER_SIZE)

In [0]:
# Group into batches of BATCH_SIZE; drop_remainder=True discards a final short
# batch so every batch has the same leading dimension.
# NOTE(review): this rebinds `dataset`, so re-running the cell would batch the
# already-batched dataset again.
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

In [21]:
# Check the batched element shapes and dtypes.
dataset

<BatchDataset shapes: ((64, 260), (64, 33)), types: (tf.float64, tf.float64)>

In [22]:
# (number of training examples, input length)
train_X.shape

(640, 260)

In [0]:
## Build the Encoder

In [0]:
class Encoder(tf.keras.Model):
  """Embedding + GRU encoder.

  Looks up an embedding for every input token id and runs the embedded
  sequence through a GRU, returning the per-timestep outputs together with
  the final state.

  Args:
    vocab_size: number of rows in the embedding table.
    embedding_dim: width of each embedding vector.
    embedding_matrix: initial values for the embedding table (it remains
      trainable). presumably shaped (vocab_size, embedding_dim) — TODO confirm.
    enc_units: hidden size of the GRU.
    batch_size: batch size used when building the initial zero state.
  """

  def __init__(self, vocab_size, embedding_dim, embedding_matrix, enc_units, batch_size):
    super(Encoder, self).__init__()
    self.batch_size = batch_size
    self.enc_units = enc_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim, weights=[embedding_matrix], trainable=True)
    self.gru = tf.keras.layers.GRU(self.enc_units, return_sequences=True, return_state=True, recurrent_initializer="glorot_uniform")

  # Implemented as `call` rather than `__call__`: overriding `__call__`
  # replaces the Keras base-class entry point, so the model would skip the
  # framework's own call handling. `encoder(x, hidden)` still works unchanged
  # because the inherited `__call__` forwards to `call`.
  def call(self, x, hidden):
    """Encode a batch of token-id sequences.

    Args:
      x: batch of token ids to embed.
      hidden: initial GRU state, e.g. from `initialize_hidden_state()`.

    Returns:
      `(output, state)`: the GRU outputs for every timestep and the final
      GRU state.
    """
    x = self.embedding(x)
    # Shape trace kept from the original notebook walkthrough.
    print(x.shape)
    output, state = self.gru(x, initial_state=hidden)
    return output, state

  def initialize_hidden_state(self):
    """Return an all-zero initial state of shape (batch_size, enc_units)."""
    return tf.zeros((self.batch_size, self.enc_units))

In [0]:
# Instantiate the encoder, seeding its embedding table with the loaded matrix.
encoder = Encoder(vocab_size, embedding_dim,embedding_matrix, units, BATCH_SIZE)

In [26]:
# Confirm the encoder object was created.
encoder

<__main__.Encoder at 0x7f4169b227f0>

In [0]:
# Dummy batch of token ids (all ones) with the real input shape, used to
# smoke-test the encoder.
example_input_batch = tf.ones(shape=(BATCH_SIZE, max_length_inp), dtype=tf.int32)

In [28]:
# (batch size, input length)
example_input_batch.shape

TensorShape([64, 260])

In [0]:
# All-zero initial GRU state for the encoder.
sample_hidden = encoder.initialize_hidden_state()

In [30]:
# (batch size, encoder units)
sample_hidden.shape

TensorShape([64, 1024])

In [31]:
# Run the encoder once on the dummy batch. Note that this rebinds
# `sample_hidden` from the zero state to the encoder's final GRU state.
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)

(64, 260, 300)


In [32]:
# Per-timestep encoder outputs: (batch size, input length, encoder units)
sample_output.shape

TensorShape([64, 260, 1024])

In [33]:
# Final encoder state: (batch size, encoder units)
sample_hidden.shape

TensorShape([64, 1024])

In [0]:
## Build the Attention layer

In [0]:
class BahdanauAttention(tf.keras.layers.Layer):
  """Additive attention: score = V(tanh(W1(values) + W2(query))).

  Args:
    units: output width of the two projection layers `W1` and `W2`.
  """

  def __init__(self, units):
    super().__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, values):
    """Weight `values` along axis 1 according to their match with `query`.

    In this notebook `query` is the (batch, units) encoder state and `values`
    the (batch, steps, units) encoder outputs.

    Returns:
      `(context_vector, attention_weights)`: the weighted sum of `values`
      over axis 1, and the softmax weights (normalised over axis 1).
    """
    # Insert an axis at position 1 so the query broadcasts against `values`.
    query_with_time_axis = tf.expand_dims(query, 1)

    projected = self.W1(values) + self.W2(query_with_time_axis)
    score = self.V(tf.nn.tanh(projected))

    # Normalise the scores across axis 1.
    attention_weights = tf.nn.softmax(score, axis=1)

    # Weighted sum of the values over axis 1.
    context_vector = tf.reduce_sum(attention_weights * values, axis=1)

    return context_vector, attention_weights

In [0]:
## Test the Attention layer

In [37]:
# Small attention layer (10 projection units) used only for the shape check
# in the following cells.
attention_layer = BahdanauAttention(10)
attention_layer

<__main__.BahdanauAttention at 0x7f4130683e48>

In [0]:
# Query = the encoder's final state, values = the encoder's per-timestep outputs.
attention_result, attention_weights = attention_layer(sample_hidden, sample_output)

In [39]:
# Context vectors, one row per batch element. The rows are identical here
# because example_input_batch is all ones, so every example is the same.
attention_result

<tf.Tensor: shape=(64, 1024), dtype=float32, numpy=
array([[-0.19075976, -0.14877167,  0.18199638, ...,  0.4263354 ,
         0.26286328, -0.51677346],
       [-0.19075976, -0.14877167,  0.18199638, ...,  0.4263354 ,
         0.26286328, -0.51677346],
       [-0.19075976, -0.14877167,  0.18199638, ...,  0.4263354 ,
         0.26286328, -0.51677346],
       ...,
       [-0.19075976, -0.14877167,  0.18199638, ...,  0.4263354 ,
         0.26286328, -0.51677346],
       [-0.19075976, -0.14877167,  0.18199638, ...,  0.4263354 ,
         0.26286328, -0.51677346],
       [-0.19075976, -0.14877167,  0.18199638, ...,  0.4263354 ,
         0.26286328, -0.51677346]], dtype=float32)>

In [40]:
# One weight per encoder timestep: (batch size, input length, 1)
attention_weights.shape

TensorShape([64, 260, 1])

In [0]:
## Build the Decoder

In [56]:
# Exploratory: print the GRU layer's documentation. Not needed by later cells.
help(tf.keras.layers.GRU)

Help on class GRU in module tensorflow.python.keras.layers.recurrent_v2:

class GRU(tensorflow.python.keras.layers.recurrent.DropoutRNNCellMixin, tensorflow.python.keras.layers.recurrent.GRU)
 |  Gated Recurrent Unit - Cho et al. 2014.
 |  
 |  See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
 |  for details about the usage of RNN API.
 |  
 |  Based on available runtime hardware and constraints, this layer
 |  will choose different implementations (cuDNN-based or pure-TensorFlow)
 |  to maximize the performance. If a GPU is available and all
 |  the arguments to the layer meet the requirement of the CuDNN kernel
 |  (see below for details), the layer will use a fast cuDNN implementation.
 |  
 |  The requirements to use the cuDNN implementation are:
 |  
 |  1. `activation` == `tanh`
 |  2. `recurrent_activation` == `sigmoid`
 |  3. `recurrent_dropout` == 0
 |  4. `unroll` is `False`
 |  5. `use_bias` is `True`
 |  6. `reset_after` is `True`
 |  7. Inputs are 

In [0]:
class Decoder(tf.keras.Model):
  """GRU decoder step with Bahdanau attention over the encoder outputs.

  Args:
    vocab_size: number of rows in the embedding table and width of the
      final Dense projection.
    embedding_dim: width of each embedding vector.
    embedding_matrix: initial values for the embedding table (it remains
      trainable). presumably shaped (vocab_size, embedding_dim) — TODO confirm.
    dec_units: hidden size of the GRU and of the attention projections.
    batch_size: stored on the instance; not otherwise used in this class.
  """

  def __init__(self, vocab_size, embedding_dim, embedding_matrix, dec_units, batch_size):
    super().__init__()
    self.batch_size = batch_size
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(
        vocab_size, embedding_dim, weights=[embedding_matrix], trainable=True)
    self.gru = tf.keras.layers.GRU(
        self.dec_units,
        return_sequences=True,
        return_state=True,
        recurrent_initializer="glorot_uniform")

    # Projects each GRU output onto the vocabulary.
    self.fc = tf.keras.layers.Dense(vocab_size)

    # Scores the encoder outputs against the incoming hidden state.
    self.attention = BahdanauAttention(self.dec_units)

  def call(self, x, hidden, enc_output):
    """Run the decoder on `x`, attending over `enc_output`.

    Args:
      x: batch of token ids to embed.
      hidden: state used as the attention query.
      enc_output: encoder outputs used as the attention values.

    Returns:
      `(x, state, attention_weights)`: the vocabulary-sized projection of the
      flattened GRU outputs, the final GRU state, and the attention weights.
    """
    context_vector, attention_weights = self.attention(hidden, enc_output)
    embedded = self.embedding(x)
    print("after embedding shape:", embedded.shape)
    print("context_vector shape:", context_vector.shape)
    print("attention_weights shape:", attention_weights.shape)

    # Give the context an axis at position 1, then join it with the
    # embedding along the feature axis.
    expanded_context = tf.expand_dims(context_vector, 1)
    gru_input = tf.concat([expanded_context, embedded], axis=-1)
    print("expand context_vector shape:", expanded_context.shape)
    print("after concat x's shape:", gru_input.shape)

    # NOTE(review): `hidden` only feeds the attention; the GRU is called
    # without `initial_state`, so it does not continue from `hidden`.
    # Confirm this is intended.
    output, state = self.gru(gru_input)
    print("output shape:", output.shape)
    print("state shape:", state.shape)

    # Collapse the leading axes so each row is one GRU output vector.
    flat_output = tf.reshape(output, (-1, output.shape[2]))
    print("after reshape output's shape:", flat_output.shape)

    logits = self.fc(flat_output)
    print("x shape:", logits.shape)

    return logits, state, attention_weights

In [0]:
## Test the Decoder

In [69]:
# Smoke-test the decoder on a single step, reusing the encoder's state and
# outputs from the earlier cells.
decoder = Decoder(vocab_size, embedding_dim, embedding_matrix, units, BATCH_SIZE)

# Feed genuine integer token ids drawn from [0, vocab_size). The default
# tf.random.uniform yields floats in [0, 1), which all collapse to id 0 once
# cast to integer indices for the embedding lookup. BATCH_SIZE replaces the
# hard-coded 64 so this cell follows the configured batch size.
sample_decoder_output, _, _ = decoder(
    tf.random.uniform((BATCH_SIZE, 1), maxval=vocab_size, dtype=tf.int32),
    sample_hidden,
    sample_output,
)

after embedding shape: (64, 1, 300)
context_vector shape: (64, 1024)
attention_weights shape: (64, 260, 1)
expand context_vector shape: (64, 1, 1024)
after concat x's shape: (64, 1, 1324)
output shape: (64, 1, 1024)
state shape: (64, 1024)
after reshape output's shape: (64, 1024)
x shape: (64, 32909)


In [66]:
# Shape check for a (batch, 1) decoder input: one value per batch row.
tf.random.uniform((64, 1)).shape

TensorShape([64, 1])