In [3]:
!nvidia-smi

Sun May 17 02:02:15 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P8     9W /  70W |      0MiB / 15079MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [4]:
from google.colab import drive
drive.mount("/content/gdrive")

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
import os
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Hashing")

In [0]:
import numpy as np
def load_dataset(train_x_path, train_y_path, test_x_path):
  train_X = np.loadtxt(train_x_path)
  train_Y = np.loadtxt(train_y_path)
  test_X = np.loadtxt(test_x_path)

  train_X.dtype = "float64"
  train_Y.dtype = "float64"
  test_X.dtype = "float64"

  return train_X, train_Y, test_X

In [0]:
train_x_path = "./gen_data/train_X"
train_y_path = "./gen_data/train_Y"
test_x_path = "./gen_data/test_X"
train_X, train_Y, test_X = load_dataset(train_x_path, train_y_path, test_x_path)

In [0]:
def load_vocab(vocab_path):
  vocab = {}
  reverse_vocab = {}
  for line in open(vocab_path, "r", encoding="utf-8").readlines():
    word, index = line.strip().split("\t")
    index = int(index)
    vocab[word] = index
    reverse_vocab[index] = word

  return vocab, reverse_vocab 

In [0]:
vocab_path = "./gen_data/vocab.txt"
vocab, reverse_vocab=load_vocab(vocab_path)

In [0]:
embedding_matrix = np.loadtxt("./gen_data/embedding_matrix")

In [11]:
input_length = train_X.shape[1]
input_length

260

In [12]:
output_sequence_length = train_Y.shape[1]
output_sequence_length

33

In [0]:
vocab_size = len(vocab)

In [14]:
vocab_size

32909

In [0]:
sample_num = 640
train_X = train_X[:sample_num]
train_Y = train_Y[:sample_num]

In [16]:
BUFFER_SIZE = len(train_X)
BUFFER_SIZE

640

In [17]:
max_length_inp = train_X.shape[1]
max_length_targ = train_Y.shape[1]
print("input length: {0}, output length: {1}".format(max_length_inp, max_length_targ))

input length: 260, output length: 33


In [0]:
BATCH_SIZE = 64
step_per_epoch = len(train_X) // BATCH_SIZE

In [0]:
embedding_dim = 300
units = 1024
vocab_size = len(vocab)

In [0]:
import tensorflow as tf
dataset = tf.data.Dataset.from_tensor_slices((train_X, train_Y)).shuffle(BUFFER_SIZE)

In [0]:
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

In [22]:
dataset

<BatchDataset shapes: ((64, 260), (64, 33)), types: (tf.float64, tf.float64)>

In [23]:
train_X.shape

(640, 260)

In [0]:
## 构建Encoder

In [0]:
class Encoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, embedding_matrix, enc_units, batch_size):
    super(Encoder, self).__init__()
    self.batch_size = batch_size
    self.enc_units = enc_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim, weights=[embedding_matrix], trainable=True)
    self.gru = tf.keras.layers.GRU(self.enc_units, return_sequences=True, return_state=True, recurrent_initializer="glorot_uniform")

  def __call__(self, x, hidden):
    x = self.embedding(x)
    print(x.shape)
    output, state = self.gru(x, initial_state=hidden)
    return output, state

  def initialize_hidden_state(self):
    return tf.zeros((self.batch_size, self.enc_units))

In [0]:
encoder = Encoder(vocab_size, embedding_dim,embedding_matrix, units, BATCH_SIZE)

In [27]:
encoder

<__main__.Encoder at 0x7f4a534cfd30>

In [0]:
example_input_batch = tf.ones(shape=(BATCH_SIZE, max_length_inp), dtype=tf.int32)

In [29]:
example_input_batch.shape

TensorShape([64, 260])

In [0]:
sample_hidden = encoder.initialize_hidden_state()

In [31]:
sample_hidden.shape

TensorShape([64, 1024])

In [32]:
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)

(64, 260, 300)


In [33]:
sample_output.shape

TensorShape([64, 260, 1024])

In [34]:
sample_hidden.shape

TensorShape([64, 1024])

In [0]:
## 构建Attention

In [0]:
class BahdanauAttention(tf.keras.layers.Layer):
  def __init__(self, units):
    super(BahdanauAttention, self).__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, values):
    hidden_with_time_axis = tf.expand_dims(query, 1)

    score = self.V(tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))

    attention_weights = tf.nn.softmax(score, axis=1)

    context_vector = attention_weights * values
    context_vector = tf.reduce_sum(context_vector, axis=1)

    return context_vector, attention_weights

In [0]:
## 测试Attention

In [38]:
attention_layer = BahdanauAttention(10)
attention_layer

<__main__.BahdanauAttention at 0x7f4a4eebe940>

In [0]:
attention_result, attention_weights = attention_layer(sample_hidden, sample_output)

In [40]:
attention_result

<tf.Tensor: shape=(64, 1024), dtype=float32, numpy=
array([[-0.36441872,  0.08861268,  0.01415164, ..., -0.19742776,
        -0.3552074 ,  0.36955377],
       [-0.36441872,  0.08861268,  0.01415164, ..., -0.19742776,
        -0.3552074 ,  0.36955377],
       [-0.36441872,  0.08861268,  0.01415164, ..., -0.19742776,
        -0.3552074 ,  0.36955377],
       ...,
       [-0.36441872,  0.08861268,  0.01415164, ..., -0.19742776,
        -0.3552074 ,  0.36955377],
       [-0.36441872,  0.08861268,  0.01415164, ..., -0.19742776,
        -0.3552074 ,  0.36955377],
       [-0.36441872,  0.08861268,  0.01415164, ..., -0.19742776,
        -0.3552074 ,  0.36955377]], dtype=float32)>

In [41]:
attention_weights.shape

TensorShape([64, 260, 1])

In [0]:
## 构建Decoder

In [0]:
# help(tf.keras.layers.GRU)

In [0]:
class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, embedding_matrix, dec_units, batch_size):
    super(Decoder, self).__init__()
    self.batch_size = batch_size
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.
    (vocab_size, embedding_dim, weights=[embedding_matrix], trainable=True)
    self.gru = tf.keras.layers.GRU(self.dec_units, return_sequences=True, return_state=True, recurrent_initializer="glorot_uniform")

    self.fc = tf.keras.layers.Dense(vocab_size)

    # used for attention
    self.attention = BahdanauAttention(self.dec_units)
  
  def call(self, x, hidden, enc_output):
    context_vector, attention_weights = self.attention(hidden, enc_output)
    x = self.embedding(x)
    print("after embedding shape:", x.shape)
    print("context_vector shape:", context_vector.shape)
    print("attention_weights shape:", attention_weights.shape)

    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
    print("expand context_vector shape:", tf.expand_dims(context_vector, 1).shape)
    print("after concat x's shape:", x.shape)

    output, state = self.gru(x)
    print("output shape:", output.shape)
    print("state shape:", state.shape)
    
    output = tf.reshape(output, (-1, output.shape[2]))
    print("after reshape output's shape:", output.shape)

    x = self.fc(output)
    print("x shape:", x.shape)
    

    return x, state, attention_weights

In [0]:
## 测试 Decoder

In [46]:
decoder = Decoder(vocab_size, embedding_dim, embedding_matrix, units, BATCH_SIZE)

sample_decoder_output, _, _ = decoder(tf.random.uniform((64, 1)), sample_hidden, sample_output)

after embedding shape: (64, 1, 300)
context_vector shape: (64, 1024)
attention_weights shape: (64, 260, 1)
expand context_vector shape: (64, 1, 1024)
after concat x's shape: (64, 1, 1324)
output shape: (64, 1, 1024)
state shape: (64, 1024)
after reshape output's shape: (64, 1024)
x shape: (64, 32909)


In [47]:
tf.random.uniform((64, 1)).shape

TensorShape([64, 1])

In [0]:
## 定义优化函数

In [0]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction="none")

pad_index = vocab["<PAD>"]

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, pad_index))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask

  return tf.reduce_mean(loss_)

In [0]:
## 保存点设置

In [0]:
checkpoint_dir = "./gen_data/checkpoints/training"
if not os.path.exists(checkpoint_dir):
  os.makedirs(checkpoint_dir)
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder, decoder=decoder)

In [83]:
checkpoint_prefix

'./gen_data/checkpoints/training/ckpt'

In [0]:
## 训练

In [0]:
# @tf.function
# def train_step(inp, targ, enc_hidden):
#   loss = 0

#   with tf.GradientTape() as tape:
#     # 1. 构建encoder
#     enc_output, enc_hidden = encoder(inp, enc_hidden)
#     # 2. 复制
#     dec_hidden = enc_hidden
#     # 3. <START> * BATCH_SIZE
#     dec_input = tf.expand_dims([vocab["<START>"]] * BATCH_SIZE, 1)

#     # Teacher forcing - feeding the target as the next input
#     for t in range(1, targ.shape[1]):
#       # decoder(x, hidden, enc_output)
#       predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)

#       loss += loss_function(targ[:, t], predictions)

#       dec_input = tf.expand_dims(targ[:, t], 1)
    
#     batch_loss = (loss / int(targ.shape[1]))

#     variables = encoder.trainable_variables + decoder.trainable_variables

#     gradients = tape.gradient(loss, variables)

#     optimizer.apply_gradients(zip(gradients, variables))

#     return batch_loss

In [0]:
@tf.function
def train_step(inp, targ, enc_hidden):
    loss = 0

    with tf.GradientTape() as tape:
        # 1. 构建encoder
        enc_output, enc_hidden = encoder(inp, enc_hidden)
        # 2. 复制
        dec_hidden = enc_hidden
        # 3. <START> * BATCH_SIZE 
        dec_input = tf.expand_dims([vocab['<START>']] * BATCH_SIZE, 1)

        # Teacher forcing - feeding the target as the next input
        for t in range(1, targ.shape[1]):
            # decoder(x, hidden, enc_output)
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
            
            loss += loss_function(targ[:, t], predictions)

            # using teacher forcing
            dec_input = tf.expand_dims(targ[:, t], 1)

        batch_loss = (loss / int(targ.shape[1]))

        variables = encoder.trainable_variables + decoder.trainable_variables

        gradients = tape.gradient(loss, variables)

        optimizer.apply_gradients(zip(gradients, variables))

        return batch_loss

In [0]:
# for (batch, (inp, targ)) in enumerate(dataset.take(step_per_epoch)):
#   print(inp)
#   print(targ)

In [0]:
EPOCH = 10

In [84]:
import time
for epoch in range(EPOCH):
  start = time.time()

  enc_hidden = encoder.initialize_hidden_state()
  total_loss = 0

  for (batch, (inp, targ)) in enumerate(dataset.take(step_per_epoch)):
    batch_loss = train_step(inp, targ, enc_hidden)
    total_loss += batch_loss

    if batch % 1 == 0:
      print("Epoch {} step {} Loss: {:.4f}".format(epoch+1, batch, batch_loss.numpy()))
  
  if (epoch+1) % 2 == 0:
    checkpoint.save(file_prefix=checkpoint_prefix)
  
  print('Epoch {} Loss {:.4f}'.format(epoch + 1, total_loss / step_per_epoch))
  print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

Epoch 1 step 0 Loss: 2.4356
Epoch 1 step 1 Loss: 2.9044
Epoch 1 step 2 Loss: 2.5568
Epoch 1 step 3 Loss: 2.3728
Epoch 1 step 4 Loss: 2.7195
Epoch 1 step 5 Loss: 2.3557
Epoch 1 step 6 Loss: 2.8366
Epoch 1 step 7 Loss: 2.7939
Epoch 1 step 8 Loss: 2.3074
Epoch 1 step 9 Loss: 2.7521
Epoch 1 Loss 2.6035
Time taken for 1 epoch 14.592399597167969 sec

Epoch 2 step 0 Loss: 2.3055
Epoch 2 step 1 Loss: 2.8262
Epoch 2 step 2 Loss: 2.6387
Epoch 2 step 3 Loss: 2.6184
Epoch 2 step 4 Loss: 2.3754
Epoch 2 step 5 Loss: 1.9447
Epoch 2 step 6 Loss: 2.1545
Epoch 2 step 7 Loss: 2.6113
Epoch 2 step 8 Loss: 3.0056
Epoch 2 step 9 Loss: 2.0810
Epoch 2 Loss 2.4561
Time taken for 1 epoch 17.902633905410767 sec

Epoch 3 step 0 Loss: 2.7619
Epoch 3 step 1 Loss: 2.5648
Epoch 3 step 2 Loss: 2.2469
Epoch 3 step 3 Loss: 2.4273
Epoch 3 step 4 Loss: 2.1832
Epoch 3 step 5 Loss: 2.0798
Epoch 3 step 6 Loss: 1.9983
Epoch 3 step 7 Loss: 2.2401
Epoch 3 step 8 Loss: 2.1732
Epoch 3 step 9 Loss: 2.0613
Epoch 3 Loss 2.2737
Time t

In [2]:
tf.__version__

NameError: ignored

In [0]:
import os, sys
sys.path.append("./utils")

In [8]:
ls

[0m[01;34msample_data[0m/


In [7]:
from data_loader import preprocess_sentence
import matplotlib
from matplotlib import font_manager
# 解决中文乱码
font=font_manager.FontProperties(fname="./gen_data/simhei.ttf")
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

ModuleNotFoundError: ignored