In [322]:
import tensorflow as tf
import numpy as np
import jieba
import time
print(tf.__version__)

2.2.0


In [323]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, vocabulary_number, vocabulary_dimension, vocabulary_matrix, gru_unites, batch_size):
        super(Encoder, self).__init__()
        self.batch_size = batch_size
        self.gru_unites = gru_unites
        self.embedding = tf.keras.layers.Embedding(vocabulary_number, vocabulary_dimension, 
#                                                weights=vocabulary_matrix, trainable=False
                                                  )
        self.gru = tf.keras.layers.GRU(gru_unites, return_sequences=True, return_state=True)
    
    def call(self, inp, hidden):
        x = self.embedding(inp)
        x, hidden = self.gru(x, initial_state=hidden)
        return x, hidden
    
    def initialize_hidden_state(self):
        return tf.zeros((self.batch_size, self.gru_unites))
    

In [324]:
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        self.w1 = tf.keras.layers.Dense(units)
        self.w2 = tf.keras.layers.Dense(units)
        self.v = tf.keras.layers.Dense(1)
    
    def call(self, dec_hidden, enc_output):
        hidden_with_time_axis = tf.expand_dims(dec_hidden, 1)
        score = self.v(tf.nn.tanh(self.w1(enc_output) + self.w2(hidden_with_time_axis)))
        attn_dist = tf.nn.softmax(score, axis=1)
#         attn_dist = tf.expand_dims(attn_dist, axis=2)
        context_vector = attn_dist * enc_output
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attn_dist
    

In [325]:
class Decoder(tf.keras.layers.Layer):
    def __init__(self, vocabulary_number, vocabulary_dimension, vocabulary_matrix, unites, batch_size):
        super(Decoder, self).__init__()
        self.embedding = tf.keras.layers.Embedding(vocabulary_number, vocabulary_dimension, 
                                                  weights=[vocabulary_matrix], trainable=False)
        self.gru = tf.keras.layers.GRU(unites, return_sequences=True, return_state=True)
        self.fc = tf.keras.layers.Dense(vocabulary_number, activation=tf.keras.activations.softmax)
        
        # BahdanauAttention
        self.w1 = tf.keras.layers.Dense(unites)
        self.w2 = tf.keras.layers.Dense(unites)
        self.v = tf.keras.layers.Dense(1)
    
    def call(self, x, dec_hidden, enc_output):
        # dec_hidden shape == (batch_size, hidden size)
        # enc_output shape == (batch_size, max_length, hidden_size)

        hidden_with_time_axis = tf.expand_dims(dec_hidden, 1)
        score = tf.nn.tanh(self.w1(enc_output) + self.w2(hidden_with_time_axis))
        attention_weights = tf.nn.softmax(self.v(score), axis=1)
        context_vector = attention_weights * enc_output
        context_vector = tf.reduce_sum(context_vector, axis=1)
        
#         print(x.shape)
#         print(x)
#         e = tf.keras.layers.Embedding(len(targ_lang.word2idx), vocabulary_dimension)
#         print(e(x))
#         self.embedding(1)
        x = self.embedding(x)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        
        output, state = self.gru(x)
        
        # output shape == (batch_size * max_length, hidden_size)
        output = tf.reshape(output, (-1, output.shape[2]))
        
        # output shape == (batch_size * max_length, vocab)
        output = self.fc(output)
        
        return output, state, attention_weights
        
    def initialize_hidden_state(self):
        return tf.zeros((self.batch_sz, self.dec_units))


In [326]:
def loss_function(real, pred):
    mask = 1 - np.equal(real, 0)
    loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred) * mask
    return tf.reduce_mean(loss_)

In [327]:
class LanguageIndex():
    def __init__(self, lang):
        self.lang = lang
        self.word2idx = {}
        self.idx2word = {}
        self.vocab = set()
        
        self.create_index()
        
    def create_index(self):
        for phrase in self.lang:
            self.vocab.update(phrase)
        
        self.word2idx['<pad>'] = 0
        for index, word in enumerate(self.vocab):
            self.word2idx[word] = index + 1
        
        for word, index in self.word2idx.items():
            self.idx2word[index] = word

In [328]:
chinese = """5月7日，泰晶转债开盘暴跌30%而暂停交易。
收盘前的最后三分钟交易重新打开（按照规定，尾盘三分钟，可转债不设涨跌幅限制），泰晶转债最终跌幅扩大至47.68%。
此次暴跌源于5月6日晚间泰晶科技发布的“关于提前赎回泰晶转债的提示性公告”，再结合此前再升转债宣布赎回时的暴跌，反衬出当前可转债市场的非理性狂热。
可转债就是可以选择转股的债券。
一般期限为6年左右，可转股日为发行后6个月，具体要看各家发行的报告。
发行后半年投资者可以选择转股，按照票面金额来转股，例如100元的票面，转股价格20元，当前股价为40元，则对应转股价值为200元。
若转债价格高于200元，就会产生正的转股溢价率。
转债的纯债价值可以对投资者起到保护作用，一旦股价下跌过多，没有了转股价值，投资者至少可以以纯债的形式持有。""".split('\n')
english = """On May 7, the opening of Taijing Convertible Bonds plunged 30% and trading was suspended. 
The trading was reopened in the last three minutes before the close (according to the regulations, there is no limit for the rise and fall of convertible bonds for three minutes at the end of the day), and the final decline of Taijing Convertible Bonds expanded to 47.68%. 
This plunge originated from the "Informative Announcement on Redemption of Taijing Convertible Bonds in Advance" released by Taijing Technology on the evening of May 6, combined with the plunge when the previous redemption of convertible bonds was announced to redeem, reflecting the current convertible bonds Irrational fanaticism in the market.
Convertible bonds are bonds that can be converted into shares. 
The general term is about 6 years, and the conversion date is 6 months after the issue, depending on the report issued by each company. 
Investors can choose to convert shares half a year after the issuance and convert the shares according to the face value. 
For example, the face value of 100 yuan, the conversion price of 20 yuan, the current stock price of 40 yuan, then the corresponding value of 200 yuan. 
If the convertible bond price is higher than 200 yuan, a positive conversion premium rate will be generated. 
The pure debt value of convertible bonds can play a protective role for investors. Once the stock price falls too much, there is no conversion value, and investors can at least hold it in the form of pure debt.""".split('\n')
chinese = [['<start>'] + jieba.lcut(el) + ['<end>'] for el in chinese]
english = [['<start>'] + [e for e in jieba.lcut(el) if e != ' '] + ['<end>'] for el in english]
chinese[1], english[1]

(['<start>',
  '收盘',
  '前',
  '的',
  '最后',
  '三分钟',
  '交易',
  '重新',
  '打开',
  '（',
  '按照',
  '规定',
  '，',
  '尾盘',
  '三分钟',
  '，',
  '可转债',
  '不设',
  '涨跌幅',
  '限制',
  '）',
  '，',
  '泰晶',
  '转债',
  '最终',
  '跌幅',
  '扩大',
  '至',
  '47.68%',
  '。',
  '<end>'],
 ['<start>',
  'The',
  'trading',
  'was',
  'reopened',
  'in',
  'the',
  'last',
  'three',
  'minutes',
  'before',
  'the',
  'close',
  '(',
  'according',
  'to',
  'the',
  'regulations',
  ',',
  'there',
  'is',
  'no',
  'limit',
  'for',
  'the',
  'rise',
  'and',
  'fall',
  'of',
  'convertible',
  'bonds',
  'for',
  'three',
  'minutes',
  'at',
  'the',
  'end',
  'of',
  'the',
  'day',
  ')',
  ',',
  'and',
  'the',
  'final',
  'decline',
  'of',
  'Taijing',
  'Convertible',
  'Bonds',
  'expanded',
  'to',
  '47.68%',
  '.',
  '<end>'])

In [329]:
inp_lang = LanguageIndex(chinese)
targ_lang = LanguageIndex(english)

chinese = [[inp_lang.word2idx[w] for w in s] for s in chinese]
english = [[targ_lang.word2idx[w] for w in s] for s in english]
chinese[1], english[1]

([65,
  66,
  60,
  109,
  4,
  5,
  105,
  93,
  38,
  2,
  96,
  17,
  26,
  113,
  5,
  26,
  40,
  118,
  85,
  68,
  103,
  26,
  30,
  67,
  82,
  56,
  110,
  97,
  36,
  78,
  39],
 [91,
  107,
  21,
  64,
  83,
  2,
  29,
  82,
  114,
  57,
  23,
  29,
  75,
  49,
  111,
  102,
  29,
  14,
  8,
  118,
  96,
  84,
  51,
  108,
  29,
  137,
  39,
  42,
  46,
  37,
  58,
  108,
  114,
  57,
  38,
  29,
  6,
  46,
  29,
  22,
  76,
  8,
  39,
  29,
  90,
  128,
  46,
  45,
  30,
  109,
  59,
  102,
  50,
  116,
  53])

In [330]:
def max_length(tensor):
    return max(len(t) for t in tensor)

max_length_inp, max_length_tar = max_length(chinese), max_length(english)

input_tensor = tf.keras.preprocessing.sequence.pad_sequences(chinese, 
                                                                 maxlen=max_length_inp,
                                                                 padding='post')
target_tensor = tf.keras.preprocessing.sequence.pad_sequences(chinese, 
                                                                  maxlen=max_length_tar, 
                                                                  padding='post')

In [331]:
input_tensor, target_tensor

(array([[ 65,  92,  84,  95,   1,  26,  30,  67, 116,  44,  24,  69,  10,
         105,  78,  39,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0],
        [ 65,  66,  60, 109,   4,   5, 105,  93,  38,   2,  96,  17,  26,
         113,   5,  26,  40, 118,  85,  68, 103,  26,  30,  67,  82,  56,
         110,  97,  36,  78,  39,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0],
        [ 65,  86,  44, 119,  92,  84,  41,   1,  15,  30,  91, 120, 109,
          37,  87, 106,  29,  30,  67, 109,  83,  42,  28,  26, 115,  58,
         117,  90,  67,  49,  29,  71, 109,  44,  26,  21, 112,  63,  40,
         101, 109,  94,  12,  78,  39],
        [ 65,  40,  88, 111,  27,  76, 109,  73,  78,  39,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   

In [332]:
vocabulary_dimension = 20
inp_vocabulary_matrix = np.random.rand(len(inp_lang.word2idx), vocabulary_dimension)
targ_vocabulary_matrix = np.random.rand(len(targ_lang.word2idx), vocabulary_dimension)
unites = 128
batch_size = 4
lr = 0.0001
encoder = Encoder(len(inp_lang.word2idx), vocabulary_dimension, inp_vocabulary_matrix, unites, batch_size)
decoder = Decoder(len(targ_lang.word2idx), vocabulary_dimension, targ_vocabulary_matrix, unites, batch_size)
optimizer = tf.keras.optimizers.Adam(lr)

In [333]:
BUFFER_SIZE = len(input_tensor)
dataset = tf.data.Dataset.from_tensor_slices((input_tensor, target_tensor)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(batch_size, drop_remainder=True)

In [334]:
dataset

<BatchDataset shapes: ((4, 45), (4, 57)), types: (tf.int32, tf.int32)>

In [358]:
EPOCHS = 1
t1 = time.time()
for epoch in range(EPOCHS):
    hidden = encoder.initialize_hidden_state()
    total_loss = 0
    
    for (batch, (inp, targ)) in enumerate(dataset):
        loss = 0
        
        with tf.GradientTape() as tape:
            enc_output, enc_hidden = encoder(inp, hidden)
            
            dec_hidden = enc_hidden

            dec_input = tf.expand_dims([targ_lang.word2idx['<start>']] * batch_size, 1)

            for t in range(1, targ.shape[1]):
                predictions, dec_hidden, attention_weights = decoder(dec_input, dec_hidden, enc_output)
                del attention_weights

                loss += loss_function(targ[:, t], predictions)

                dec_input = tf.expand_dims(targ[:, t], 1)
                
    total_loss += loss / targ.shape[1]
    variables = encoder.variables + decoder.variables
    
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
print(f'finished training, time cost: {time.time() - t1}')

finished training, time cost: 0.8394513130187988
