In [42]:
import tensorflow as tf 
from tensorflow.examples.tutorials.mnist import input_data

# Reset first, then seed: tf.set_random_seed() stores the seed on the *current*
# default graph, so seeding before the reset would seed a graph that is thrown away.
tf.reset_default_graph()
tf.set_random_seed(1)

# data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# set hyperparameters
learn_rate = 0.001
training_iters = 100000  # upper bound on the number of training samples consumed
batch_size = 128
n_inputs = 28  # MNIST data input (image shape 28 * 28): one 28-pixel row per time step
n_steps = 28  # number of time steps: each image is fed as 28 rows, one row per step
n_hidden_layers = 128  # width of the input projection and number of LSTM units
n_classes = 10  # size of the output layer applied after the cell (one score per digit)

# tf Graph Input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])

# define weights
weights = {
    'in': tf.Variable(tf.truncated_normal([n_inputs, n_hidden_layers])),  # 28 * 128
    'out': tf.Variable(tf.truncated_normal([n_hidden_layers, n_classes]))  # 128 * 10
}

bias = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_layers])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes]))
}

def RNN(X, weights, bias):
    """Build the classifier graph: dense input layer -> LSTM -> dense output layer.

    Args:
        X: float tensor of shape [batch, n_steps, n_inputs].
        weights: dict with 'in' ([n_inputs, n_hidden_layers]) and
            'out' ([n_hidden_layers, n_classes]) weight variables.
        bias: dict with 'in' ([n_hidden_layers]) and 'out' ([n_classes]) bias variables.

    Returns:
        Tensor of shape [batch, n_classes] holding the unnormalized class scores
        computed from the LSTM output at the last time step.
    """
    # hidden layer for input to cell
    ################################

    # tf.matmul is applied to 2-D operands here, so fold the time axis into the
    # batch axis: (batch, n_steps, n_inputs) -> (batch * n_steps, n_inputs).
    X = tf.reshape(X, [-1, n_inputs])

    # into hidden
    X_in = tf.matmul(X, weights['in']) + bias['in']

    # Restore the time axis: (batch * n_steps, hidden) -> (batch, n_steps, hidden).
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_layers])

    # cell
    ################################
    # forget_bias is a constant added to the forget gate's pre-activation.
    # 0.0 adds no extra bias; larger values push the gate towards "keep the state".
    cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_layers, forget_bias=0.0, state_is_tuple=True)

    # Take the batch size from the input at run time instead of the global
    # `batch_size`, so the same graph accepts batches of any size.
    init_state = cell.zero_state(tf.shape(X_in)[0], dtype=tf.float32)

    # tf.nn.dynamic_rnn returns (outputs, state):
    #   outputs: [batch, max_time, cell.output_size] when time_major is False,
    #            [max_time, batch, cell.output_size] when time_major is True.
    #   state:   the final state; for an LSTM cell this is an LSTMStateTuple (c, h).
    outputs, _final_state = tf.nn.dynamic_rnn(cell, X_in, initial_state=init_state, time_major=False)

    # hidden layer for output as the final results
    # Equivalent alternative: use the h part of the final state (_final_state[1]).
    # Slicing the last time step directly avoids transposing and unstacking the
    # whole output into n_steps separate tensors just to use one of them.
    last_output = outputs[:, -1, :]  # [batch, n_hidden_layers]
    results = tf.matmul(last_output, weights['out']) + bias['out']

    return results

pred = RNN(x, weights, bias)
# `pred` holds unnormalized class scores (logits); the loss op applies the
# softmax itself, so no softmax is applied to `pred` beforehand.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))

train_step = tf.train.AdamOptimizer(learn_rate).minimize(loss)

# Fraction of samples whose highest-scoring class matches the one-hot label.
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)), dtype=tf.float32))

init = tf.global_variables_initializer()

with tf.Session() as sess:

    sess.run(init)
    times = 0
    # Stop once roughly `training_iters` training samples have been consumed.
    while times*batch_size < training_iters:

        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Flat 784-pixel rows -> (batch, n_steps, n_inputs) sequences of image rows.
        batch_x = batch_x.reshape([batch_size, n_steps, n_inputs])
        sess.run(train_step, feed_dict={x: batch_x, y: batch_y})
        if times % 20 == 0:
            # Fetch a test batch only when it is actually evaluated; drawing one
            # on every iteration wasted work and skipped through the test set.
            test_x, test_y = mnist.test.next_batch(batch_size)
            test_x = test_x.reshape([batch_size, n_steps, n_inputs])
            print(sess.run(accuracy, feed_dict={x: test_x, y: test_y}))
        times+=1

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
0.1484375
0.6875
0.8046875
0.7578125
0.828125
0.8046875
0.8515625
0.8515625
0.90625
0.90625
0.8984375
0.921875
0.9296875
0.921875
0.9140625
0.921875
0.921875
0.9453125
0.8984375
0.9296875
0.9453125
0.984375
0.9375
0.9296875
0.9296875
0.9453125
0.953125
0.9453125
0.9609375
0.9453125
0.984375
0.96875
0.9609375
0.9375
0.9453125
0.9609375
0.9609375
0.9765625
0.9453125
0.9453125


In [None]:
import tensorflow as tf 
import numpy as np 
import matplotlib.pyplot as plt 

# %matplotlib inline
# tf.reset_default_graph()
# seq2seq model

# hyper-parameters

BATCH_START = 0   # first x value of the next batch; advanced by get_bacth() on every call
TIME_STEPS = 20 # number of time steps per sequence (the RNN is unrolled 20 times)
BATCH_SIZE = 50 # number of sequences (samples) in each batch
INPUT_SIZE = 1 # one value is fed in at each time step
OUTPUT_SIZE = 1 # one value is produced at each time step
CELL_SIZE = 20 # number of hidden units in the LSTM cell
LR = 0.005 # learning rate

def get_bacth():
    """Produce the next batch of sine inputs and cosine targets.

    The x values are consecutive integers starting at the module-level
    BATCH_START, laid out as BATCH_SIZE rows of TIME_STEPS values. Each call
    advances BATCH_START by TIME_STEPS, so successive batches overlap.

    Returns:
        A list [seq, res, xs] where seq = sin(xs) and res = cos(xs), both with
        shape (BATCH_SIZE, TIME_STEPS, 1), and xs has shape (BATCH_SIZE, TIME_STEPS).
    """
    global BATCH_START
    first = BATCH_START
    n_points = TIME_STEPS * BATCH_SIZE
    xs = np.arange(first, n_points + first).reshape((BATCH_SIZE, TIME_STEPS))
    # Slide the window forward by one sequence length for the next call.
    BATCH_START = first + TIME_STEPS
    # Append a trailing feature axis: (BATCH_SIZE, TIME_STEPS) -> (BATCH_SIZE, TIME_STEPS, 1).
    seq = np.expand_dims(np.sin(xs), axis=2)
    res = np.expand_dims(np.cos(xs), axis=2)
    return [seq, res, xs]


class LSTMRNN(object):
    """LSTM sequence regressor: dense input layer -> BasicLSTMCell -> dense output layer.

    Constructing an instance adds the placeholders, variables, cost and Adam
    training op to the current default graph.

    Args:
        n_steps: number of time steps in every input/target sequence.
        input_size: number of values fed in at each time step.
        output_size: number of values predicted at each time step.
        cell_size: number of units in the LSTM cell; also the width of the
            input projection.
        batch_size: number of sequences per batch. The LSTM initial state and
            the loss weights are built with this fixed size, so every fed batch
            must contain exactly this many sequences.
        lr: learning rate for the Adam optimizer. When omitted (None) the
            module-level ``LR`` is used, which is what the class always did
            before this parameter existed.
    """

    def __init__(self, n_steps, input_size, output_size, cell_size, batch_size, lr=None):
        self.n_steps = n_steps
        self.input_size = input_size
        self.output_size = output_size
        self.cell_size = cell_size
        self.batch_size = batch_size
        # Fall back to the global LR so existing five-argument callers are unaffected.
        self.lr = LR if lr is None else lr
        with tf.name_scope('Inputs'):
            self.xs = tf.placeholder(tf.float32, [None, n_steps, input_size], name='xs')
            self.ys = tf.placeholder(tf.float32, [None, n_steps, output_size], name='ys')

        # Notes on the two kinds of scope used below.
        # Inside tf.name_scope():
        #   1. tf.Variable(): variables given the same name still end up with
        #      different final names; they are distinct variables.
        #   2. tf.get_variable(): the variable name is not affected by the name scope.
        # Inside tf.variable_scope():
        #   1. Variables can be reused.
        #   2. tf.Variable() creates a new variable every time.
        #   3. tf.get_variable() returns the existing variable when one with the
        #      same name already exists instead of creating a new one.
        #   4. Reuse has to be requested explicitly (scope.reuse_variables()),
        #      otherwise an error is raised.
        # The layers use variable scopes so that the 'Weights' / 'Bias' variables
        # created through tf.get_variable() get a distinct prefix per layer.
        with tf.variable_scope('In_hidden'):
            self.add_input_layer()
        with tf.variable_scope('LSTM_cell'):
            self.add_cells()
        with tf.variable_scope('Out_hidden'):
            self.add_output_layer()

        with tf.name_scope('Cost'):
            self.compute_cost()
        with tf.name_scope('Train'):
            self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.cost)

    def add_input_layer(self):
        """Project every time step from input_size to cell_size features.

        Sets self.l_in_y with shape [batch, n_steps, cell_size].
        """
        # Fold time into the batch axis so a single 2-D matmul covers all steps.
        l_in_x = tf.reshape(self.xs, [-1, self.input_size], name='2_2D')
        ws_in = self._weight_variable([self.input_size, self.cell_size])
        bs_in = self._bias_variable([self.cell_size])
        with tf.name_scope('WX_PLUS_B'):
            l_in_y = tf.matmul(l_in_x, ws_in) + bs_in
        # Restore the time axis for the recurrent layer.
        self.l_in_y = tf.reshape(l_in_y, [-1, self.n_steps, self.cell_size], name='2_3D')

    def add_cells(self):
        """Run the projected inputs through a BasicLSTMCell.

        Sets self.cell_init_state (zero state for batch_size sequences, which
        callers may override through feed_dict), self.cell_outputs and
        self.final_state.
        """
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.cell_size)
        with tf.name_scope('Initial_state'):
            self.cell_init_state = lstm_cell.zero_state(self.batch_size, dtype=tf.float32)
        self.cell_outputs, self.final_state = tf.nn.dynamic_rnn(lstm_cell, self.l_in_y,
                                                initial_state=self.cell_init_state, time_major=False)

    def add_output_layer(self):
        """Map every LSTM output to output_size values.

        Sets self.pred with shape [batch * n_steps, output_size]; the time axis
        stays folded into the first dimension.
        """
        l_out_x = tf.reshape(self.cell_outputs, [-1, self.cell_size], name='2_2D')
        wx_out = self._weight_variable([self.cell_size, self.output_size])
        bs_out = self._bias_variable([self.output_size])
        with tf.name_scope('WX_PLUS_B'):
            self.pred = tf.matmul(l_out_x, wx_out) + bs_out

    def compute_cost(self):
        """Define self.cost and register it as the 'Cost' scalar summary.

        sequence_loss_by_example receives three single-element lists:
          * logits:  the flattened predictions,
          * targets: the flattened targets,
          * weights: a vector of ones with batch_size * n_steps entries, so
            every element contributes equally.
        ms_error replaces the default softmax cross-entropy, turning the result
        into element-wise squared errors. The cost is the sum of those errors
        divided by batch_size.
        """
        losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [tf.reshape(self.pred, [-1], name='reshaped_pred')],
            [tf.reshape(self.ys, [-1], name='reshape_target')],
            [tf.ones([self.batch_size * self.n_steps], dtype=tf.float32)],
            average_across_timesteps=True,
            softmax_loss_function=self.ms_error,
            name='Losses'
            )
        with tf.name_scope('Average_Cost'):
            self.cost = tf.div(tf.reduce_sum(losses, name='Losses_sum'), self.batch_size, name='Average_Cost')
            tf.summary.scalar('Cost', self.cost)

    # A static method takes no implicit `self`, so it can be handed to
    # sequence_loss_by_example as a plain two-argument loss function.
    @staticmethod
    def ms_error(labels, logits):
        """Element-wise squared error between labels and logits."""
        return tf.square(tf.subtract(labels, logits))

    def _weight_variable(self, shape, name='Weights'):
        """Create (via tf.get_variable) a weight variable drawn from a truncated normal, mean 0, stddev 0.5."""
        initializer = tf.truncated_normal(shape,0, 0.5)
        return tf.get_variable(initializer=initializer, name=name)

    def _bias_variable(self, shape, name='Bias'):
        """Create (via tf.get_variable) a bias variable drawn from a truncated normal, mean 0, stddev 0.5."""
        initializer = tf.truncated_normal(shape, 0, 0.5)
        return tf.get_variable(name=name, initializer=initializer)

if __name__ == '__main__':
    # The body must be indented under the guard; left at column 0 it is a
    # syntax error and the cell cannot run at all.

    # Start from an empty default graph so the cell can be re-run: the model
    # creates its variables with tf.get_variable(), which raises if the names
    # already exist, and tf.summary.merge_all() would otherwise also collect
    # summaries left behind by other cells.
    tf.reset_default_graph()

    model = LSTMRNN(TIME_STEPS, INPUT_SIZE, OUTPUT_SIZE, CELL_SIZE, BATCH_SIZE)
    sess = tf.Session()
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter('logs', sess.graph)
    init = tf.global_variables_initializer()
    sess.run(init)

    plt.ion()
    plt.show()
    for i in range(100):
        seq, res, xs = get_bacth()
        if i == 0:
            # First batch: let the model use its built-in zero initial state.
            feed_dict = {model.xs: seq, model.ys: res}
        else:
            # Later batches: carry the LSTM state over from the previous batch.
            feed_dict = {model.xs: seq, model.ys: res, model.cell_init_state: state}
        _, cost, state, pred = sess.run(
            [model.train_op, model.cost, model.final_state, model.pred],
            feed_dict=feed_dict
            )

        # plotting: target (red) vs. prediction (blue dashed) for the first
        # sequence of the batch.
        plt.plot(xs[0, :], res[0].flatten(), 'r',
                 xs[0, :], pred.flatten()[:TIME_STEPS], 'b--')
        plt.ylim((-1.2, 1.2))
        plt.draw()
        plt.pause(0.3)

        if i % 20 == 0:
            print('Cost: ', round(cost, 4))
            result = sess.run(merged, feed_dict=feed_dict)
            writer.add_summary(result, i)

    # Push any buffered summaries to disk so they are visible right away.
    writer.flush()






In [None]:
import tensorflow as tf
import numpy as np 
import matplotlib.pyplot as plt 

# Hyper-parameters
BATCH_START = 0  # first index of the next batch; advanced by get_batch() on every call
BATCH_SIZE = 20  # number of sequences in each batch
TIME_STEP = 15  # number of time steps per sequence
INPUT_SIZE = 1  # values fed in at each time step
CELL_SIZE = 10  # number of units in the LSTM cell
OUTPUT_SIZE = 1  # values predicted at each time step
LEARNING_RATE = 0.005  # learning rate passed to the Adam optimizer

def get_batch():
    """Produce the next batch of cosine inputs and sine targets.

    Consecutive integers starting at the module-level BATCH_START are shaped to
    (BATCH_SIZE, TIME_STEP, INPUT_SIZE) and divided by 5*pi to obtain x. Each
    call advances BATCH_START by TIME_STEP * INPUT_SIZE.

    Returns:
        A tuple (seq, y, x) with seq = cos(x) and y = sin(x); all three arrays
        have shape (BATCH_SIZE, TIME_STEP, INPUT_SIZE).
    """
    global BATCH_START
    per_sequence = TIME_STEP * INPUT_SIZE
    begin = BATCH_START
    end = BATCH_SIZE * per_sequence + begin
    indices = np.arange(begin, end).reshape((BATCH_SIZE, TIME_STEP, INPUT_SIZE))
    # Rescale the integer indices so neighbouring samples are close on the curve.
    x = indices / (5 * np.pi)
    # Slide the window forward by one sequence length for the next call.
    BATCH_START = begin + per_sequence
    return np.cos(x), np.sin(x), x



class LSTMRNN(object):
    """LSTM sequence regressor: dense input layer -> BasicLSTMCell -> dense output layer.

    Constructing an instance adds the placeholders, variables, cost and Adam
    training op to the current default graph.

    Args:
        batch_size: number of sequences per batch; the LSTM initial state and
            the loss weights are built with this fixed size.
        time_step: number of time steps per sequence.
        input_size: number of values fed in at each time step.
        cell_size: number of units in the LSTM cell and width of the input projection.
        output_size: number of values predicted at each time step.
        lr: learning rate for the Adam optimizer.
    """

    def __init__(self, batch_size, time_step, input_size, cell_size, output_size, lr):
        # Store the hyper-parameters first: the builder methods read them from self.
        self.batch_size, self.time_step = batch_size, time_step
        self.input_size, self.cell_size, self.output_size = input_size, cell_size, output_size
        self.lr = lr

        with tf.name_scope('Inputs'):
            input_shape = [None, self.time_step, self.input_size]
            target_shape = [None, self.time_step, self.output_size]
            self.xs = tf.placeholder(tf.float32, input_shape, name='xs')
            self.ys = tf.placeholder(tf.float32, target_shape, name='ys')

        # Each layer lives in its own variable scope so the 'Weights' / 'Bias'
        # variables created through tf.get_variable() do not collide.
        with tf.variable_scope('InputLayer'):
            self.add_input_layer()
        with tf.variable_scope('Cells'):
            self.add_cells()
        with tf.variable_scope('OutputLayer'):
            self.add_output_layer()

        with tf.name_scope('Cost'):
            self.calc_loss()
        with tf.name_scope('Train_Op'):
            optimizer = tf.train.AdamOptimizer(self.lr)
            self.train_op = optimizer.minimize(self.cost)

    def add_input_layer(self):
        """Project every time step from input_size to cell_size features.

        Sets self.rnn_inputs with shape [batch, time_step, cell_size].
        """
        # Fold time into the batch axis so one 2-D matmul handles all steps.
        flat_inputs = tf.reshape(self.xs, [-1, self.input_size])
        in_weights = self._weight_variable([self.input_size, self.cell_size])
        in_bias = self._bias_variable([self.cell_size])
        projected = tf.matmul(flat_inputs, in_weights) + in_bias
        self.rnn_inputs = tf.reshape(projected, [-1, self.time_step, self.cell_size])

    def add_cells(self):
        """Run the projected inputs through a BasicLSTMCell (forget_bias=0.7).

        Sets self.init_state (zero state for batch_size sequences, which callers
        may override through feed_dict), self.output and self.final_state.
        """
        cell = tf.contrib.rnn.BasicLSTMCell(self.cell_size, forget_bias=0.7)
        self.init_state = cell.zero_state(self.batch_size, dtype=tf.float32)
        # With time_major=False the inputs are laid out as [batch_size, time_step, ...].
        rnn_result = tf.nn.dynamic_rnn(
            cell, self.rnn_inputs, initial_state=self.init_state, time_major=False
        )
        self.output, self.final_state = rnn_result

    def add_output_layer(self):
        """Map every LSTM output to output_size values.

        Sets self.pred with shape [batch, time_step, output_size].
        """
        flat_outputs = tf.reshape(self.output, [-1, self.cell_size])
        out_weights = self._weight_variable([self.cell_size, self.output_size])
        out_bias = self._bias_variable([self.output_size])
        flat_pred = tf.matmul(flat_outputs, out_weights) + out_bias
        self.pred = tf.reshape(flat_pred, [-1, self.time_step, self.output_size])

    def calc_loss(self):
        """Define self.cost and register it as the 'Cost' scalar summary.

        Flattened predictions and targets are compared element-wise with
        ms_error, weighted by a vector of ones that has batch_size * time_step
        entries. The cost is the sum of those losses divided by batch_size.
        """
        flat_pred = tf.reshape(self.pred, [-1])
        flat_target = tf.reshape(self.ys, [-1])
        unit_weights = tf.ones([self.batch_size * self.time_step], dtype=tf.float32)
        losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [flat_pred],
            [flat_target],
            [unit_weights],
            softmax_loss_function=self.ms_error
        )
        with tf.name_scope('AverBatchCost'):
            total_loss = tf.reduce_sum(losses)
            self.cost = tf.div(total_loss, self.batch_size, name='Cost')
            tf.summary.scalar('Cost', self.cost)

    @staticmethod
    def ms_error(logits, labels):
        """Element-wise squared error between logits and labels."""
        difference = tf.subtract(logits, labels)
        return tf.square(difference)

    def _weight_variable(self, shape, name='Weights'):
        """Create (via tf.get_variable) a weight variable drawn from a truncated normal, mean 0, stddev 0.5."""
        initial_value = tf.truncated_normal(shape, mean=0, stddev=0.5)
        return tf.get_variable(name=name, initializer=initial_value)

    def _bias_variable(self, shape, name='Bias'):
        """Create (via tf.get_variable) a bias variable drawn from a truncated normal, mean 0, stddev 0.5."""
        initial_value = tf.truncated_normal(shape, mean=0, stddev=0.5)
        return tf.get_variable(name=name, initializer=initial_value)
        
if __name__ == '__main__':
    # Start from an empty default graph. Without this, tf.summary.merge_all()
    # also collects summaries registered by models built in other cells (whose
    # placeholders are never fed here), and re-running the cell fails because
    # tf.get_variable() finds the variable names already taken.
    tf.reset_default_graph()

    model = LSTMRNN(BATCH_SIZE, TIME_STEP, INPUT_SIZE, CELL_SIZE, OUTPUT_SIZE, LEARNING_RATE)
    sess = tf.Session()
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter('Logs/', sess.graph)
    init = tf.global_variables_initializer()
    sess.run(init)
    plt.ion()
    plt.show()

    for i in range(500):
        seq, label, x = get_batch()
        if i == 0:
            # First batch: let the model use its built-in zero initial state.
            feed_dict = {model.xs: seq, model.ys: label}
        else:
            # Later batches: carry the LSTM state over from the previous batch.
            feed_dict = {model.xs: seq, model.ys: label, model.init_state: state}
        _, cost, state, pred = sess.run(
            [model.train_op, model.cost, model.final_state, model.pred],
            feed_dict=feed_dict
        )
        # plotting: target (red) vs. prediction (blue dashed) for the first
        # sequence of the batch.
        plt.plot(x[0, :, :], label.flatten()[: TIME_STEP], 'r',
                 x[0, :, :], pred.flatten()[: TIME_STEP], 'b--')
        plt.ylim((-1.2, 1.2))
        plt.draw()
        plt.pause(0.3)
        if i % 20 == 0:
            print('Cost: ', round(cost, 4))
            result = sess.run(merged, feed_dict=feed_dict)
            writer.add_summary(result, i)

    # Push any buffered summaries to disk so they are visible right away.
    writer.flush()