# lstm 内部结构 源代码（成功2019-8-5）

## lstm.py

In [None]:
import random

import numpy as np
import math

def sigmoid(x): 
    return 1. / (1 + np.exp(-x))

def sigmoid_derivative(values): 
    return values*(1-values)

def tanh_derivative(values): 
    return 1. - values ** 2

# createst uniform random array w/ values in [a,b) and shape args
def rand_arr(a, b, *args): 
    np.random.seed(0)
    return np.random.rand(*args) * (b - a) + a

class LstmParam:
    def __init__(self, mem_cell_ct, x_dim):
        self.mem_cell_ct = mem_cell_ct
        self.x_dim = x_dim
        concat_len = x_dim + mem_cell_ct
        # weight matrices
        self.wg = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
        self.wi = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len) 
        self.wf = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
        self.wo = rand_arr(-0.1, 0.1, mem_cell_ct, concat_len)
        # bias terms
        self.bg = rand_arr(-0.1, 0.1, mem_cell_ct) 
        self.bi = rand_arr(-0.1, 0.1, mem_cell_ct) 
        self.bf = rand_arr(-0.1, 0.1, mem_cell_ct) 
        self.bo = rand_arr(-0.1, 0.1, mem_cell_ct) 
        # diffs (derivative of loss function w.r.t. all parameters)
        self.wg_diff = np.zeros((mem_cell_ct, concat_len)) 
        self.wi_diff = np.zeros((mem_cell_ct, concat_len)) 
        self.wf_diff = np.zeros((mem_cell_ct, concat_len)) 
        self.wo_diff = np.zeros((mem_cell_ct, concat_len)) 
        self.bg_diff = np.zeros(mem_cell_ct) 
        self.bi_diff = np.zeros(mem_cell_ct) 
        self.bf_diff = np.zeros(mem_cell_ct) 
        self.bo_diff = np.zeros(mem_cell_ct) 

    def apply_diff(self, lr = 1):
        self.wg -= lr * self.wg_diff
        self.wi -= lr * self.wi_diff
        self.wf -= lr * self.wf_diff
        self.wo -= lr * self.wo_diff
        self.bg -= lr * self.bg_diff
        self.bi -= lr * self.bi_diff
        self.bf -= lr * self.bf_diff
        self.bo -= lr * self.bo_diff
        # reset diffs to zero
        self.wg_diff = np.zeros_like(self.wg)
        self.wi_diff = np.zeros_like(self.wi) 
        self.wf_diff = np.zeros_like(self.wf) 
        self.wo_diff = np.zeros_like(self.wo) 
        self.bg_diff = np.zeros_like(self.bg)
        self.bi_diff = np.zeros_like(self.bi) 
        self.bf_diff = np.zeros_like(self.bf) 
        self.bo_diff = np.zeros_like(self.bo) 

class LstmState:
    def __init__(self, mem_cell_ct, x_dim):
        self.g = np.zeros(mem_cell_ct)
        self.i = np.zeros(mem_cell_ct)
        self.f = np.zeros(mem_cell_ct)
        self.o = np.zeros(mem_cell_ct)
        self.s = np.zeros(mem_cell_ct)
        self.h = np.zeros(mem_cell_ct)
        self.bottom_diff_h = np.zeros_like(self.h)
        self.bottom_diff_s = np.zeros_like(self.s)
    
class LstmNode:
    def __init__(self, lstm_param, lstm_state):
        # store reference to parameters and to activations
        self.state = lstm_state
        self.param = lstm_param
        # non-recurrent input concatenated with recurrent input
        self.xc = None

    def bottom_data_is(self, x, s_prev = None, h_prev = None):
        # if this is the first lstm node in the network
        if s_prev is None: s_prev = np.zeros_like(self.state.s)
        if h_prev is None: h_prev = np.zeros_like(self.state.h)
        # save data for use in backprop
        self.s_prev = s_prev
        self.h_prev = h_prev

        # concatenate x(t) and h(t-1)
        xc = np.hstack((x,  h_prev))
        self.state.g = np.tanh(np.dot(self.param.wg, xc) + self.param.bg)
        self.state.i = sigmoid(np.dot(self.param.wi, xc) + self.param.bi)
        self.state.f = sigmoid(np.dot(self.param.wf, xc) + self.param.bf)
        self.state.o = sigmoid(np.dot(self.param.wo, xc) + self.param.bo)
        self.state.s = self.state.g * self.state.i + s_prev * self.state.f
        self.state.h = self.state.s * self.state.o

        self.xc = xc
    
    def top_diff_is(self, top_diff_h, top_diff_s):
        # notice that top_diff_s is carried along the constant error carousel
        ds = self.state.o * top_diff_h + top_diff_s
        do = self.state.s * top_diff_h
        di = self.state.g * ds
        dg = self.state.i * ds
        df = self.s_prev * ds

        # diffs w.r.t. vector inside sigma / tanh function
        di_input = sigmoid_derivative(self.state.i) * di 
        df_input = sigmoid_derivative(self.state.f) * df 
        do_input = sigmoid_derivative(self.state.o) * do 
        dg_input = tanh_derivative(self.state.g) * dg

        # diffs w.r.t. inputs
        self.param.wi_diff += np.outer(di_input, self.xc)
        self.param.wf_diff += np.outer(df_input, self.xc)
        self.param.wo_diff += np.outer(do_input, self.xc)
        self.param.wg_diff += np.outer(dg_input, self.xc)
        self.param.bi_diff += di_input
        self.param.bf_diff += df_input       
        self.param.bo_diff += do_input
        self.param.bg_diff += dg_input       

        # compute bottom diff
        dxc = np.zeros_like(self.xc)
        dxc += np.dot(self.param.wi.T, di_input)
        dxc += np.dot(self.param.wf.T, df_input)
        dxc += np.dot(self.param.wo.T, do_input)
        dxc += np.dot(self.param.wg.T, dg_input)

        # save bottom diffs
        self.state.bottom_diff_s = ds * self.state.f
        self.state.bottom_diff_h = dxc[self.param.x_dim:]

class LstmNetwork():
    def __init__(self, lstm_param):
        self.lstm_param = lstm_param
        self.lstm_node_list = []
        # input sequence
        self.x_list = []

    def y_list_is(self, y_list, loss_layer):
        """
        Updates diffs by setting target sequence 
        with corresponding loss layer. 
        Will *NOT* update parameters.  To update parameters,
        call self.lstm_param.apply_diff()
        """
        assert len(y_list) == len(self.x_list)
        idx = len(self.x_list) - 1
        # first node only gets diffs from label ...
        loss = loss_layer.loss(self.lstm_node_list[idx].state.h, y_list[idx])
        diff_h = loss_layer.bottom_diff(self.lstm_node_list[idx].state.h, y_list[idx])
        # here s is not affecting loss due to h(t+1), hence we set equal to zero
        diff_s = np.zeros(self.lstm_param.mem_cell_ct)
        self.lstm_node_list[idx].top_diff_is(diff_h, diff_s)
        idx -= 1

        ### ... following nodes also get diffs from next nodes, hence we add diffs to diff_h
        ### we also propagate error along constant error carousel using diff_s
        while idx >= 0:
            loss += loss_layer.loss(self.lstm_node_list[idx].state.h, y_list[idx])
            diff_h = loss_layer.bottom_diff(self.lstm_node_list[idx].state.h, y_list[idx])
            diff_h += self.lstm_node_list[idx + 1].state.bottom_diff_h
            diff_s = self.lstm_node_list[idx + 1].state.bottom_diff_s
            self.lstm_node_list[idx].top_diff_is(diff_h, diff_s)
            idx -= 1 

        return loss

    def x_list_clear(self):
        self.x_list = []

    def x_list_add(self, x):
        self.x_list.append(x)
        if len(self.x_list) > len(self.lstm_node_list):
            # need to add new lstm node, create new state mem
            lstm_state = LstmState(self.lstm_param.mem_cell_ct, self.lstm_param.x_dim)
            self.lstm_node_list.append(LstmNode(self.lstm_param, lstm_state))

        # get index of most recent x input
        idx = len(self.x_list) - 1
        if idx == 0:
            # no recurrent inputs yet
            self.lstm_node_list[idx].bottom_data_is(x)
        else:
            s_prev = self.lstm_node_list[idx - 1].state.s
            h_prev = self.lstm_node_list[idx - 1].state.h
            self.lstm_node_list[idx].bottom_data_is(x, s_prev, h_prev)




## test.py

In [None]:
import numpy as np

#from lstm import LstmParam, LstmNetwork


class ToyLossLayer:
    """
    Computes square loss with first element of hidden layer array.
    """
    @classmethod
    def loss(self, pred, label):
        return (pred[0] - label) ** 2

    @classmethod
    def bottom_diff(self, pred, label):
        diff = np.zeros_like(pred)
        diff[0] = 2 * (pred[0] - label)
        return diff


def example_0():
    # learns to repeat simple sequence from random inputs
    np.random.seed(0)

    # parameters for input data dimension and lstm cell count
    mem_cell_ct = 100
    x_dim = 50
    lstm_param = LstmParam(mem_cell_ct, x_dim)
    lstm_net = LstmNetwork(lstm_param)
    y_list = [-0.5, 0.2, 0.1, -0.5]
    input_val_arr = [np.random.random(x_dim) for _ in y_list]

    for cur_iter in range(100):
        print("iter", "%2s" % str(cur_iter), end=": ")
        for ind in range(len(y_list)):
            lstm_net.x_list_add(input_val_arr[ind])

        print("y_pred = [" +
              ", ".join(["% 2.5f" % lstm_net.lstm_node_list[ind].state.h[0] for ind in range(len(y_list))]) +
              "]", end=", ")

        loss = lstm_net.y_list_is(y_list, ToyLossLayer)
        print("loss:", "%.3e" % loss)
        lstm_param.apply_diff(lr=0.1)
        lstm_net.x_list_clear()


if __name__ == "__main__":
    example_0()




In [None]:
#分解学习

In [None]:
# parameters for input data dimension and lstm cell count
mem_cell_ct = 100
x_dim = 50
lstm_param = LstmParam(mem_cell_ct, x_dim)
lstm_net = LstmNetwork(lstm_param)
y_list = [-0.5, 0.2, 0.1, -0.5]
input_val_arr = [np.random.random(x_dim) for _ in y_list]

In [None]:
input_val_arr

In [None]:
class ToyLossLayer:
    """
    Computes square loss with first element of hidden layer array.
    """
    @classmethod
    def loss(self, pred, label):
        return (pred[0] - label) ** 2

    @classmethod
    def bottom_diff(self, pred, label):
        diff = np.zeros_like(pred)
        diff[0] = 2 * (pred[0] - label)
        return diff


In [None]:
for cur_iter in range(2):
        print("iter", "%2s" % str(cur_iter), end=": ")
        for ind in range(len(y_list)):
            lstm_net.x_list_add(input_val_arr[ind])

        node_state_h=["% 2.5f" % lstm_net.lstm_node_list[ind].state.h[0] for ind in range(len(y_list))] 
        print(node_state_h)
        print("y_pred = [" + ", ".join(node_state_h) + "]", end=", ")

        loss = lstm_net.y_list_is(y_list, ToyLossLayer)
        print("loss:", "%.3e" % loss)
        lstm_param.apply_diff(lr=0.1)
        lstm_net.x_list_clear()

In [None]:
lstm_net.lstm_node_list[1].state.h

# Numpy实现简单RNN的前向传播

In [None]:
#一、使用Numpy实现简单RNN的前向传播

import numpy as np

timesteps = 100
input_features = 32
output_features = 64

# 输入有100个时间点，每个时间点有32维的数据
inputs = np.random.random((timesteps,input_features))
state_t = np.zeros((output_features,))

W = np.random.random((output_features,input_features)) # input的权重
U = np.random.random((output_features,output_features)) # state的权重
b = np.random.random((output_features,)) # bias

successive_outputs = []

for input_t in inputs:
    # 按timesteps进行迭代
    
    # output_t是一个64维的向量
    output_t = np.tanh(np.dot(W,input_t)+np.dot(U,state_t)+b)
    
    # 将当前时刻的输出保存到successive_outputs中
    successive_outputs.append(output_t)
    
    # 当前时刻的输出作为下一时刻的state
    state_t = output_t
    
final_output_sequence = np.concatenate(successive_outputs,axis=0)


In [None]:
inputs

In [None]:
input_t

In [None]:
W

In [None]:
final_output_sequence

# RNN例子++++(成功2019-9-2)

## 一个隐藏层

In [None]:
import tensorflow as tf
import numpy as np

n_steps = 2
n_inputs = 3
n_neurons = 5

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)

seq_length = tf.placeholder(tf.int32, [None])

outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,
                                    sequence_length=seq_length)

init = tf.global_variables_initializer()

X_batch = np.array([
        # step 0     step 1
        [[0, 1, 2], [9, 8, 7]], # instance 1
        [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)
        [[6, 7, 8], [6, 5, 4]], # instance 3
        [[9, 0, 1], [3, 2, 1]], # instance 4
    ])
seq_length_batch = np.array([2, 1, 2, 2])

with tf.Session() as sess:
    init.run()
    outputs_val, states_val = sess.run(
        [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})
    print("outputs_val.shape:", outputs_val.shape, "states_val.shape:", states_val.shape)
    print("\n outputs_val:\n", outputs_val, "\n states_val:\n", states_val)


首先输入X是一个 [batch_size，step，input_size] = [4，2，3] 的tensor，注意我们这里调用的是BasicRNNCell，只有一层循环网络，outputs是最后一层每个step的输出，它的结构是[batch_size，step，n_neurons] = [4，2，5]，states是每一层的最后那个step的输出，由于本例中，我们的循环网络只有一个隐藏层，所以它就代表这一层的最后那个step的输出，因此它和step的大小是没有关系的，我们的X有4个样本组成，输出神经元大小n_neurons是5，因此states的结构就是[batch_size，n_neurons] = [4，5]，最后我们观察数据，states的每条数据正好就是outputs的最后一个step的输出。

## 三个隐藏层

In [1]:
import tensorflow as tf
import numpy as np

n_steps = 2
n_inputs = 3
n_neurons = 5
n_layers = 3

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

seq_length = tf.placeholder(tf.int32, [None])

layers = [tf.contrib.rnn.BasicRNNCell(num_units=n_neurons,
                                      activation=tf.nn.relu)
          for layer in range(n_layers)]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)

outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32, sequence_length=seq_length)

init = tf.global_variables_initializer()

X_batch = np.array([
        # step 0     step 1
        [[0, 1, 2], [9, 8, 7]], # instance 1
        [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)
        [[6, 7, 8], [6, 5, 4]], # instance 3
        [[9, 0, 1], [3, 2, 1]], # instance 4
    ])

seq_length_batch = np.array([2, 1, 2, 2])

with tf.Session() as sess:
    init.run()
    outputs_val, states_val = sess.run(
        [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})

   
    print("outputs_val.shape:", outputs, "\n states_val.shape:", states)
    print("\n outputs_val:", outputs_val, "\n states_val:", states_val)


W0903 21:13:14.878598 15740 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

W0903 21:13:14.878598 15740 deprecation.py:323] From <ipython-input-1-5168ce7fc784>:15: BasicRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.SimpleRNNCell, and will be replaced by that in Tensorflow 2.0.
W0903 21:13:14.878598 15740 deprecation.py:323] From <ipython-input-1-5168ce7fc784>:16: MultiRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class 

outputs_val.shape: Tensor("rnn/transpose_1:0", shape=(?, 2, 5), dtype=float32) 
 states_val.shape: (<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 5) dtype=float32>, <tf.Tensor 'rnn/while/Exit_4:0' shape=(?, 5) dtype=float32>, <tf.Tensor 'rnn/while/Exit_5:0' shape=(?, 5) dtype=float32>)

 outputs_val: [[[0.         0.10676921 0.         0.         0.10809741]
  [0.         0.64463055 0.         0.         1.6826782 ]]

 [[0.         0.3267716  0.         0.         0.58085716]
  [0.         0.         0.         0.         0.        ]]

 [[0.         0.5901417  0.         0.         1.1638079 ]
  [0.         0.13024622 0.         0.         1.877127  ]]

 [[0.         2.5622442  0.514341   0.         3.8845227 ]
  [1.0911777  0.         0.         1.1432415  0.52477217]]] 
 states_val: (array([[5.9811273 , 0.        , 0.        , 1.4522586 , 0.        ],
       [2.1700091 , 0.        , 0.        , 1.2011844 , 0.        ],
       [5.909309  , 0.        , 0.        , 1.0427554 , 0.        ],


In [None]:
# encoding:utf-8
import tensorflow as tf

batch_size=10
depth=128 #一个序列中有128个样本

inputs=tf.Variable(tf.random_normal([batch_size,depth]))# 一次输入 128 个样本的序列

previous_state0=(tf.random_normal([batch_size,100]),tf.random_normal([batch_size,100]))
previous_state1=(tf.random_normal([batch_size,200]),tf.random_normal([batch_size,200]))
previous_state2=(tf.random_normal([batch_size,300]),tf.random_normal([batch_size,300]))

num_units=[100,200,300]
print(inputs)

cells=[tf.nn.rnn_cell.BasicLSTMCell(num_unit) for num_unit in num_units]
mul_cells=tf.nn.rnn_cell.MultiRNNCell(cells)

outputs,states=mul_cells(inputs,(previous_state0,previous_state1,previous_state2))

print(outputs.shape) #(10, 300)
print(states[0]) #第一层LSTM
print(states[1]) #第二层LSTM
print(states[2]) ##第三层LSTM
print(states[0].h.shape) #第一层LSTM的h状态,(10, 100)
print(states[0].c.shape) #第一层LSTM的c状态,(10, 100)
print(states[1].h.shape) #第二层LSTM的h状态,(10, 200)


# tf.keras.layers.SimpleRNNCell学习

## Single layer RNN Cell

In [None]:
import tensorflow as tf

x = tf.random.normal([4, 80, 100])
ht0 = x[:, 0, :]

cell = tf.keras.layers.SimpleRNNCell(64)

out, ht1 = cell(ht0, [tf.zeros([4, 64])])

print(out.shape, ht1[0].shape)
#[]
#
#(TensorShape([4, 64]), TensorShape([4, 64]))
id(out), id(ht1[0])  # same id
#(4877125168, 4877125168)


In [None]:
ht0

In [None]:
print(tf.keras.layers.SimpleRNNCell.__doc__)

## Multi-Layers RNN

In [None]:
x = tf.random.normal([4, 80, 100])
ht0 = x[:, 0, :]

cell0 = tf.keras.layers.SimpleRNNCell(64)
cell2 = tf.keras.layers.SimpleRNNCell(64)
state0 = [tf.zeros([4, 64])]
state1 = [tf.zeros([4, 64])]

out0, state0 = cell0(ht0, state0)
out2, state2 = cell2(out0, state1)

print(out2.shape, state2[0].shape)
#(TensorShape([4, 64]), TensorShape([4, 64]))


# tf.nn.rnn_cell.BasicRNNCell学习

In [None]:
import tensorflow as tf

def basic_rnn_demo():
    cell = tf.nn.rnn_cell.BasicRNNCell(num_units=4) ####
    zero_state = cell.zero_state(batch_size=2, dtype=tf.float32)
    a = tf.random_normal([2, 3, 4])
    out, state = tf.nn.dynamic_rnn(
        cell=cell,
        initial_state=zero_state,
        inputs=a
    )
    print(out)
    print(state)
    
basic_rnn_demo()

In [None]:
import tensorflow as tf

def basic_rnn_demo():
    cell = tf.keras.layers.SimpleRNNCell(num_units=4)####
    zero_state = cell.zero_state(batch_size=2, dtype=tf.float32)
    a = tf.random_normal([2, 3, 4])
    out, state = tf.nn.dynamic_rnn(
        cell=cell,
        initial_state=zero_state,
        inputs=a
    )
    print(out)
    print(state)
    
basic_rnn_demo()

In [None]:
# -*- coding:utf-8 -*-
# author: adowu
# https://raw.githubusercontent.com/adowu/ado-tensorflow-models/master/03_AllRNN/basic_rnn_demo.py
import tensorflow as tf

tf.enable_eager_execution()


def basic_rnn_demo():
    """
    Most basic rnn
    tanh(W * input + U * state + B)

    #   inputs [2,3,4]  state = [2,4]
    #   unstack(inputs) = 3 size [2,4]
    #   每次是进行一个batch的时间步骤的计算，第一个进行的就是每个batch中的第一个字
    #   a = [2, 8]
    a = concat([inputs,state], 1)
    #   kernel_ = [8, 4] kernel_在每个时间步都是共享的
    kernel_ = [inputs_dim + num_units,num_units]
    #   b = [2, 4]
    b = matmul(a, kernel_)
    #   c = [2, 4] bias初始化为0 bias 在每个时间步都是共享的
    c = b + bias
    #   [2, 4]
    #   会返回一个tuple，内容都是output，一个作为此时刻的state,这样state就可以了从第一个一直往后更新
    output,output = tanh(c)
    """

    cell = tf.nn.rnn_cell.BasicRNNCell(num_units=4)
    
    zero_state = cell.zero_state(batch_size=2, dtype=tf.float32)
    
    a = tf.random_normal([2, 3, 4])

    """
    out
    tf.Tensor(
    [[[ 0.7875833   0.11634824  0.31249827  0.11648687]
      [ 0.6418752  -0.9281747   0.6534868   0.3821376 ]
      [ 0.9750985  -0.40439364  0.9770327   0.8529797 ]]
    
     [[-0.09945039 -0.49678802 -0.32603818  0.20098403]
      [-0.57557577  0.15389016 -0.7197561  -0.36572933]
      [ 0.4485007  -0.51780844 -0.6015551   0.16041796]]], shape=(2, 3, 4), dtype=float32)
    
    state
    tf.Tensor(
    [[ 0.9750985  -0.40439364  0.9770327   0.8529797 ]
     [ 0.4485007  -0.51780844 -0.6015551   0.16041796]], shape=(2, 4), dtype=float32)
    """
    #   output shape = [2,3,4] 表示的是每个时间步的输出
    #   state shape = [2, 4] 表示最后的状态输出
    out, state = tf.nn.dynamic_rnn(
        cell=cell,
        initial_state=zero_state,
        inputs=a
    )

    print(out)
    print(state)


if __name__ == '__main__':
    basic_rnn_demo()



# RNN -手写数据集 成功2019-8-5

In [None]:
from keras.models import Sequential
from keras.layers import Dense,SimpleRNN,Activation
from keras.datasets import mnist
from keras.utils import np_utils
from keras.optimizers import Adam
import numpy as np

#(x_train,y_train),(x_test,y_test) = mnist.load_data()


#已经下载了数据要，直接加载
f = np.load("mnist.npz")
x_train, y_train = f['x_train'], f['y_train']
x_test, y_test = f['x_test'], f['y_test']
f.close()


x_train = x_train.reshape(-1,28,28)/255
x_test = x_test.reshape(-1,28,28)/255
y_train = np_utils.to_categorical(y_train,num_classes=10)
y_test = np_utils.to_categorical(y_test,num_classes=10)

TIME_STEPS = 28 # as same as the image height
INPUT_SIZE = 28 # as same as the image width
BATCH_SIZE = 100
BATCH_INDEX = 0
OUTPUT_SIZE = 10
CELL_SIZE = 50 # how many hidden layer
LR = 0.001

# built the RNN model
model = Sequential()
model.add(SimpleRNN(batch_input_shape=(None,TIME_STEPS,INPUT_SIZE),
                    output_dim=CELL_SIZE,
                    activation='relu'))
model.add(Dense(OUTPUT_SIZE))
model.add(Activation('softmax'))


adam = Adam(LR)
model.compile(optimizer=adam,loss='categorical_crossentropy',metrics=['accuracy'])

# training
#print('training...')
for step in range(4001):
    
    x_batch = x_train[BATCH_INDEX:BATCH_SIZE+BATCH_INDEX,:,:]
    y_batch = y_train[BATCH_INDEX:BATCH_SIZE+BATCH_INDEX,:]
    
    cost = model.train_on_batch(x_batch,y_batch)
    
    BATCH_INDEX += BATCH_SIZE
    if BATCH_INDEX >= x_train.shape[0]:
        BATCH_INDEX = 0
    
    if step%500 == 0:
        cost,accuracy = model.evaluate(x_test,y_test,batch_size=y_test.shape[0],verbose=False)
        print('cost : ',cost,' accuracy : ',accuracy)


# 手写Bi-RNN 成功2019--8-1


- Project: https://github.com/aymericdamien/TensorFlow-Examples/

BiRNN Overview

<img src="https://ai2-s2-public.s3.amazonaws.com/figures/2016-11-08/191dd7df9cb91ac22f56ed0dfa4a5651e8767a51/1-Figure2-1.png" alt="nn" style="width: 600px;"/>



In [None]:
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np

# Import MNIST data
#from tensorflow.examples.tutorials.mnist import input_data
#mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
MNIST_data =r'C:\Users\yuli\[Python程序设计]++++++++++++\MNIST_data'
mnist = input_data.read_data_sets(MNIST_data,one_hot=True) 


# Training Parameters
learning_rate = 0.01
training_steps = 100
batch_size = 128
display_step = 10

# Network Parameters
num_input = 28 # MNIST data input (img shape: 28*28)
timesteps = 28 # timesteps
num_hidden = 128 # hidden layer num of features
num_classes = 10 # MNIST total classes (0-9 digits)

# tf Graph input
X = tf.placeholder("float", [None, timesteps, num_input])
Y = tf.placeholder("float", [None, num_classes])

# Define weights
weights = {
    # Hidden layer weights => 2*n_hidden because of forward + backward cells
    'out': tf.Variable(tf.random_normal([2*num_hidden, num_classes]))}

biases = {'out': tf.Variable(tf.random_normal([num_classes]))}


def BiRNN(x, weights, biases):

    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, num_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, num_input)
    x = tf.unstack(x, timesteps, 1)

    lstm_fw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)#前向
    lstm_bw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)#后向

    # Get lstm cell output
    try:
        outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,dtype=tf.float32)
    except Exception: # Old TensorFlow version only returns outputs not states
        outputs =       rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

logits = BiRNN(X, weights, biases)######################
prediction = tf.nn.softmax(logits)

loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model (with test logits, for dropout to be disabled)
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()

with tf.Session() as sess:

    sess.run(init)

    for step in range(1, training_steps+1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Reshape data to get 28 seq of 28 elements
        batch_x = batch_x.reshape((batch_size, timesteps, num_input))
        
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                                 Y: batch_y})
            print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))

    print("Optimization Finished!")

    # Calculate accuracy for 128 mnist test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
    test_label = mnist.test.labels[:test_len]
    print("Testing Accuracy:",sess.run(accuracy, feed_dict={X: test_data, Y: test_label}))


# 二进制加法代码（成功）

In [None]:

import copy, numpy as np

np.random.seed(0)  # 固定随机数生成器的种子，便于得到固定的输出

# compute sigmoid nonlinearity
def sigmoid(x):  # 激活函数
    output = 1 / (1 + np.exp(-x))
    return output


# convert output of sigmoid function to its derivative
def sigmoid_output_to_derivative(output):  # 激活函数的导数
    return output * (1 - output)


# training dataset generation
int2binary = {}  # 整数到其二进制表示的映射
binary_dim = 8  # 暂时制作256以内的加法， 可以调大

## 以下5行代码计算0-256的二进制表示
largest_number = pow(2, binary_dim)
binary = np.unpackbits(np.array([range(largest_number)], dtype=np.uint8).T, axis=1)
for i in range(largest_number):
    int2binary[i] = binary[i]

# input variables
alpha = 0.1  # 学习速率
input_dim = 2  # 因为我们是做两个数相加，每次会喂给神经网络两个bit，所以输入的维度是2
hidden_dim = 16  # 隐藏层的神经元节点数，远比理论值要大（译者注：理论上而言，应该一个节点就可以记住有无进位了，但我试了发现4的时候都没法收敛），你可以自己调整这个数，看看调大了是容易更快地收敛还是更慢
output_dim = 1  # 我们的输出是一个数，所以维度为1

# initialize neural network weights
synapse_0 = 2 * np.random.random((input_dim, hidden_dim)) - 1  # 输入层到隐藏层的转化矩阵，维度为2*16， 2是输入维度，16是隐藏层维度
synapse_1 = 2 * np.random.random((hidden_dim, output_dim)) - 1
synapse_h = 2 * np.random.random((hidden_dim, hidden_dim)) - 1
# 译者注：np.random.random产生的是[0,1)的随机数，2 * [0, 1) - 1 => [-1, 1)，
# 是为了有正有负更快地收敛，这涉及到如何初始化参数的问题，通常来说都是靠“经验”或者说“启发式规则”，说得直白一点就是“蒙的”！机器学习里面，超参数的选择，大部分都是这种情况，哈哈。。。
# 我自己试了一下用【0, 2)之间的随机数，貌似不能收敛，用[0,1)就可以，呵呵。。。
# 以下三个分别对应三个矩阵的变化
synapse_0_update = np.zeros_like(synapse_0)
synapse_1_update = np.zeros_like(synapse_1)
synapse_h_update = np.zeros_like(synapse_h)

# training logic
# 学习1000个例子
for j in range(1000):

    # 下面6行代码，随机产生两个0-128的数字，并查出他们的二进制表示。为了避免相加之和超过256，这里选择两个0-128的数字
    # generate a simple addition problem (a + b = c)
    a_int = np.random.randint(largest_number / 2)  # int version
    a = int2binary[a_int]  # binary encoding
    b_int = np.random.randint(largest_number / 2)  # int version
    b = int2binary[b_int]  # binary encoding
    # true answer
    c_int = a_int + b_int
    c = int2binary[c_int]

    # where we'll store our best guess (binary encoded)
    # 存储神经网络的预测值
    d = np.zeros_like(c)
    overallError = 0  # 每次把总误差清零

    layer_2_deltas = list()  # 存储每个时间点输出层的误差
    layer_1_values = list()  # 存储每个时间点隐藏层的值
    layer_1_values.append(np.zeros(hidden_dim))  # 一开始没有隐藏层，所以里面都是0

    # moving along the positions in the binary encoding
    for position in range(binary_dim):  # 循环遍历每一个二进制位

        # generate input and output
        X = np.array([[a[binary_dim - position - 1], b[binary_dim - position - 1]]])  # 从右到左，每次去两个输入数字的一个bit位
        y = np.array([[c[binary_dim - position - 1]]]).T  # 正确答案
        # hidden layer (input ~+ prev_hidden)
        layer_1 = sigmoid(np.dot(X, synapse_0) + np.dot(layer_1_values[-1],
                                                        synapse_h))  # （输入层 + 之前的隐藏层） -> 新的隐藏层，这是体现循环神经网络的最核心的地方！！！
        # output layer (new binary representation)
        layer_2 = sigmoid(np.dot(layer_1, synapse_1))  # 隐藏层 * 隐藏层到输出层的转化矩阵synapse_1 -> 输出层
        # did we miss?... if so, by how much?
        layer_2_error = y - layer_2  # 预测误差是多少
        layer_2_deltas.append((layer_2_error) * sigmoid_output_to_derivative(layer_2))  # 我们把每一个时间点的误差导数都记录下来
        overallError += np.abs(layer_2_error[0])  # 总误差

        # decode estimate so we can print it out
        d[binary_dim - position - 1] = np.round(layer_2[0][0])  # 记录下每一个预测bit位

        # store hidden layer so we can use it in the next timestep
        layer_1_values.append(copy.deepcopy(layer_1))  # 记录下隐藏层的值，在下一个时间点用

    future_layer_1_delta = np.zeros(hidden_dim)

    # 前面代码我们完成了所有时间点的正向传播以及计算最后一层的误差，
    #现在我们要做的是反向传播，从最后一个时间点到第一个时间点
    for position in range(binary_dim):
        X = np.array([[a[position], b[position]]])  # 最后一次的两个输入
        layer_1 = layer_1_values[-position - 1]      # 当前时间点的隐藏层
        prev_layer_1 = layer_1_values[-position - 2]  # 前一个时间点的隐藏层

        # error at output layer
        layer_2_delta = layer_2_deltas[-position - 1]  # 当前时间点输出层导数
        # error at hidden layer
        # 通过后一个时间点（因为是反向传播）的隐藏层误差和当前时间点的输出层误差，计算当前时间点的隐藏层误差
        layer_1_delta = (future_layer_1_delta.dot(synapse_h.T) + layer_2_delta.dot(
            synapse_1.T)) * sigmoid_output_to_derivative(layer_1)
        # let's update all our weights so we can try again
        # 我们已经完成了当前时间点的反向传播误差计算， 可以构建更新矩阵了。但是我们并不会现在就更新权重矩阵，因为我们还要用他们计算前一个时间点的更新矩阵呢。
        # 所以要等到我们完成了所有反向传播误差计算， 才会真正的去更新权重矩阵，我们暂时把更新矩阵存起来。
        # 可以看这里了解更多关于反向传播的知识http://iamtrask.github.io/2015/07/12/basic-python-network/
        synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
        synapse_h_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)
        synapse_0_update += X.T.dot(layer_1_delta)

        future_layer_1_delta = layer_1_delta

    # 我们已经完成了所有的反向传播，可以更新几个转换矩阵了。并把更新矩阵变量清零
    synapse_0 += synapse_0_update * alpha
    synapse_1 += synapse_1_update * alpha
    synapse_h += synapse_h_update * alpha
    synapse_0_update *= 0
    synapse_1_update *= 0
    synapse_h_update *= 0

    # print out progress
    if (j % 1000 == 0):
        print("Error:" + str(overallError))
        print("Pred:" + str(d))
        print("True:" + str(c))
        out = 0
        for index, x in enumerate(reversed(d)):
            out += x * pow(2, index)
        print(str(a_int) + " + " + str(b_int) + " = " + str(out))
        print("------------")

# 三个字母映射代码 成功

In [None]:

# Naive LSTM to learn three-char time steps to one-char mapping
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import np_utils
# fix random seed for reproducibility
numpy.random.seed(7)
# define the raw dataset
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# create mapping of characters to integers (0-25) and the reverse
char_to_int = dict((c, i) for i, c in enumerate(alphabet))
int_to_char = dict((i, c) for i, c in enumerate(alphabet))

# prepare the dataset of input to output pairs encoded as integers
seq_length = 3
dataX = []
dataY = []
for i in range(0, len(alphabet) - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    print (seq_in, '->', seq_out)

print ("dataX=",dataX)
#dataX= [[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, 8], [7, 8, 9], [8, 9, 10], [9, 10, 11], [10, 11, 12], [11, 12, 13], [12, 13, 14], [13, 14, 15], [14, 15, 16], [15, 16, 17], [16, 17, 18], [17, 18, 19], [18, 19, 20], [19, 20, 21], [20, 21, 22], [21, 22, 23], [22, 23, 24]]

# reshape X to be [samples, time steps, features]
X = numpy.reshape(  dataX,    ( len(dataX), seq_length, 1 )    )
#print ('X=',X)
#X= [
# [[ 0]
#  [ 1]
#  [ 2]]
#
# [[ 1]
#  [ 2]
#  [ 3]]
#
# [[ 2]
#  [ 3]
#  [ 4]]
#...]

# normalize
X = X / float(len(alphabet))
#print("X=",X)
#X= [[[ 0.        ]
#  [ 0.03846154]
#  [ 0.07692308]]
#...

# one hot encode the output variable
y = np_utils.to_categorical(dataY)
#print("y=",y)
#y= [[ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#   0.  0.  0.  0.  0.  0.  0.  0.]
# [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#   0.  0.  0.  0.  0.  0.  0.  0.]
#...

#  create and fit the model
model = Sequential()
model.add(     LSTM(32, input_shape=(X.shape[1], X.shape[2]))    )####
model.add(     Dense(y.shape[1], activation='softmax')    )
model.compile(     loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']   )
model.fit(X, y, nb_epoch=50, batch_size=1, verbose=2)

# summarize performance of the model
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (scores[1]*100))

# demonstrate some model predictions
for pattern in dataX:
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(len(alphabet))
    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    print (seq_in, "->", result)

print('End1')

In [None]:
# 下面分解学习

In [None]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import np_utils

# fix random seed for reproducibility
numpy.random.seed(7)
# define the raw dataset
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# create mapping of characters to integers (0-25) and the reverse
char_to_int = dict((c, i) for i, c in enumerate(alphabet))
int_to_char = dict((i, c) for i, c in enumerate(alphabet))

In [None]:
char_to_int

In [None]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 3
dataX = []
dataY = []
for i in range(0, len(alphabet) - seq_length, 1):
    seq_in = alphabet[i:i + seq_length]
    seq_out = alphabet[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
    print (seq_in, '->', seq_out)

print ("dataX=",dataX)

In [None]:
# reshape X to be [samples, time steps, features]
X = numpy.reshape(  dataX,    ( len(dataX), seq_length, 1 )    )
X

In [None]:
# normalize
X = X / float(len(alphabet))

In [None]:
X

In [None]:
# one hot encode the output variable
y = np_utils.to_categorical(dataY)


In [None]:
y

In [None]:
#  create and fit the model
model = Sequential()
model.add(     LSTM(32, input_shape=(X.shape[1], X.shape[2]))    )####
model.add(     Dense(y.shape[1], activation='softmax')    )
model.compile(     loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']   )

model.fit(X, y, nb_epoch=50, batch_size=1, verbose=2)


In [None]:
# summarize performance of the model
scores = model.evaluate(X, y, verbose=0)
print("Model Accuracy: %.2f%%" % (scores[1]*100))

# demonstrate some model predictions
for pattern in dataX:
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(len(alphabet))
    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    print (seq_in, "->", result)

print('End1')

# 二进制加法代码（成功）

In [None]:
#succeed! Python35

import copy, numpy as np
np.random.seed(0)

# compute sigmoid nonlinearity
def sigmoid(x):
    output = 1/(1+np.exp(-x))
    return output

# convert output of sigmoid function to its derivative
def sigmoid_output_to_derivative(output):
    return output*(1-output)

# training dataset generation
int2binary = {} #查找表，这个表是一个实数与对应二进制表示的映射
binary_dim = 8 #二进制数的最大长度

largest_number = pow(2,binary_dim)  #计算了跟二进制最大长度对应的可以表示的最大十进制数
#生成了十进制数转二进制数的查找表，并将其复制到int2binary
binary = np.unpackbits(
    np.array([range(largest_number)],dtype=np.uint8).T,axis=1)
for i in range(largest_number):
    int2binary[i] = binary[i]

# input variables
alpha = 0.1 #学习速率
input_dim = 2 #我们要把两个数加起来，所以我们一次要输入两位字符
hidden_dim = 16
output_dim = 1

# initialize neural network weights
synapse_0 = 2*np.random.random((input_dim,hidden_dim)) - 1    #2×16
synapse_1 = 2*np.random.random((hidden_dim,output_dim)) - 1  #16×1
synapse_h = 2*np.random.random((hidden_dim,hidden_dim)) - 1 #16×16

synapse_0_update = np.zeros_like(synapse_0)
synapse_1_update = np.zeros_like(synapse_1)
synapse_h_update = np.zeros_like(synapse_h)


# training logic
for j in range(10000):
    
    # generate a simple addition problem (a + b = c)
    a_int = np.random.randint(largest_number/2)  # int version
    a = int2binary[a_int] # binary encoding a_int对应的二进制表示

    b_int = np.random.randint(largest_number/2) # int version
    b = int2binary[b_int] # binary encoding

    # true answer
    c_int = a_int + b_int
    c = int2binary[c_int]  #正确结果转化为二进制表示
    
    # where we'll store our best guess (binary encoded)
    d = np.zeros_like(c)  #初始化一个空的二进制数组，用来存储神经网络的预测值

    overallError = 0
    
    layer_2_deltas = list()  #记录layer 2的导数值
    layer_1_values = list()  #记录layer 1值
    layer_1_values.append(np.zeros(hidden_dim)) #在0时刻是没有之前的隐含层的，所以我们初始化一个全为0的
    
    # moving along the positions in the binary encoding
    for position in range(binary_dim): #循环是遍历二进制数字
        
        # generate input and output
        X = np.array([[a[binary_dim - position - 1],b[binary_dim - position - 1]]]) #X数组中的每个元素包含两个二进制数，其中一个来自a，一个来自b
        y = np.array([[c[binary_dim - position - 1]]]).T

        # hidden layer (input ~+ prev_hidden)
        layer_1 = sigmoid(np.dot(X,synapse_0) + np.dot(layer_1_values[-1],synapse_h))

        # output layer (new binary representation)
        layer_2 = sigmoid(np.dot(layer_1,synapse_1))

        # did we miss?... if so by how much?
        layer_2_error = y - layer_2
        layer_2_deltas.append((layer_2_error)*sigmoid_output_to_derivative(layer_2)) #把导数值存起来
        overallError += np.abs(layer_2_error[0])
    
        # decode estimate so we can print it out
        d[binary_dim - position - 1] = np.round(layer_2[0][0])
        
        # store hidden layer so we can use it in the next timestep
        layer_1_values.append(copy.deepcopy(layer_1)) #将layer_1的值拷贝到另外一个数组里，这样我们就可以下一个时间使用这个值
    
    future_layer_1_delta = np.zeros(hidden_dim)

    #现在我们需要做的就是反向传播
    for position in range(binary_dim):
        
        X = np.array([[a[position],b[position]]])
        layer_1 = layer_1_values[-position-1]
        prev_layer_1 = layer_1_values[-position-2]
        
        # error at output layer
        layer_2_delta = layer_2_deltas[-position-1]
        # error at hidden layer
        layer_1_delta = (future_layer_1_delta.dot(synapse_h.T) + \
            layer_2_delta.dot(synapse_1.T)) * sigmoid_output_to_derivative(layer_1)
        # let's update all our weights so we can try again
        synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
        synapse_h_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)
        synapse_0_update += X.T.dot(layer_1_delta)
        
        future_layer_1_delta = layer_1_delta


    synapse_0 += synapse_0_update * alpha
    synapse_1 += synapse_1_update * alpha
    synapse_h += synapse_h_update * alpha    
    synapse_0_update *= 0
    synapse_1_update *= 0
    synapse_h_update *= 0
    
    # print out progress
    if(j % 1000 == 0):
        print ("Error:" + str(overallError))
        print ("Pred:" + str(d))
        print ("True:" + str(c))
        out = 0
        for index,x in enumerate(reversed(d)):
            out += x*pow(2,index)
        print (str(a_int) + " + " + str(b_int) + " = " + str(out))
        print ("------------")

# 代码 不成功

In [None]:
# 不成功！

# -*- coding: utf-8 -*-

#https://blog.csdn.net/bi_diu1368/article/details/90551891

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.contrib import rnn
mnist = input_data.read_data_sets('C:\\Users\\yuli\\[机器学习方法]++++++++++++++\\MINST_data',one_hot=True)


#定义参数

#输入一行，一行有28个数据
n_input = 28

#一共有28行
max_time = 28

#100个隐藏单元
lstm_size = 10

#10分类
n_class = 10

#每批次分50个样本
batch_size = 50

#一共有n_batch个批次
n_batch = int(mnist.train.num_examples/batch_size)


#定义输入
with tf.name_scope('input'):
    x = tf.placeholder(tf.float32,[None,784],name='x-input')
    y = tf.placeholder(tf.float32,[None,10],name='y-input')

#初始化权重值
weights = tf.Variable(tf.truncated_normal([lstm_size,n_class],stddev=0.1))
biases = tf.Variable(tf.constant(0.1,shape=[n_class]))


#定义RNN网络
def RNN(X,weights,biases):
    
    with tf.name_scope('RNN'):
        #input = [batch_size,max_time,n_input]
        inputs = tf.reshape(X,[-1,max_time,n_input])
        #定义LSTM基本的cell
        lstm_cell = rnn.BasicLSTMCell(lstm_size)
        #final_state[0] 是 cell state
        #final_state[1] 是 hidden_state
        outputs,final_state = tf.nn.dynamic_rnn(lstm_cell, inputs, dtype=tf.float32)
        results = tf.nn.softmax(tf.matmul(final_state[1],weights) + biases)
        return results

prediction = RNN(x,weights,biases)


#损失函数
with tf.name_scope('loss'):
    loss =  tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits= prediction,labels=y))

#优化器
with tf.name_scope('optimizer'):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

#结果存在一个bool类型的值中
correct_prediction = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))

#求准确率
with tf.name_scope('accuracy'):
    accuracy =  tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
    

init = tf.global_variables_initializer()

with tf.Session() as sess:
    
    sess.run(init)
    
    for epoch in range(2):#range(21)
        
        for batch in range(n_batch):
            batch_xs,batch_ys =  mnist.train.next_batch(batch_size)
            sess.run(train_step,feed_dict={x:batch_xs,y:batch_ys})
        
        acc = sess.run(accuracy,feed_dict={x:mnist.test.images,y:mnist.train.labels})
        
        print('Iter '+ str(epoch) + " ,     Test accuracy = " + str(acc) )

# RNN对IMDB电影评论建模(成功2019-8-5)+++++

In [None]:
#1.准备数据
from keras.datasets import imdb
from keras.preprocessing import sequence

max_features=10000
maxlen = 500
batch_size=32

print('Loading data...')

PATH='C:\\Users\\yuli\\[机器学习方法]++++++++++++++\\IMDB_data\\imdb.npz'
(input_train,y_train),(input_test,y_test) = imdb.load_data(path=PATH,num_words=max_features)

print(len(input_train),'train sequences')
print(len(input_test),'test sequences')

print('input_train shape:',input_train.shape)
print('input_test shape:',input_test.shape)


In [None]:
input_train

In [None]:
y_train

In [None]:
print('Pad sequences (samples x time)')
input_train = sequence.pad_sequences(input_train,maxlen=maxlen)
input_test = sequence.pad_sequences(input_test,maxlen=maxlen)
print('input_train shape:',input_train.shape)
print('input_test shape:',input_test.shape)

In [None]:
input_train

In [None]:
#2.建立模型并训练
from keras.layers import Dense
from keras.models import Sequential
from keras.layers import Dense,SimpleRNN,Activation,Embedding

model = Sequential()
model.add(Embedding(max_features,32))
model.add(SimpleRNN(32))
model.add(Dense(1,activation='sigmoid'))

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

history = model.fit(input_train,
                    y_train,
                    epochs=5,
                    batch_size=128,
                    validation_split=0.2)


In [None]:
history

In [None]:
#3.绘制曲线
import matplotlib.pyplot as plt
%matplotlib inline
def plot_curve(history):
    acc = history.history['acc']
    val_acc = history.history['val_acc']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    
    epochs = range(1,len(acc)+1)
    
    plt.plot(epochs,acc,'bo',label='Training acc')
    plt.plot(epochs,val_acc,'b',label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.legend()
    
    plt.figure()
    
    plt.plot(epochs,loss,'bo',label='Training loss')
    plt.plot(epochs,val_loss,'b',label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend()
    
plot_curve(history)


# kears电影评论（成功2019-8-5）

In [None]:
from keras.models import Sequential  
from keras.layers import Input, Dense, Dropout, Activation
from keras.models import Model
from keras import optimizers
from keras.optimizers import SGD
from keras.datasets import imdb
import numpy as np
import matplotlib.pyplot as plt
from keras.utils.vis_utils import plot_model

tBatchSize = 512
Epochs = 10

model = Sequential() 

model.add(Dense(500,input_shape=(10000,))) 
model.add(Activation('relu'))
model.add(Dropout(0.5)) 
 
model.add(Dense(500))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(100)) 
model.add(Activation('relu'))
model.add(Dropout(0.3))

model.add(Dense(16)) 
model.add(Activation('relu'))
model.add(Dropout(0.1))
 
model.add(Dense(1)) 
model.add(Activation('sigmoid'))

model.summary()
plot_model(model,to_file = 'model.png',show_shapes = True)

model.compile(loss='binary_crossentropy', optimizer=optimizers.RMSprop(lr = 0.001),metrics=['acc'])
 

In [None]:

def vectorize_sequences(sequences,dimention = 10000):
    results = np.zeros((len(sequences),dimention))
    for i,sequence in enumerate(sequences):
        results[i,sequence] = 1.
    return results

 #load data
PATH='C:\\Users\\yuli\\[机器学习方法]++++++++++++++\\IMDB_data\\imdb.npz'
(X_train, y_train), (X_test, y_test) = imdb.load_data(path=PATH,num_words = 10000)

print('Loading data...')

X_train = vectorize_sequences(X_train)
X_test = vectorize_sequences(X_test)

y_train = np.asarray(y_train).astype('float32')
y_test = np.asarray(y_test).astype('float32')

history = model.fit(X_train, y_train, batch_size=tBatchSize, epochs=Epochs, verbose=2,shuffle=True, validation_split=0.3)

score = model.evaluate(X_test,y_test, batch_size=tBatchSize)
print("The score:",score[0])
print("Tne accuracy:",score[1])

history_dic = history.history
loss_values = history_dic['loss']
val_loss_values = history_dic['val_loss']
acc = history_dic['acc']
val_acc = history_dic['val_acc']

epochs = range(1,len(loss_values)+1)
fig = plt.figure()

ax1 = fig.add_subplot(111)
ax1.plot(epochs,loss_values,'bo',label = 'Training loss')
ax1.plot(epochs,val_loss_values,'b',label = 'Validation loss')
plt.title("Training and validstion loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend(bbox_to_anchor=(1,0),loc = 3,borderaxespad = 0)
plt.show()
'''
ax2 = fig.add_subplot(111)
ax2.plot(epochs,acc,'bo',label = 'Training acc')
ax2.plot(epochs,val_acc,'b',label = 'Validation acc')
plt.title("Training and validstion accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend(bbox_to_anchor=(1,0),loc = 3,borderaxespad = 0)

plt.show()
'''