In [16]:
import tensorflow as tf  
import numpy as np  
from tensorflow.examples.tutorials.mnist import input_data
import os

# Quiet TensorFlow's native logging (level '2' is documented to suppress INFO and WARNING output).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# MNIST handwritten-digit data set; labels are loaded one-hot encoded (one_hot=True).
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

tf.reset_default_graph()
with tf.Graph().as_default() as g:
    # Build one fully connected layer.
    def add_layer(inputs, input_size, output_size, activation_function=None):
        """Create a dense layer ``activation(inputs @ Weights + bias)``.

        The weights are initialised from ``tf.random_normal`` and every bias
        starts at 0.1.  Histogram summaries are registered for the weights,
        the bias and the layer output.

        Args:
            inputs: tensor that is matrix-multiplied with the weight matrix.
            input_size: number of rows of the weight matrix.
            output_size: number of columns of the weight matrix (and bias width).
            activation_function: optional callable applied to ``inputs @ W + b``;
                when ``None`` (the default) the layer is linear.

        Returns:
            The output tensor of the layer.
        """
        with tf.name_scope('Layer'):
            with tf.name_scope('Weights'):
                Weights = tf.Variable(tf.random_normal([input_size, output_size]))
                tf.summary.histogram('Weights', Weights)
            with tf.name_scope('Bias'):
                bias = tf.Variable(tf.zeros([1, output_size]) + 0.1)
                tf.summary.histogram('Bias', bias)
            wx_b = tf.add(tf.matmul(inputs, Weights), bias)
            # Identity test: `!= None` would go through any overloaded
            # comparison operator of the object that was passed in.
            if activation_function is not None:
                output = activation_function(wx_b)
            else:
                output = wx_b
            tf.summary.histogram('Output', output)
        return output

    # Accuracy evaluation.
    def computer_accuracy(v_x, v_y):
        """Return the fraction of rows of ``v_x`` that are classified correctly.

        The network output is fetched once through the enclosing ``sess`` and
        the comparison is done with NumPy.  Building ``tf.equal`` /
        ``tf.reduce_mean`` ops here would add new nodes to the graph on every
        call and require a second ``sess.run``.

        Args:
            v_x: input samples fed to the ``xs`` placeholder.
            v_y: one-hot labels, one row per sample of ``v_x``.

        Returns:
            The accuracy as a ``numpy.float32`` scalar.
        """
        y_pre = sess.run(prediction, feed_dict={xs: v_x})
        # The predicted class is the arg-max of each output row; same for the labels.
        hits = np.argmax(y_pre, 1) == np.argmax(v_y, 1)
        return np.mean(hits.astype(np.float32))


    # Build the network.
    xs = tf.placeholder(tf.float32, [None, 784])  # 28*28
    ys = tf.placeholder(tf.float32, [None, 10])

    prediction = add_layer(xs, 784, 10, tf.nn.softmax)

    # Classification problem, so the loss is the cross entropy.
    with tf.name_scope('CrossEntrphy'):
        # Clip the softmax output away from 0 before taking the log:
        # log(0) is -inf and 0 * -inf is NaN, which would poison the loss.
        safe_prediction = tf.clip_by_value(prediction, 1e-10, 1.0)
        cross_entrypy = tf.reduce_mean(
            -tf.reduce_sum(ys * tf.log(safe_prediction), reduction_indices=[1]))
        tf.summary.scalar('CrossEntrpyh', cross_entrypy)

    train = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entrypy)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        writer = tf.summary.FileWriter('./logs', sess.graph)
        mergerd = tf.summary.merge_all()
        try:
            for i in range(1000):
                batch_x, batch_y = mnist.train.next_batch(100)
                sess.run(train, feed_dict={xs: batch_x, ys: batch_y})
                if i % 50 == 0:
                    # Every 50 steps: report test accuracy and log the summaries
                    # evaluated on the current training batch.
                    print(computer_accuracy(mnist.test.images, mnist.test.labels))
                    rs = sess.run(mergerd, feed_dict={xs: batch_x, ys: batch_y})
                    writer.add_summary(rs, i)
        finally:
            # Closing flushes buffered events so the log files are complete.
            writer.close()


Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz
0.1404
0.639
0.7256
0.7758
0.8013
0.8107
0.8264
0.8376
0.8437
0.8497
0.8532
0.8549
0.8599
0.858
0.8641
0.8648
0.8663
0.8688
0.8719
0.8729


# Dropout

In [3]:
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# Load the scikit-learn digits data set and split it 70/30 into train and test parts.
digits = load_digits()
X = digits.data
# One-hot encode the integer targets (binarised to 0/1 indicator columns).
y = LabelBinarizer().fit_transform(digits.target)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)


def add_layer(inputs, input_size, output_size, layer_name, activation_function=None):
    """Create a dense layer with dropout applied to its pre-activation.

    Computes ``inputs @ Weights + biases``, applies ``tf.nn.dropout`` using the
    module-level ``keep_prob`` tensor (looked up when the function is called),
    and finally applies ``activation_function`` if one is given.  Dropout is
    applied unconditionally, i.e. to every layer built with this helper.

    Args:
        inputs: tensor that is matrix-multiplied with the weight matrix.
        input_size: number of rows of the weight matrix.
        output_size: number of columns of the weight matrix (and bias width).
        layer_name: accepted for the caller's convenience; currently unused.
        activation_function: optional callable applied after dropout;
            ``None`` leaves the layer linear.

    Returns:
        The output tensor of the layer.
    """
    Weights = tf.Variable(tf.random.normal([input_size, output_size]))
    biases = tf.Variable(tf.zeros([1, output_size]) + 0.1)
    Wx_plus_bias = tf.add(tf.matmul(inputs, Weights), biases)

    # Dropout is applied before the activation function.
    Wx_plus_bias = tf.nn.dropout(Wx_plus_bias, keep_prob)
    # Identity test instead of `== None`, which would go through any
    # overloaded equality operator of the object that was passed in.
    if activation_function is None:
        outputs = Wx_plus_bias
    else:
        outputs = activation_function(Wx_plus_bias)
    return outputs

# Placeholders: inputs (64 features per sample), one-hot labels (10 classes)
# and the dropout keep probability read by add_layer.
xs = tf.placeholder(tf.float32, [None, 64])
ys = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)

# Hidden layer.  Author's note: the layer must not have too many units and the
# activation should be tf.nn.tanh, otherwise the values turned into NaN.
l1 = add_layer(xs, 64, 50, 'layer1', tf.nn.tanh)
prediction = add_layer(l1, 50, 10, 'layer2', tf.nn.softmax)

# Loss: cross entropy.  The softmax output is clipped away from 0 first,
# because log(0) is -inf and 0 * -inf is NaN, which would poison training.
safe_prediction = tf.clip_by_value(prediction, 1e-10, 1.0)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(safe_prediction), reduction_indices=[1]))
tf.summary.scalar('CrossEntropy', cross_entropy)

train = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

init = tf.global_variables_initializer()
mergerd = tf.summary.merge_all()


with tf.Session() as sess:
    sess.run(init)
    train_writer = tf.summary.FileWriter('logs/train', sess.graph)
    test_writer = tf.summary.FileWriter('logs/test', sess.graph)
    try:
        for i in range(1000):
            # Train on the full training set with half of the units dropped.
            sess.run(train, feed_dict={xs: X_train, ys: y_train, keep_prob: 0.5})
            if i % 50 == 0:
                # Summaries are evaluated with dropout disabled (keep_prob 1.0)
                # so the train and test curves are comparable.
                train_result = sess.run(mergerd, feed_dict={xs: X_train, ys: y_train, keep_prob: 1.0})
                test_result = sess.run(mergerd, feed_dict={xs: X_test, ys: y_test, keep_prob: 1.0})
                train_writer.add_summary(train_result, i)
                test_writer.add_summary(test_result, i)
    finally:
        # Closing flushes buffered events so both log directories are complete.
        train_writer.close()
        test_writer.close()


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.


# CNN

In [1]:
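'''
tf.truncated_normal(shape, mean, stddev)
1. Generates values from a truncated normal distribution.
2. Any sampled value that differs from the mean by more than two standard deviations is discarded and drawn again.
3. Consequently, unlike an ordinary normal sampler (whose values may land further away), this function never returns a value more than two standard deviations from the mean.
'''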

import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

# MNIST handwritten-digit data set; labels are loaded one-hot encoded (one_hot=True).
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

def accuracy(v_x, v_y):
    """Return the fraction of rows of ``v_x`` that the network classifies correctly.

    The prediction is fetched once through the module-level ``sess`` with
    dropout disabled (``keep_prob`` fed as 1.0); the comparison with the labels
    is done in NumPy.  Creating ``tf.equal`` / ``tf.reduce_mean`` ops here
    would add new nodes to the graph on every call and need a second
    ``sess.run``.

    Args:
        v_x: input samples fed to the ``xs`` placeholder.
        v_y: one-hot labels, one row per sample of ``v_x``.

    Returns:
        The accuracy as a ``numpy.float32`` scalar.
    """
    y_pre = sess.run(prediction, feed_dict={xs: v_x, keep_prob: 1.0})
    # The predicted class is the arg-max of each output row; same for the labels.
    hits = np.argmax(v_y, 1) == np.argmax(y_pre, 1)
    return np.mean(hits.astype(np.float32))

def weight_variable(shape):
    """Return a ``tf.Variable`` of the given shape initialised from
    ``tf.truncated_normal`` with mean 0 and standard deviation 0.1."""
    initial_values = tf.truncated_normal(shape, mean=0, stddev=0.1)
    return tf.Variable(initial_values)

def bias_variable(shape):
    """Return a ``tf.Variable`` of the given shape with every element set to 0.1."""
    initial_values = tf.constant(0.1, shape=shape)
    return tf.Variable(initial_values)

def con2d(x, ft):
    '''
    2-D convolution of ``x`` with the kernel ``ft``, stride 1 and 'SAME' padding.

    input: tensor of shape [batch, in_height, in_width, in_channel]

    filter: the convolution kernel, also a tensor, of shape
            [filter_height, filter_width, in_channel, out_channels], where
            filter_height / filter_width are the kernel height and width,
            in_channel is the number of image channels and must agree with the
            in_channel of the input, and out_channels is the number of kernels.

    strides: step of the convolution along each dimension of the image, a 1-D
             vector [1, stride, stride, 1]; the first and last entries must be 1.

    padding: a string, either "SAME" or "VALID", selecting how borders are
             handled: "SAME" pads the border with zeros where needed,
             "VALID" does not pad.
    '''
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, ft, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    '''
    Max-pool ``x`` with a 2x2 window, stride 2 and 'SAME' padding.

    tf.nn.max_pool(value, ksize, strides, padding, name=None)

    value: the input to pool; a pooling layer usually follows a convolution
           layer, so the input is normally a feature map of shape
           [batch, height, width, channels].

    ksize: size of the pooling window, a 4-element vector, usually
           [1, height, width, 1] because no pooling is wanted over the batch
           and channel dimensions, so those two entries are 1.

    strides: as for convolution, the step of the window along each dimension,
             usually [1, stride, stride, 1].

    padding: as for convolution, either 'VALID' or 'SAME'.

    Returns a Tensor of unchanged type whose shape is still of the form
    [batch, height, width, channels].
    '''
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')


# Placeholders: flattened 28*28 images, one-hot labels (10 classes) and the
# dropout keep probability.  The images are reshaped to [-1, 28, 28, 1] for
# the convolution layers.
xs = tf.placeholder(tf.float32, [None, 28*28])
ys = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)
x_input = tf.reshape(xs, [-1, 28, 28, 1])


# conv1
filter1 = weight_variable([5, 5, 1, 32])  # patch 5 * 5, in size 1; out size 32
bias1 = bias_variable([32])
h1 = tf.nn.relu(con2d(x_input, filter1) + bias1)
h1_pool = max_pool_2x2(h1)  # 14*14*32

# conv2
filter2 = weight_variable([5, 5, 32, 64])
bias2 = bias_variable([64])
h2 = tf.nn.relu(con2d(h1_pool, filter2) + bias2)
h2_pool = max_pool_2x2(h2)  # 7*7*64

# fc1
w_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
# Flatten h2_pool: [samples, 7, 7, 64] --> [samples, 7*7*64]
h2_pool_flat = tf.reshape(h2_pool, [-1, 7*7*64])
fc1 = tf.nn.relu(tf.matmul(h2_pool_flat, w_fc1) + b_fc1)
fc1_drop = tf.nn.dropout(fc1, keep_prob)

# fc2
w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
prediction = tf.nn.softmax(tf.matmul(fc1_drop, w_fc2) + b_fc2)

# Cross-entropy loss.  The softmax output is clipped away from 0 first,
# because log(0) is -inf and 0 * -inf is NaN, which would poison training.
# Clipping adds no variables, so existing checkpoints stay loadable.
safe_prediction = tf.clip_by_value(prediction, 1e-10, 1.0)
loss = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(safe_prediction), reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

# with tf.Session() as sess:
#     sess.run(init)
#     for i in range(1000):
#         batch_x, batch_y = mnist.train.next_batch(100)
#         sess.run(train_step, feed_dict={xs: batch_x, ys: batch_y, keep_prob: 0.7})
#         if i % 100 == 0:
#             print(accuracy(mnist.test.images[:1000], mnist.test.labels[:1000]))
#     saver.save(sess, 'models/mnist.ckpt')

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [3]:
import tensorflow as tf
with tf.Session() as sess:
    # Locate the newest checkpoint in models/ (the path the training loop saves to).
    checkpoint_path = tf.train.latest_checkpoint('models/')
    if checkpoint_path is None:
        raise FileNotFoundError("no checkpoint found under 'models/'; run the training loop first")
    # Restore into the graph that is already defined in this kernel, using its
    # own `saver`.  `accuracy` evaluates that graph's `prediction` tensor, so
    # importing the meta graph again would only add an unused second copy of
    # every op to the default graph.
    saver.restore(sess, checkpoint_path)
    print(accuracy(mnist.test.images[4000:5000], mnist.test.labels[4000:5000]))

INFO:tensorflow:Restoring parameters from models/mnist.ckpt
0.979
