In [3]:
from tensorflow.examples.tutorials.mnist import input_data
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import tensorflow as tf
import numpy as np
import random

# 将 numpy 数组中的图片和标签顺序打乱
def shuffer_images_and_labels(images, labels):
    shuffle_indices = np.random.permutation(np.arange(len(images)))
    shuffled_images = images[shuffle_indices]
    shuffled_labels = labels[shuffle_indices]
    return shuffled_images, shuffled_labels

# 将label从长度10的one hot向量转换为0~9的数字
# 例：get_label(total_labels[0]) 获取到total_labels中第一个标签对应的数字
def get_label(label):
    return np.argmax(label)

# images：训练集的feature部分
# labels：训练集的label部分
# batch_size： 每次训练的batch大小
# epoch_num： 训练的epochs数
# shuffle： 是否打乱数据
# 使用示例：
#   for (batchImages, batchLabels) in batch_iter(images_train, labels_train, batch_size, epoch_num, shuffle=True):
#       sess.run(feed_dict={inputLayer: batchImages, outputLabel: batchLabels})
def batch_iter(images,labels, batch_size, epoch_num, shuffle=True):
    data_size = len(images)
    num_batches_per_epoch = int(data_size / batch_size)  # 样本数/batch块大小,多出来的“尾数”，不要了
    for epoch in range(epoch_num):
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data_feature = images[shuffle_indices]
            shuffled_data_label   = labels[shuffle_indices]
        else:
            shuffled_data_feature = images
            shuffled_data_label = labels
        for batch_num in range(num_batches_per_epoch):   # batch_num取值0到num_batches_per_epoch-1
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)

            yield (shuffled_data_feature[start_index:end_index] , shuffled_data_label[start_index:end_index])


# 读取数据集
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)

total_images = mnist.train.images
total_labels = mnist.train.labels
total_images, total_labels = shuffer_images_and_labels(total_images, total_labels)

# 简单划分前50000个为训练集，后5000个为测试集
origin_images_train = total_images[:50000]
origin_labels_train = total_labels[:50000]
origin_images_test = total_images[50000:]
origin_labels_test = total_labels[50000:]

# 构建和训练模型
def train_and_test1(images_train, labels_train, images_test, labels_test, images_validation, labels_validation):
    x = tf.placeholder(tf.float32,[None,784],name = "X")
    y = tf.placeholder(tf.float32,[None,10],name = "Y")
   
    #隐藏层神经元数量
    H1_NN = 256#第1隐藏层神经元为256个
    H2_NN = 64#第2隐藏层神经元为64个
    #输入层 - 第1隐藏层参数和偏置顶
    W1 = tf.Variable(tf.truncated_normal([784,H1_NN],stddev = 0.1))
    b1 = tf.Variable(tf.zeros([H1_NN]))
    #第1隐藏层- 第2隐藏层参数和偏置顶
    W2 = tf.Variable(tf.truncated_normal([H1_NN,H2_NN],stddev = 0.1))
    b2 = tf.Variable(tf.zeros([H2_NN]))
    #第2隐藏层- 输出层
    W3 = tf.Variable(tf.truncated_normal([H2_NN,10],stddev = 0.1))
    b3 = tf.Variable(tf.zeros([10]))
    Y1 = tf.nn.relu(tf.matmul(x,W1)+b1)
    Y2 = tf.nn.relu(tf.matmul(Y1,W2)+b2)
    forward = tf.matmul(Y2,W3)+b3
    pred = tf.nn.softmax(forward)
    #交叉熵
    loss_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=forward,labels=y))
    train_epochs = 40#训练轮数
    batch_size = 60#单次训练样本数
    total_batch = int(len(images_train)/batch_size)#一轮训练有多少批次
    #splay_step = 1#显示粒度
    learning_rate = 0.003#学习率
    #优化器
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss_function)
    correct_prediction = tf.equal(tf.argmax(y,1),tf.argmax(pred,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
    #建立会话
    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    count = 1
    for (batchImages, batchLabels) in batch_iter(images_train, labels_train, batch_size, train_epochs, shuffle=True):
        sess.run(optimizer,feed_dict={x: batchImages, y: batchLabels})
        # accu_test = sess.run(accuracy,feed_dict={x:images_test,y:labels_test})
        
        if count%total_batch== 0:
            accu_test = sess.run(accuracy,feed_dict={x:images_test,y:labels_test})
            print("Accuracy:",accu_test)  
        count += 1
    print("Final Accuracy:",sess.run(accuracy,feed_dict={x:images_validation,y:labels_validation}))
    sess.close()
    print("Train finied!")
# 划分数据集并调用train_and_test测试和验证

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [2]:
# 使用简单划分的训练集和测试集训练，并使用测试集评估模型
from time import time
start_time=time()
train_and_test1(origin_images_train, origin_labels_train, origin_images_test, origin_labels_test, origin_images_test, origin_labels_test)
print("during_time:",time()-start_time)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Accuracy: 0.9584
Accuracy: 0.9688
Accuracy: 0.9736
Accuracy: 0.9742
Accuracy: 0.967
Accuracy: 0.97
Accuracy: 0.9722
Accuracy: 0.9734
Accuracy: 0.973
Accuracy: 0.9734
Accuracy: 0.973
Accuracy: 0.9748
Accuracy: 0.9698
Accuracy: 0.9766
Accuracy: 0.9732
Accuracy: 0.972
Accuracy: 0.9746
Accuracy: 0.9772
Accuracy: 0.978
Accuracy: 0.9774
Accuracy: 0.9752
Accuracy: 0.9754
Accuracy: 0.9758
Accuracy: 0.973
Accuracy: 0.9776
Accuracy: 0.9734
Accuracy: 0.975
Accuracy: 0.9744
Accuracy: 0.9706
Accuracy: 0.9776
Accuracy: 0.9786
Accuracy: 0.9726
Accuracy: 0.976
Accuracy: 0.9762
Accuracy: 0.9746
Accuracy: 0.9776
Accuracy: 0.9788
Accuracy: 0.973
Accuracy: 0.9802
Accuracy: 0.977
Final Accuracy: 0.977
Train finied!
during_time: 119.2706687450409


In [28]:
def train_and_test2(images_train, labels_train, images_test, labels_test, images_validation, labels_validation):
    x = tf.placeholder(tf.float32,[None,784],name = "X")
    y = tf.placeholder(tf.float32,[None,10],name = "Y")
    #1层隐藏层
    ##隐藏层神经元数量
    H1_NN = 256
    W1 = tf.Variable(tf.random_normal([784,H1_NN]))
    b1 = tf.Variable(tf.zeros([H1_NN]))
    #激活函数ReLU
    Y1 = tf.nn.relu(tf.matmul(x,W1)+b1)
    #构建输出层
    W2 = tf.Variable(tf.random_normal([H1_NN,10]))
    b2 = tf.Variable(tf.zeros([10]))
    #前向计算
    forward = tf.matmul(Y1,W2) + b2
    pred = tf.nn.softmax(forward)
    #交叉熵
    loss_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=forward,labels=y))
    train_epochs = 50#训练轮数
    batch_size = 200#单次训练样本数
    total_batch = int(len(images_train)/batch_size)#一轮训练有多少批次
    learning_rate = 0.01#学习率
    #优化器
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss_function)
    correct_prediction = tf.equal(tf.argmax(y,1),tf.argmax(pred,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
    #建立会话
    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    count = 0
    for (batchImages, batchLabels) in batch_iter(images_train, labels_train, batch_size, train_epochs, shuffle=True):
        sess.run(optimizer,feed_dict={x: batchImages, y: batchLabels})
        if count%total_batch== 0:
            accu_test = sess.run(accuracy,feed_dict={x:images_test,y:labels_test})
            print("Accuracy:",accu_test)  
        count += 1
    #accu_validation = sess.run(accuracy,feed_dict={x:images_validation,y:labels_validation})
    #print("Accuracy:",accu_validation)
    print("Accuracy:",accu_test)
    sess.close()
  #  return accu_validation.item()
    return accu_test.item()
# 划分数据集并调用train_and_test测试和验证

In [29]:
# 使用简单划分的训练集和测试集训练，并使用测试集评估模型
from time import time
start_time=time()
train_and_test2(origin_images_train, origin_labels_train, origin_images_test, origin_labels_test, origin_images_test, origin_labels_test)
print("during_time:",time()-start_time)

Accuracy: 0.1038
Accuracy: 0.916
Accuracy: 0.932
Accuracy: 0.9406
Accuracy: 0.9446
Accuracy: 0.9528
Accuracy: 0.9548
Accuracy: 0.954
Accuracy: 0.9598
Accuracy: 0.9594
Accuracy: 0.9628
Accuracy: 0.959
Accuracy: 0.9596
Accuracy: 0.9598
Accuracy: 0.9624
Accuracy: 0.9674
Accuracy: 0.963
Accuracy: 0.96
Accuracy: 0.9678
Accuracy: 0.962
Accuracy: 0.9652
Accuracy: 0.964
Accuracy: 0.9686
Accuracy: 0.9676
Accuracy: 0.969
Accuracy: 0.97
Accuracy: 0.969
Accuracy: 0.9708
Accuracy: 0.9674
Accuracy: 0.9696
Accuracy: 0.9678
Accuracy: 0.965
Accuracy: 0.9686
Accuracy: 0.97
Accuracy: 0.9696
Accuracy: 0.9662
Accuracy: 0.9726
Accuracy: 0.9692
Accuracy: 0.9722
Accuracy: 0.974
Accuracy: 0.9736
Accuracy: 0.9724
Accuracy: 0.9736
Accuracy: 0.9696
Accuracy: 0.9704
Accuracy: 0.9672
Accuracy: 0.967
Accuracy: 0.9704
Accuracy: 0.972
Accuracy: 0.972
Accuracy: 0.972
during_time: 65.01522135734558


In [30]:
def hold_out(images, labels, train_percentage):
    images_train,images_test,\
    labels_train,labels_test=train_test_split(images, labels,test_size=train_percentage,random_state=0)
    train_and_test2(images_train, labels_train, images_test, labels_test,images_test,labels_test)

In [17]:
print(total_images.shape[1])
print(origin_images_test.shape)

784
(5000, 784)


In [23]:
# 调用函数用留出法和k折交叉验证法评估模型
hold_out(total_images, total_labels, 0.2)


Accuracy: 0.9738182


In [24]:
hold_out(total_images, total_labels, 0.4)

Accuracy: 0.9685909


In [25]:
hold_out(total_images, total_labels, 0.6)

Accuracy: 0.9670303


In [21]:
hold_out(total_images, total_labels, 0.8)

Accuracy: 0.95954543


In [26]:
hold_out(total_images, total_labels, 0.9)

Accuracy: 0.9349495


In [28]:
def cross_validation(images, labels, k):
    count1 = 0.0
    kf = KFold(n_splits =k,shuffle= True)
    i = 1
    for train_index, test_index in kf.split(images):
        images_train,images_test = images[train_index],images[test_index]
        labels_train,labels_test = labels[train_index],labels[test_index]
        print("Train:",i)
        i +=1
        temp=train_and_test2(images_train, labels_train, images_test, labels_test, images_test, labels_test)
        count1 = count1 + temp
    average = count1/k
    print("Average:",average)        


In [29]:
cross_validation(origin_images_train, origin_labels_train, 5)

Train: 1
Accuracy: 0.9725
Train: 2
Accuracy: 0.9671
Train: 3
Accuracy: 0.9714
Train: 4
Accuracy: 0.9691
Train: 5
Accuracy: 0.9697
Average: 0.9699600100517273


In [30]:
cross_validation(origin_images_train, origin_labels_train, 10)

Train: 1
Accuracy: 0.9732
Train: 2
Accuracy: 0.9758
Train: 3
Accuracy: 0.974
Train: 4
Accuracy: 0.9676
Train: 5
Accuracy: 0.9716
Train: 6
Accuracy: 0.9702
Train: 7
Accuracy: 0.9702
Train: 8
Accuracy: 0.9734
Train: 9
Accuracy: 0.9672
Train: 10
Accuracy: 0.9676
Average: 0.9710799932479859


In [3]:
from time import time
start_time=time()
cross_validation(origin_images_train, origin_labels_train, 15)
print("during_time:",time()-start_time)

NameError: name 'cross_validation' is not defined