In [1]:
##数据处理
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit
import tensorflow as tf
import numpy as np

dataset=pd.read_csv("C:\\Users\\LCL\\Desktop\\sklearn_tensorflow\\kaggle\\Projects\\mnist\\train.csv")
testset=pd.read_csv("C:\\Users\\LCL\\Desktop\\sklearn_tensorflow\\kaggle\\Projects\\mnist\\test.csv")

#分层采样
split = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
for train_index, test_index in split.split(dataset, dataset["label"]):
    train = dataset.loc[train_index]
    val = dataset.loc[test_index]
X_train=train.drop("label", axis=1)
y_train=train["label"].copy()
X_val=val.drop("label", axis=1)
y_val=val["label"].copy()

#检验分层采样结果
y_train.value_counts()/y_train.shape[0]
y_val.value_counts()/y_val.shape[0]

#清理多余内容
del train
del val

#检查是否有缺失值
X_train.isnull().any().describe()
testset.isnull().any().describe()

#标准化
X_train = X_train / 255.0
testset = testset / 255.0
X_train = X_train.values.reshape(-1,28,28,1)
y_train=y_train.values
X_val = X_val.values.reshape(-1,28,28,1)
y_val=y_val.values
X_test = testset.values.reshape(-1,28,28,1)

#数据增强
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)
X_train_list=[]
y_train_list=[]
for i in range(X_train.shape[0]):
    gen=datagen.flow(X_train[i].reshape(1,28,28,1),batch_size=1)
    X_train_list.append(X_train[i].reshape(1,28,28,1))
    y_train_list.append(y_train[i])
    for j in range(5):
        x_batch = next(gen)
        X_train_list.append(x_batch)
        y_train_list.append(y_train[i])
X_train=np.array(X_train_list)
X_train=X_train.reshape(X_train.shape[0],28,28,1)
y_train=np.array(y_train_list)
        

#检查结果
# import matplotlib.pyplot as plt
# import time
# for i in range(50):
#     plt.imshow(X_train[(i*10+5)*7][:,:,0])
#     plt.show()
#     print(y_train[(i*10+5)*7])
#     time.sleep(5)

In [2]:
##CNN
from functools import partial

X_TEST_SHAPE=28000
INPUT_SHAPE=28
INPUT_CHANNEL=1
KERNEL_SIZE_1=5
FILTERS_1=32
KERNEL_SIZE_2=3
FILTERS_2=64
DROPOUT_CONV=0.25
DROPOUT_DENSE=0.5
N_DENSE=256
N_OUTPUTS=10
LEARNING_RATE=0.001
BATCH_SIZE=200
BATCH_SIZE_TEST=7000

tf.reset_default_graph()

my_pool_layer=partial(tf.nn.max_pool,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID")

X=tf.placeholder(tf.float32,shape=(BATCH_SIZE,INPUT_SHAPE,INPUT_SHAPE,1),name="X")
X_valid=tf.placeholder(tf.float32,shape=(X_val.shape[0],INPUT_SHAPE,INPUT_SHAPE,1),name="X_valid")
X_test_input=tf.placeholder(tf.float32,shape=(BATCH_SIZE_TEST,INPUT_SHAPE,INPUT_SHAPE,1),name="X_test")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name='training')

def conv(X,name,input_channel,kernel_size,filters):
    with tf.variable_scope(name):
        he_init = tf.variance_scaling_initializer()
        kernel_filters=tf.get_variable(initializer=he_init,shape=(kernel_size,kernel_size,input_channel,filters),
                                name="kernel")
        bias=tf.get_variable(initializer=tf.zeros([filters]),name="bias")
        convolution=tf.nn.conv2d(X,kernel_filters,strides=[1,1,1,1],padding="SAME")
        relu=tf.nn.relu(tf.nn.bias_add(convolution,bias))
        return relu

#用于训练
with tf.variable_scope("convolution_layer"):
    
    conv1=conv(X,"conv1",INPUT_CHANNEL,KERNEL_SIZE_1,FILTERS_1)
    conv2=conv(conv1,"conv2",FILTERS_1,KERNEL_SIZE_1,FILTERS_1)
    pool1=my_pool_layer(conv2,name="pool1")
    pool1_drop=tf.layers.dropout(pool1,DROPOUT_CONV,training=training)
    
    conv3=conv(pool1,"conv3",FILTERS_1,KERNEL_SIZE_2,FILTERS_2)
    conv4=conv(conv3,"conv4",FILTERS_2,KERNEL_SIZE_2,FILTERS_2)
    pool2=my_pool_layer(conv4,name="pool2")
    pool2_drop=tf.layers.dropout(pool2,DROPOUT_CONV,training=training)

#用于验证
with tf.variable_scope("convolution_layer",reuse=True):
    
    conv1_valid=conv(X_valid,"conv1",INPUT_CHANNEL,KERNEL_SIZE_1,FILTERS_1)
    conv2_valid=conv(conv1_valid,"conv2",FILTERS_1,KERNEL_SIZE_1,FILTERS_1)
    pool1_valid=my_pool_layer(conv2_valid,name="pool1")
    pool1_drop_valid=tf.layers.dropout(pool1_valid,DROPOUT_CONV,training=training)
    
    conv3_valid=conv(pool1_valid,"conv3",FILTERS_1,KERNEL_SIZE_2,FILTERS_2)
    conv4_valid=conv(conv3_valid,"conv4",FILTERS_2,KERNEL_SIZE_2,FILTERS_2)
    pool2_valid=my_pool_layer(conv4_valid,name="pool2")
    pool2_drop_valid=tf.layers.dropout(pool2_valid,DROPOUT_CONV,training=training)
    
#用于测试
with tf.variable_scope("convolution_layer",reuse=True):
    
    conv1_test=conv(X_test_input,"conv1",INPUT_CHANNEL,KERNEL_SIZE_1,FILTERS_1)
    conv2_test=conv(conv1_test,"conv2",FILTERS_1,KERNEL_SIZE_1,FILTERS_1)
    pool1_test=my_pool_layer(conv2_test,name="pool1")
    pool1_drop_test=tf.layers.dropout(pool1_test,DROPOUT_CONV,training=training)
    
    conv3_test=conv(pool1_test,"conv3",FILTERS_1,KERNEL_SIZE_2,FILTERS_2)
    conv4_test=conv(conv3_test,"conv4",FILTERS_2,KERNEL_SIZE_2,FILTERS_2)
    pool2_test=my_pool_layer(conv4_test,name="pool2")
    pool2_drop_test=tf.layers.dropout(pool2_test,DROPOUT_CONV,training=training)
    
def neuron_layer(X, n_neurons, name, init,activation=None):
    with tf.variable_scope(name):
        n_inputs = int(X.get_shape()[1])
        W = tf.get_variable(initializer=init,shape=(n_inputs,n_neurons),name="kernel")
        b = tf.get_variable(initializer=tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z

#用于训练
with tf.variable_scope("full_connected_layer"):
    he_init = tf.variance_scaling_initializer()
    flatten=tf.layers.flatten(pool2_drop)
    dense = neuron_layer(flatten,N_DENSE,name="dense",init=he_init,activation=tf.nn.relu)
    dense_drop=tf.layers.dropout(dense,DROPOUT_DENSE,training=training)
    logits = neuron_layer(dense_drop,N_OUTPUTS,name="outputs",init=he_init)

#用于验证
with tf.variable_scope("full_connected_layer",reuse=True):
    he_init = tf.variance_scaling_initializer()
    flatten_valid=tf.layers.flatten(pool2_drop_valid)
    dense_valid = neuron_layer(flatten_valid,N_DENSE,name="dense",init=he_init,activation=tf.nn.relu)
    dense_drop_valid=tf.layers.dropout(dense_valid,DROPOUT_DENSE,training=training)
    logits_valid = neuron_layer(dense_drop_valid,N_OUTPUTS,name="outputs",init=he_init)

#用于测试
with tf.variable_scope("full_connected_layer",reuse=True):
    he_init = tf.variance_scaling_initializer()
    flatten_test=tf.layers.flatten(pool2_drop_test)
    dense_test = neuron_layer(flatten_test,N_DENSE,name="dense",init=he_init,activation=tf.nn.relu)
    dense_drop_test=tf.layers.dropout(dense_test,DROPOUT_DENSE,training=training)
    logits_test = neuron_layer(dense_drop_test,N_OUTPUTS,name="outputs",init=he_init)

#加入collection
tf.add_to_collection("collection",logits_test)

#用于训练
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32),name="accuracy")
    
#用于验证
with tf.name_scope("eval_test"):
    correct_valid = tf.nn.in_top_k(logits_valid, y, 1)
    accuracy_valid = tf.reduce_mean(tf.cast(correct_valid, tf.float32),name="accuracy")
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [3]:
n_epochs = 10

def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [4]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, BATCH_SIZE):
            sess.run(training_op,
                     feed_dict={training: True, X: X_batch, y: y_batch})
            
#使用验证集检验当前模型准确率
        accuracy=sess.run(accuracy_valid,feed_dict={X_valid:X_val,y:y_val})
        print("epoch:",epoch," valid accurcay:",accuracy)
        
#对测试集分批，否则会产生OOM
    y_pred_list=[]
    for i in range(int(X_TEST_SHAPE/BATCH_SIZE_TEST)):
        Z = logits_test.eval(feed_dict={X_test_input:X_test[i*BATCH_SIZE_TEST:i*BATCH_SIZE_TEST+BATCH_SIZE_TEST]})    
        y_pred = np.argmax(Z, axis=1)
        y_pred_list.append(y_pred)
        
    save_path = saver.save(sess, "C:\\Users\\LCL\\Desktop\\sklearn_tensorflow\\kaggle\\Projects\\mnist\\mnist_model.ckpt")

y_pred=y_pred_list[0]
for pred in y_pred_list[1:]:
    y_pred=np.append(y_pred,pred)

#保存为csv文件
y_df=pd.DataFrame(y_pred)
y_df.to_csv('C:\\Users\\LCL\\Desktop\\sklearn_tensorflow\\kaggle\\Projects\\mnist\\data_pred.csv')

epoch: 0  valid accurcay: 0.990238
epoch: 1  valid accurcay: 0.992857
epoch: 2  valid accurcay: 0.994048
epoch: 3  valid accurcay: 0.993095
epoch: 4  valid accurcay: 0.994048
epoch: 5  valid accurcay: 0.995
epoch: 6  valid accurcay: 0.994762
epoch: 7  valid accurcay: 0.994762
epoch: 8  valid accurcay: 0.994762
epoch: 9  valid accurcay: 0.995
