In [1]:
import tensorflow as tf
import numpy as np
import os

# 数据准备
首先看一下CIFAR-10数据是怎么读取的，下面的函数是根据官网示例改编的函数，其直接返回ndarray形式的X与Y。

In [2]:
def unpickle(file):
    '''
    CIFAR-10数据读取函数
    '''
    import pickle
    with open(file, 'rb') as fd:
        data = pickle.load(fd, encoding='bytes')
    return data[b'data'], np.array(data[b'labels'])


data, target = unpickle('../dataset/cifar-10-batches-py/data_batch_1')
print(data.shape, target.shape)

(10000, 3072) (10000,)


对于深度学习中的大型数据，mini-batch式学习是很有必要的，并且还会频繁对数据做一些其他的操作。所以定义一个专门的数据类用于管理数据：

In [3]:
from sklearn.preprocessing import StandardScaler    # 数据标准化，也可使用BN

class DataSet:
    def __init__(self, paths, batch_size=32,normalize=True, shuffle=False):
        '''
        paths: 文件路径
        '''
        self.data = list()
        self.target = list()
        self.n_samples = None
        self.n_features = None

        self.idx = 0    # mini-batch的游标
        self.batch_size = batch_size

        self._load(paths)
        
        if shuffle:
            self._shuffle_data()
        if normalize:
            self._normalize_data()
            
        print(self.data.shape, self.target.shape)

    def _load(self, paths):
        '''
        载入数据
        '''
        for path in paths:
            cur_data, cur_target = unpickle(path)
            self.data.append(cur_data)
            self.target.append(cur_target)

        # 将所有批次的数据拼接起来
        self.data = np.vstack(self.data)
        self.target = np.hstack(self.target).reshape((-1, 1))

        self.n_samples, self.n_features = self.data.shape[0], self.data.shape[1]

    def _shuffle_data(self):
        '''
        打乱数据
        '''
        idxs = np.random.permutation(self.n_samples)
        self.data = self.data[idxs]
        self.target = self.target[idxs]
        
    def _normalize_data(self):
        scaler=StandardScaler()
        self.data=scaler.fit_transform(self.data)

    def next_batch(self):
        '''
        生成mini-batch
        '''
        while self.idx < self.n_samples:
            yield (self.data[self.idx:self.idx+self.batch_size], self.target[self.idx:self.idx+self.batch_size])
            self.idx += self.batch_size

        self.idx = 0
        self._shuffle_data()
        return None

In [4]:
CIFAR_DIR='../dataset/cifar-10-batches-py/'
train_paths=[os.path.join(CIFAR_DIR, 'data_batch_{}'.format(i)) for i in range(1, 2)]
val_paths=[os.path.join(CIFAR_DIR, 'test_batch')]

batch_size=10000
train_data = DataSet(train_paths,batch_size=batch_size, shuffle=True)
val_data=DataSet(val_paths,batch_size=batch_size)



(10000, 3072) (10000, 1)




(10000, 3072) (10000, 1)




In [5]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

scaler=StandardScaler()
X_sca=scaler.fit_transform(train_data.data)
Y=train_data.target

X_train,X_val,Y_train,Y_val=train_test_split(X_sca,Y,test_size=0.15)

print(X_train.shape, Y_train.shape,X_val.shape,Y_val.shape)

(8500, 3072) (8500, 1) (1500, 3072) (1500, 1)


# 网络结构设计

In [6]:
# 输入层单元数为64(数据特征)，设计隐藏层单元数为5，输出单元数为10(多分类任务)

unit_I = X_train.shape[1]    # 输入层的单元数，与特征数相等
unit_h1 = 100    # 第一层隐藏层的单元数
unit_h2 = 50
unit_O = 10    # 输出层单元数

# 搭建网络
TensorFlow自身提供了增加层数的方法。

In [7]:
# 输入必须是可由用户指定的，所以设为placeholder
X = tf.placeholder(tf.float32, shape=[None, unit_I])    # 数据的样本数不指定，只指定特征数
Y = tf.placeholder(tf.int32, shape=[None, 1])    # 目标值为列向量

# 网络结构子图
with tf.name_scope('DNN'):
    a1 = tf.layers.dense(X, unit_h1, activation=tf.nn.relu)
    a2 = tf.layers.dense(a1, unit_h2, activation=tf.nn.relu)
    Y_pred = tf.layers.dense(a2, unit_O, activation=tf.nn.softmax)

# 损失函数子图
with tf.name_scope('Loss'):
    # 计算一维向量与onehot向量之间的损失
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        labels=Y, logits=Y_pred)

# optimization子图
lr = 0.01    # 学习率
with tf.name_scope('Train'):
    opt = tf.train.AdamOptimizer(lr).minimize(cross_entropy)    # Adam优化器

init = tf.global_variables_initializer()    # 所有变量初始化

# 训练网络

In [8]:
# 计算图已经构建好，开启一个tf会话，需要计算哪个值就run哪个变量即可
with tf.Session() as sess:
    sess.run(init)
    train_iter = 500

    cnt=0
    for i in range(train_iter):
        # mini-batch
        for X_batch,Y_batch in train_data.next_batch():
            sess.run(opt, feed_dict={X: X_batch, Y: Y_batch})
            cnt+=1
            if cnt%500==0:
                loss_train = sess.run(cross_entropy, feed_dict={X: X_batch, Y: Y_batch})
                print('train_iter: {}\tlog_loss:{}'.format(i, loss_train))

        # batch
#         sess.run(opt, feed_dict={X: train_data.data, Y: train_data.target})
#         if i % 100 == 0:
#             loss_train = sess.run(cross_entropy, feed_dict={
#                                   X: train_data.data, Y: train_data.target})
#             print('train_iter: {}\tlog_loss:{}'.format(i, loss_train))

    pred = sess.run(Y_pred, feed_dict={X: val_data.data, Y: val_data.target})
    loss_val = sess.run(cross_entropy, feed_dict={X: val_data.data, Y: val_data.target})
    acc = np.sum(np.squeeze(val_data.target) == np.argmax(pred, axis=1))/len(val_data.target)

    print('test_log_loss: {}\tacc:{}'.format(loss_val, acc))

0
100
200
300
400
500
600
700
800
900
test_log_loss: 2.038086414337158	acc:0.4208
