In [1]:
import tensorflow as tf
import pickle
import numpy as np
import os

  from ._conv import register_converters as _register_converters


In [2]:
CAFIR_DIR = "./cifar-10-batches-py/"
print(os.listdir(CAFIR_DIR))

['batches.meta', 'data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', 'readme.html', 'test_batch']


In [12]:
def load_file(filename):
    '''read file return date and lable'''
    with open(filename,'rb') as f:
        data = pickle.load(f,encoding="bytes")
        return data[b'data'],data[b'labels']
#c创建读取数据的类，需要参数filename,isTrain,shuffle
class CafirDate:
    #初始化数据
    def __init__(self,filenames,isShuffle):
        #存放数据
        datas = []
        #存放标签
        labels = []
        for filename in filenames:
            data,label = load_file(filename)
            #测试二分类，只对【0,1】标签的数据进行存取
            for item,index in zip(data,label):
                if index in [0,1]:
                    datas.append(item)
                    labels.append(index)
        #对得到的数据纵向进行拼接
        self.datas = np.vstack(datas)
        self.datas = self.datas / 127.5 - 1
        #对得到的标签进行横向拼接
        self.labels = np.hstack(labels)
        
        
        #测试
        print(self.datas.shape)
        print(self.labels.shape)
        
        
        #获取共有多少条数据
        self.num_examples = self.datas.shape[0]
        #获取是否需要打乱数据
        self.isShuffle = isShuffle
        #设置起始位置，batch_size 时使用
        self.indicator = 0
        #先打乱下顺序
        if self.isShuffle:
            self.Shuffle()
    def Shuffle(self):
        #随机的打乱标签
        index = np.random.permutation(self.num_examples)
        self.datas = self.datas[index]
        self.labels = self.labels[index]
    def next_batch(self,batch_size):
        '''return batch_size example '''
        self.endIndicator = self.indicator+batch_size
        if self.endIndicator > self.num_examples:
            #如果是训练过程，就可以重新取值
            if self.isShuffle:
                #首先重新打乱
                self.Shuffle()
                self.indicator = 0
                self.endIndicator = self.indicator+batch_size
            else:
                raise Exception('No More data..')
        #如果还是大于总数
        if self.endIndicator > self.num_examples:
            raise Exception('batch_size too lagre...')
        batch_data = self.datas[self.indicator:self.endIndicator]
        batch_label = self.labels[self.indicator:self.endIndicator]
        self.indicator = self.endIndicator
        return batch_data,batch_label
    
    
#得到训练数据集的名字
filenames = [CAFIR_DIR+"data_batch_%d" %i for i in range(1,6)]
print(filenames)
#得到训练数据集的对象
train_data  =  CafirDate(filenames,True)  


#获取测试数据集的名字
testFilenames = [CAFIR_DIR+"test_batch"]
test_data = CafirDate(testFilenames,False)



['./cifar-10-batches-py/data_batch_1', './cifar-10-batches-py/data_batch_2', './cifar-10-batches-py/data_batch_3', './cifar-10-batches-py/data_batch_4', './cifar-10-batches-py/data_batch_5']
(10000, 3072)
(10000,)
(2000, 3072)
(2000,)


In [4]:
#设置变量
x = tf.placeholder(tf.float32,[None,3072])
y = tf.placeholder(tf.int32,[None])

In [5]:
#初始化参数
w = tf.get_variable('w',[x.get_shape()[-1],1],initializer = tf.random_normal_initializer(0,0.1))
b = tf.get_variable('b',[1],initializer=tf.constant_initializer(0.0))

In [7]:
#搭建神经网络  
#[None,1]
y_ = tf.matmul(x,w)+b
y_ = tf.nn.sigmoid(y_)
#[None,1]
y_reshape = tf.reshape(y,(-1,1))
#类型
y_reshape_float = tf.cast(y_reshape,tf.float32)
#创建损失函数

loss = tf.reduce_mean(tf.square(y_reshape_float-y_))

#计算准确率
# >0.5 的为预测准确的
y_pre = y_ > 0.5
y_pre = tf.cast(y_pre,tf.int32)  
#进行比较  [1,0,0,0,0,1,1,0,0,.....]     ！！！！！！！！！！！！！！！！！判断类型的时候一定要保证shape是一致的。！！！！！！
y_predict = tf.equal(y_pre,y_reshape)
#计算准确率
acc = tf.reduce_mean(tf.cast(y_predict,tf.float32))

In [8]:
with tf.name_scope('train'):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

In [13]:
init = tf.global_variables_initializer()
#设置batch_size
batch_size = 20
#设置train_step 
train_step = 100000
with tf.Session() as sess:
    sess.run(init)
    for i in range(train_step):
        #首先获取batch_size 数据
        batch_data,batch_label = train_data.next_batch(batch_size)
        _,loss_,acc_val = sess.run([train_op,loss,acc], feed_dict = {x:batch_data,y:batch_label})
        if (i+1)%500 == 0:
            print("[train]step:%d  _____train_loss:%f ____train_acc:%4.5f" % (i+1,loss_,acc_val))
        if (i+1)%5000 == 0:
            test_data = CafirDate(testFilenames,False)
            accs = []
           
            for j in range(100):
                test_batch_data,test_batch_label = test_data.next_batch(batch_size)
                t_acc_ = sess.run(acc,feed_dict={x:test_batch_data,y:test_batch_label})
                accs.append(t_acc_)
            test_acc = np.mean(accs)
            print("[test ] step:%d____acc:%4.5f  " % (i+1,test_acc))
                
                

[train]step:500  _____train_loss:0.189450 ____train_acc:0.75000
[train]step:1000  _____train_loss:0.219524 ____train_acc:0.75000
[train]step:1500  _____train_loss:0.151091 ____train_acc:0.75000
[train]step:2000  _____train_loss:0.083754 ____train_acc:0.90000
[train]step:2500  _____train_loss:0.051658 ____train_acc:0.95000
[train]step:3000  _____train_loss:0.078257 ____train_acc:0.95000
[train]step:3500  _____train_loss:0.084973 ____train_acc:0.85000
[train]step:4000  _____train_loss:0.162157 ____train_acc:0.75000
[train]step:4500  _____train_loss:0.239582 ____train_acc:0.65000
[train]step:5000  _____train_loss:0.118424 ____train_acc:0.85000
(2000, 3072)
(2000,)
[test ] step:5000____acc:0.81700  
[train]step:5500  _____train_loss:0.132844 ____train_acc:0.90000
[train]step:6000  _____train_loss:0.121220 ____train_acc:0.80000
[train]step:6500  _____train_loss:0.104202 ____train_acc:0.85000
[train]step:7000  _____train_loss:0.235452 ____train_acc:0.70000
[train]step:7500  _____train_loss:0

[train]step:58000  _____train_loss:0.028441 ____train_acc:1.00000
[train]step:58500  _____train_loss:0.110926 ____train_acc:0.80000
[train]step:59000  _____train_loss:0.134136 ____train_acc:0.85000
[train]step:59500  _____train_loss:0.114056 ____train_acc:0.80000
[train]step:60000  _____train_loss:0.130963 ____train_acc:0.85000
(2000, 3072)
(2000,)
[test ] step:60000____acc:0.80200  
[train]step:60500  _____train_loss:0.126993 ____train_acc:0.85000
[train]step:61000  _____train_loss:0.082265 ____train_acc:0.90000
[train]step:61500  _____train_loss:0.010283 ____train_acc:1.00000
[train]step:62000  _____train_loss:0.071827 ____train_acc:0.95000
[train]step:62500  _____train_loss:0.058678 ____train_acc:0.95000
[train]step:63000  _____train_loss:0.013388 ____train_acc:1.00000
[train]step:63500  _____train_loss:0.080580 ____train_acc:0.95000
[train]step:64000  _____train_loss:0.039383 ____train_acc:0.95000
[train]step:64500  _____train_loss:0.019201 ____train_acc:1.00000
[train]step:65000  