In [3]:
import tensorflow as tf
import os

### 二进制文件的读取（指定bytes：一个样本的）

* 构造二进制文件队列

* 读取二进制文件数据并进行解码 `tf.FixedLengthRecordReader(record_bytes).read(file_queue)`，再用 `tf.decode_raw` 解码

In [16]:
class CifarRead(object):
    """Read CIFAR-10 style binary files and decode them into image/label batches.

    Each fixed-length record is ``label_bytes`` of label followed by
    ``height * width * channel`` bytes of image data, which this class
    reshapes as ``[channel, height, width]`` before transposing to
    ``[height, width, channel]``.
    """

    def __init__(self, height=32, width=32, channel=3, label_bytes=1):
        """Store the per-sample layout and derive the record sizes in bytes.

        Args:
            height: image height in pixels (default 32).
            width: image width in pixels (default 32).
            channel: number of colour channels (default 3).
            label_bytes: number of leading bytes holding the label (default 1).
        """
        # Image attributes of a single sample
        self.height = height
        self.width = width
        self.channel = channel

        # Size of the label prefix, in bytes
        self.label_bytes = label_bytes
        # Size of the image payload: 32 * 32 * 3 = 3072 with the defaults.
        # Must be height * width (not height * height) so that non-square
        # images get the right record length.
        self.image_bytes = self.height * self.width * self.channel
        # Size of one full record: 1 + 3072 = 3073 with the defaults
        self.all_bytes = self.label_bytes + self.image_bytes

    def bytes_read(self, file_list, batch_size=10, num_threads=1, capacity=10):
        """Read binary records from ``file_list`` and decode them into tensors.

        Args:
            file_list: list of paths to the binary files to read.
            batch_size: number of samples per batch (default 10).
            num_threads: number of threads enqueuing samples (default 1).
            capacity: maximum number of samples held in the batch queue
                (default 10).

        Returns:
            A tuple ``(img_batch, label_batch)`` of batched tensors produced by
            ``tf.train.batch``; images are uint8 in
            ``[height, width, channel]`` layout and labels are int32.

        Note:
            This uses the queue-based TF 1.x input pipeline, so the caller
            must start the queue runners before evaluating the result.
        """
        # 1. Build the filename queue
        file_queue = tf.train.string_input_producer(file_list)

        # 2. Read the binary files; the reader must be told the size of
        #    exactly one record (one sample)
        reader = tf.FixedLengthRecordReader(self.all_bytes)
        _, value = reader.read(file_queue)

        # 3. Decode the raw bytes
        #    (?, ) --> (all_bytes, ) = label(label_bytes, ) + feature(image_bytes, )
        label_img = tf.decode_raw(value, tf.uint8)
        print(label_img)

        # Split target and features with tf.slice so they can be handled
        # separately during training
        label = tf.cast(tf.slice(label_img, [0], [self.label_bytes]), tf.int32)
        image = tf.slice(label_img, [self.label_bytes], [self.image_bytes])
        print(label, image)

        # Fix the image layout:
        #   reshape   (image_bytes, )           ---> [channel, height, width]
        #   transpose [channel, height, width]  ---> [height, width, channel]
        # In tf.transpose the permutation indices 0, 1, 2 name the three
        # dimensions of the reshaped tensor.
        depth_major = tf.reshape(image, [self.channel, self.height, self.width])
        image_reshap = tf.transpose(depth_major, [1, 2, 0])
        print(depth_major)

        # 4. Batch the samples
        img_batch, label_batch = tf.train.batch(
            [image_reshap, label],
            batch_size=batch_size,
            num_threads=num_threads,
            capacity=capacity,
        )

        return img_batch, label_batch

In [18]:
if __name__ == '__main__':
    # Directory holding the CIFAR-10 binary batch files
    data_dir = "E:\\AI\\data\\cifar10\\cifar-10-batches-bin\\"
    filename = os.listdir(data_dir)
    # Keep only the binary batch files (*.bin)
    file_list = [os.path.join(data_dir, file) for file in filename if file.endswith(".bin")]
    if not file_list:
        # Fail early with a clear message instead of an opaque queue error
        raise FileNotFoundError("no .bin files found in " + data_dir)

    # Instantiate the reader and build the input pipeline
    cr = CifarRead()
    img_batch, label_batch = cr.bytes_read(file_list)

    with tf.Session() as sess:
        # Coordinator used to stop and collect the reader threads
        coord = tf.train.Coordinator()

        # The queue-runner threads that fill the batch queue must be
        # started manually
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            print(sess.run([img_batch, label_batch]))
        finally:
            # Always stop and join the threads, even if sess.run raised
            coord.request_stop()
            coord.join(threads)
    

Tensor("DecodeRaw_6:0", shape=(?,), dtype=uint8)
Tensor("Cast_4:0", shape=(1,), dtype=int32) Tensor("Slice_9:0", shape=(3072,), dtype=uint8)
Tensor("Reshape_3:0", shape=(3, 32, 32), dtype=uint8)
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
[array([[[[255, 255, 254],
         [252, 252, 252],
         [253, 252, 254],
         ...,
         [165, 177, 184],
         [243, 246, 247],
         [254, 253, 252]],

        [[255, 255, 254],
         [252, 252, 252],
         [254, 254, 255],
         ...,
         [130, 144, 161],
         [225, 232, 239],
         [253, 255, 255]],

        [[255, 255, 255],
         [254, 254, 254],
         [248, 247, 249],
         ...,
         [141, 155, 175],
         [220, 227, 235],
         [253, 255, 255]],

        ...,

        [[233, 234, 234],
         [216, 216, 216],
         [241, 241, 241],
         ...,
         [  9,  11,  11],
         [ 18,  19,  19],
         [169, 171, 170]],

        [[255, 255,

### 总结

* 构造二进制文件队列

* 读取二进制数据并进行解码 `tf.FixedLengthRecordReader(self.all_bytes)  tf.decode_raw(value, tf.uint8)`

* 分割目标值和特征值 `tf.slice`

* 形状与类型改变：形状用 `tf.reshape` / `tf.transpose`，类型用 `tf.cast`  

    * reshape(3072, ) ---> [channel, height, width]   (3072, ) --> (1024R, 1024G, 1024B)
    
    * transpose[channel, height, width] ---> [height, width, channel]
    
* 批处理