In [1]:
import pickle
import numpy as np
import os
class DataManager(object):
    '''data manager'''
    def __init__(self, data_path, need_shuffle=False):
        self.data_path = data_path
        self.all_data = []
        self.all_labels = []
        self.need_shuffle = need_shuffle
        self.indicate = 0
    
    def data_process(self):
        self.data_load()
        if self.need_shuffle:
            self.shuffle_data()
        return self.all_data, self.all_labels
        
    def data_load(self):
        print("loading data ......")
        for file in self.data_path:
            print("file:", file)
            if os.path.isfile(file):
                with open(file, "rb") as f:
                    loaded_data = pickle.load(f, encoding="ISO-8859-1")
                    data = loaded_data.get("data")
                    label = loaded_data.get("labels")
                    self.all_data.append(data)
                    self.all_labels.append(label)
        self.all_data = np.vstack(self.all_data)
        self.all_labels = np.hstack(self.all_labels)
        self.all_data = self.all_data/127.5
        print(self.all_data.shape)
        print(self.all_labels.shape)
        print("first 3 data:", self.all_data[0: 3])
        print("first 10 label:", self.all_labels[0: 10])

    
    def shuffle_data(self):
#         print("shuffle_data.......")
        idx = np.random.permutation(self.all_data.shape[0])
        self.all_data = self.all_data[idx]
        self.all_labels = self.all_labels[idx]
#         print("first 3 data:", self.all_data[0: 3])
#         print("first 3 label:", self.all_labels[0: 10])
    
    def next_batch_data(self, batch_size):
        if batch_size > self.all_data.shape[0]:
            raise Exception("batch size is larger than data size")
        if self.indicate + batch_size > self.all_data.shape[0]:
            if self.need_shuffle:
                self.shuffle_data()
                self.indicate = 0
            else:
                raise Exception("has no more data")
        batch_data = self.all_data[self.indicate: self.indicate + batch_size]
        batch_label = self.all_labels[self.indicate: self.indicate + batch_size]
        self.indicate = self.indicate + batch_size
        return batch_data, batch_label

cifar_dir = "../data/cifar-10-batches-py"
# cifar_dir = "workspace/workspace_py/learn/deepLearning/data/cifar-10-batches-py"
train_data_file = [cifar_dir + "/data_batch_%d" % i for i in range(1, 6)]
test_data_file = [cifar_dir + "/test_batch"]

train_data_manager = DataManager(train_data_file, need_shuffle=True)
train_data_manager.data_process()

test_data_manager = DataManager(test_data_file, need_shuffle=False)
test_data, test_label = test_data_manager.data_process()


loading data ......
file: ../data/cifar-10-batches-py/data_batch_1
file: ../data/cifar-10-batches-py/data_batch_2
file: ../data/cifar-10-batches-py/data_batch_3
file: ../data/cifar-10-batches-py/data_batch_4
file: ../data/cifar-10-batches-py/data_batch_5
(50000, 3072)
(50000,)
first 3 data: [[0.4627451  0.3372549  0.39215686 ... 1.09803922 0.65882353 0.56470588]
 [1.20784314 0.98823529 0.82352941 ... 1.09019608 1.11372549 1.12941176]
 [2.         1.98431373 1.98431373 ... 0.65098039 0.65098039 0.65882353]]
first 10 label: [6 9 9 4 1 1 2 7 8 3]
loading data ......
file: ../data/cifar-10-batches-py/test_batch
(10000, 3072)
(10000,)
first 3 data: [[1.23921569 1.24705882 1.29411765 ... 0.97254902 1.01176471 0.8627451 ]
 [1.84313725 1.81176471 1.81960784 ... 1.39607843 1.49803922 1.56078431]
 [1.23921569 1.23921569 1.09019608 ... 0.0627451  0.02352941 0.05490196]]
first 10 label: [3 8 8 0 6 6 1 6 3 1]


In [2]:
""" building network """
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 3072])
y = tf.placeholder(tf.int64, [None])

w = tf.get_variable("w", [x.get_shape()[-1], 10], initializer=tf.random_normal_initializer(0, 1))
b = tf.get_variable("b", [10], initializer=tf.constant_initializer(0.0))

mul_res = tf.matmul(x, w)
print("mul_res.shape:", mul_res.shape)
print("b shape:", b.shape)
y_ = tf.matmul(x, w) + b
print("y_.shape:", y_.shape)
pred_y = tf.nn.softmax(y_)

y_one_hot = tf.one_hot(y, 10, dtype=tf.float32)
loss = tf.reduce_mean(tf.square(y_one_hot - pred_y))

train_op = tf.train.AdadeltaOptimizer(1e-3).minimize(loss)

pred_max_y = tf.argmax(pred_y, axis=1)
accuracy = tf.reduce_mean(tf.cast(tf.equal(y, pred_max_y), dtype=tf.float32))


                          

  from ._conv import register_converters as _register_converters


mul_res.shape: (?, 10)
b shape: (10,)
y_.shape: (?, 10)


In [3]:

""" train model """
import tensorflow as tf

batch_size = 32
iteration = 10000
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.66)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(tf.global_variables_initializer())
    
    
    for i in range(iteration):
        batch_data, batch_label = train_data_manager.next_batch_data(batch_size)
#         print("batch_data.shape:", batch_data.shape)
#         print("label.size:", len(batch_label))
        acc_res, loss_res, _ = sess.run([accuracy, loss, train_op], feed_dict={x: batch_data, y: batch_label})
        if (i + 1) % 1000 == 0:
            print("train step: %d, acc:%4.5f, loss:%4.5f" % ((i + 1), acc_res, loss_res))
        if (i + 1) % 5000 == 0:
            
            acc_res, loss_res = sess.run([accuracy, loss], feed_dict={x: test_data, y: test_label})
            print("test step: %d, acc:%4.5f, loss:%4.5f" % ((i + 1), acc_res, loss_res))
                                         

train step: 1000, acc:0.03125, loss:0.19252
train step: 2000, acc:0.00000, loss:0.19997
train step: 3000, acc:0.09375, loss:0.17686
train step: 4000, acc:0.06250, loss:0.18677
train step: 5000, acc:0.12500, loss:0.17230
test step: 5000, acc:0.09090, loss:0.17950
train step: 6000, acc:0.03125, loss:0.19245
train step: 7000, acc:0.03125, loss:0.19304
train step: 8000, acc:0.00000, loss:0.19729
train step: 9000, acc:0.03125, loss:0.18885
train step: 10000, acc:0.00000, loss:0.19838
test step: 10000, acc:0.09110, loss:0.17949
