In [13]:
import struct
%run bp.ipynb
from datetime import datetime

0-0: output: 0.900000 delta: -0.002342
	downstream:
	(0-0) -> (1-0) = -0.500938
	(0-0) -> (1-1) = 2.608687
	(0-0) -> (1-2) = 0.046398
	upstream:
0-1: output: 0.100000 delta: 0.003135
	downstream:
	(0-1) -> (1-0) = -2.026666
	(0-1) -> (1-1) = -0.712548
	(0-1) -> (1-2) = 1.588791
	upstream:
0-2: output: 0.900000 delta: 0.000094
	downstream:
	(0-2) -> (1-0) = -0.146217
	(0-2) -> (1-1) = 0.409020
	(0-2) -> (1-2) = -0.227019
	upstream:
0-3: output: 0.100000 delta: 0.003104
	downstream:
	(0-3) -> (1-0) = 0.261321
	(0-3) -> (1-1) = -2.493177
	(0-3) -> (1-2) = -0.333243
	upstream:
0-4: output: 0.900000 delta: 0.003838
	downstream:
	(0-4) -> (1-0) = -0.252131
	(0-4) -> (1-1) = 0.405881
	(0-4) -> (1-2) = -2.688162
	upstream:
0-5: output: 0.900000 delta: -0.001700
	downstream:
	(0-5) -> (1-0) = 0.338803
	(0-5) -> (1-1) = -0.699660
	(0-5) -> (1-2) = 1.281856
	upstream:
0-6: output: 0.100000 delta: -0.004528
	downstream:
	(0-6) -> (1-0) = 2.424558
	(0-6) -> (1-1) = -0.289900
	(0-6) -> (1-2) = -0.32

In [14]:
# Base class for the data loaders
class Loader(object):
    '''
    Common functionality shared by the image and label loaders:
    remembers the file path and sample count, and reads the raw file bytes.
    '''

    def __init__(self, path, count):
        '''
        Initialise the loader.
        path: path of the data file
        count: number of samples stored in the file
        '''
        self.path = path
        self.count = count

    def get_file_content(self):
        '''
        Read the whole data file in binary mode and return its content.
        '''
        # The context manager closes the handle even when read() raises,
        # which the previous open()/read()/close() sequence did not guarantee.
        with open(self.path, 'rb') as f:
            return f.read()

    def to_int(self, byte):
        '''
        Convert one unsigned byte taken from the file content to an integer.
        '''
        # Under Python 3, indexing the bytes returned by get_file_content()
        # already yields an int, so no struct.unpack('B', ...) step is needed
        # and the value is returned unchanged.
        return byte


In [15]:
# Image data loader
class ImageLoader(Loader):
    def get_picture(self, content, index):
        '''
        Internal helper: extract the image at position `index` from the file content.
        The first 16 bytes of the content are metadata and are skipped;
        each image then occupies 28 * 28 consecutive bytes.
        Returns a list of 28 rows, each a list of 28 pixel values.
        '''
        offset = index * 28 * 28 + 16
        return [
            [self.to_int(content[offset + row * 28 + col]) for col in range(28)]
            for row in range(28)
        ]

    def get_one_sample(self, picture):
        '''
        Internal helper: flatten a 28*28 image, row by row,
        into a single input vector of 784 values.
        '''
        return [picture[row][col] for row in range(28) for col in range(28)]

    def load(self):
        '''
        Load the data file and return the input vectors of all samples
        as an n*784 nested list, where n is the sample count.
        '''
        raw = self.get_file_content()
        return [
            self.get_one_sample(self.get_picture(raw, idx))
            for idx in range(self.count)
        ]

In [16]:
# Label data loader
class LabelLoader(Loader):
    def load(self):
        '''
        Load the data file and return the label vectors of all samples.
        Labels are read one byte each, starting after the first 8 bytes of the file.
        '''
        raw = self.get_file_content()
        return [self.norm(raw[idx + 8]) for idx in range(self.count)]

    def norm(self, label):
        '''
        Internal helper: convert one label value into a 10-dimensional vector
        holding 0.9 at the label's position and 0.1 everywhere else.
        '''
        target = self.to_int(label)
        return [0.9 if digit == target else 0.1 for digit in range(10)]

In [17]:
def get_training_data_set():
    '''
    Load the training data set.
    Returns a tuple (input vectors, label vectors) built from the
    60000-sample training image and label files.
    '''
    images = ImageLoader('data/MNIST/train-images-idx3-ubyte', 60000)
    labels = LabelLoader('data/MNIST/train-labels-idx1-ubyte', 60000)
    # Images are loaded first, then labels
    loaded_images = images.load()
    loaded_labels = labels.load()
    return loaded_images, loaded_labels


def get_test_data_set():
    '''
    Load the test data set.
    Returns a tuple (input vectors, label vectors) built from the
    10000-sample test image and label files.
    '''
    images = ImageLoader('data/MNIST/t10k-images-idx3-ubyte', 10000)
    labels = LabelLoader('data/MNIST/t10k-labels-idx1-ubyte', 10000)
    # Images are loaded first, then labels
    loaded_images = images.load()
    loaded_labels = labels.load()
    return loaded_images, loaded_labels


def show(sample):
    '''
    Print a 28x28 sample as ASCII art.
    sample: flat, row-major sequence of 784 pixel values
    Non-zero pixels are drawn as '*', zero pixels as a space.
    '''
    # Collect the rows and join them once; this avoids shadowing the
    # builtin `str` and repeated string concatenation.
    rows = [
        ''.join('*' if sample[i * 28 + j] != 0 else ' ' for j in range(28))
        for i in range(28)
    ]
    # Every row, including the last, is terminated by a newline;
    # print() then appends one more.
    print('\n'.join(rows) + '\n')
    
def get_result(vec):
    '''
    Return the index of the largest element of vec.
    vec: indexable sequence of comparable values (e.g. a 10-dimensional output or label vector)
    When several elements share the maximum, the first index is returned.
    An empty vec yields 0.
    '''
    if len(vec) == 0:
        return 0
    # Seed the running maximum with the first element instead of 0, so that
    # vectors without any positive entry still report their true maximum.
    max_value_index = 0
    max_value = vec[0]
    for i in range(1, len(vec)):
        if vec[i] > max_value:
            max_value = vec[i]
            max_value_index = i
    return max_value_index


def evaluate(network, test_data_set, test_labels):
    '''
    Compute the error ratio of network on a test set.
    network: object whose predict(sample) result is passed to get_result
    test_data_set: sequence of input samples
    test_labels: sequence of label vectors, indexed in parallel with test_data_set
    Returns the fraction of samples whose predicted index differs from the label index.
    '''
    sample_count = len(test_data_set)
    # For every sample the label is decoded first, then the prediction
    mistakes = sum(
        1
        for idx in range(sample_count)
        if get_result(test_labels[idx]) != get_result(network.predict(test_data_set[idx]))
    )
    return float(mistakes) / float(sample_count)


def now():
    '''
    Return the current local date and time formatted with the '%c' directive.
    '''
    current = datetime.now()
    return current.strftime('%c')

def train_and_evaluate():
    '''
    Train a Network([784, 100, 10]) on the training set and stop as soon as
    the test error ratio gets worse.
    After every epoch a progress line is printed; after every second epoch the
    network is evaluated on the test set, and training ends when the error
    ratio is higher than at the previous evaluation.
    '''
    previous_error_ratio = 1.0
    train_data_set, train_labels = get_training_data_set()
    test_data_set, test_labels = get_test_data_set()
    network = Network([784, 100, 10])
    epoch = 0
    while True:
        epoch += 1
        # NOTE(review): 0.3 and 1 are presumably the learning rate and the number
        # of passes per call - confirm against Network.train in bp.ipynb
        network.train(train_labels, train_data_set, 0.3, 1)
        print('%s epoch %d finished' % (now(), epoch))
        # Only evaluate on even epochs
        if epoch % 2 != 0:
            continue
        error_ratio = evaluate(network, test_data_set, test_labels)
        print('%s after epoch %d, error ratio is %f' % (now(), epoch, error_ratio))
        if error_ratio > previous_error_ratio:
            break
        previous_error_ratio = error_ratio

In [None]:
# Start training only when this code is executed as the main program
if __name__ == '__main__':
    train_and_evaluate()

  return 1.0 / (1 + exp(-inX))
