In [1]:
import numpy as np
import struct

random_state = 666
np.random.seed(random_state)

def _decode_idx3_ubyte(idx3_ubyte_file):
    # 读取二进制数据
    bin_data = open(idx3_ubyte_file, "rb").read()

    # 解析文件头信息，依次为魔数、图片数量、每张图片高、每张图片宽
    offset = 0
    fmt_header = ">iiii"
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, offset)

    # 解析数据集
    image_size = num_rows * num_cols
    offset += struct.calcsize(fmt_header)
    fmt_image = ">" + str(image_size) + "B"
    images = np.empty((num_images, num_rows, num_cols))
    for i in range(num_images):
        images[i] = np.array(struct.unpack_from(fmt_image, bin_data, offset)).reshape((num_rows, num_cols))
        offset += struct.calcsize(fmt_image)
    return images

def _decode_idx1_ubyte(idx1_ubyte_file):
    # 读取二进制数据
    bin_data = open(idx1_ubyte_file, "rb").read()

    # 解析文件头信息，依次为魔数和标签数
    offset = 0
    fmt_header = ">ii"
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, offset)

    # 解析数据集
    offset += struct.calcsize(fmt_header)
    fmt_image = ">B"
    labels = np.empty(num_images)
    for i in range(num_images):
        labels[i] = struct.unpack_from(fmt_image, bin_data, offset)[0]
        offset += struct.calcsize(fmt_image)
    return labels

def _load_X_train(X_train_path):
    return _decode_idx3_ubyte(X_train_path)

def _load_y_train(y_train_path):
    return _decode_idx1_ubyte(y_train_path)

def _load_X_test(X_test_path):
    return _decode_idx3_ubyte(X_test_path)

def _load_y_test(y_test_path):
    return _decode_idx1_ubyte(y_test_path)

def load(X_train_path, X_test_path, y_train_path, y_test_path):
    return _load_X_train(X_train_path), _load_X_test(X_test_path), _load_y_train(y_train_path), _load_y_test(y_test_path)

X_train, X_test, y_train, y_test = load(
    "./data_02_X_train.idx3-ubyte",
    "./data_02_X_test.idx3-ubyte",
    "./data_02_y_train.idx1-ubyte",
    "./data_02_y_test.idx1-ubyte"
)
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
y_train = y_train.reshape(y_train.shape[0], -1)
y_test = y_test.reshape(y_test.shape[0], -1)

In [2]:
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(60000, 784) (60000, 1) (10000, 784) (10000, 1)


In [3]:
# 均值归一化
from sklearn.preprocessing import minmax_scale
X_train_normalize = minmax_scale(X_train)
X_test_normalize = minmax_scale(X_test)

In [4]:
X_train[0]

array([  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
         0.,   0.,   0.,   0.,   0.,   0.,   0.,   

In [6]:
X_train_normalize[0]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [7]:
# 将结果转换为向量，方便做为多分类输出结果
from keras.utils import to_categorical
y_train_format = to_categorical(y_train)
y_test_format = to_categorical(y_test)

Using TensorFlow backend.


In [8]:
from keras.models import Sequential
from keras.layers import Dense
# 建立MLP模型
mlp_model = Sequential()
mlp_model.add(Dense(units=392, input_dim=784, activation='sigmoid'))
mlp_model.add(Dense(units=392, activation='sigmoid'))
mlp_model.add(Dense(units=10, activation='softmax'))
# 配置模型损失函数和最优化方法
mlp_model.compile(loss='categorical_crossentropy', optimizer='adam')

In [9]:
# 查看模型结构
mlp_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 392)               307720    
_________________________________________________________________
dense_2 (Dense)              (None, 392)               154056    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                3930      
Total params: 465,706
Trainable params: 465,706
Non-trainable params: 0
_________________________________________________________________


In [10]:
# 模型训练
mlp_model.fit(X_train_normalize, y_train_format, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1304f21d208>

In [11]:
from sklearn.metrics import accuracy_score
# 训练集
y_train_predict = mlp_model.predict_classes(X_train_normalize)
y_train_predict = to_categorical(y_train_predict)
train_score = accuracy_score(y_true=y_train_format, y_pred=y_train_predict)
train_score

0.99835

In [12]:
# 测试集
y_test_predict = mlp_model.predict_classes(X_test_normalize)
y_test_predict = to_categorical(y_test_predict)
test_score = accuracy_score(y_true=y_test_format, y_pred=y_test_predict)
test_score

0.9817