In [1]:
import struct
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime
from sklearn.metrics import accuracy_score, \
    precision_score, recall_score, f1_score, cohen_kappa_score
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler



def loadImageSet(filename):
    """Load an IDX3-style image file (e.g. MNIST images) into a 2-D array.

    The file is expected to start with four big-endian unsigned 32-bit
    integers, followed by one unsigned byte per pixel. The second header
    field is used as the image count and the third and fourth as the two
    image dimensions (rows, then columns, in the IDX layout).

    Args:
        filename: Path of the image file to read.

    Returns:
        A tuple ``(imgs, head)`` where ``imgs`` is an integer array of shape
        ``(image_count, rows * cols)`` holding one flattened image per row,
        and ``head`` is the 4-tuple of header integers.

    Raises:
        OSError: If the file cannot be opened or read.
        struct.error: If the file is shorter than the 16-byte header.
        ValueError: If the file holds fewer pixel bytes than the header
            announces.
    """
    header_fmt = '>IIII'

    # The context manager closes the file even when reading fails.
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()

    head = struct.unpack_from(header_fmt, buffers, 0)
    offset = struct.calcsize(header_fmt)  # pixel data starts after the header
    imgNum, rows, cols = head[1:]
    pixels_per_img = rows * cols

    # View the pixel bytes directly instead of unpacking them into a tuple
    # with one Python int per pixel, which is very slow and memory hungry
    # for a full data set.
    pixels = np.frombuffer(
        buffers, dtype=np.uint8, count=imgNum * pixels_per_img, offset=offset
    )

    # Widen to NumPy's default integer dtype (what the earlier tuple-based
    # conversion produced) so callers keep getting the same dtype; this also
    # yields a writable copy rather than a read-only view of the file bytes.
    imgs = pixels.astype(int).reshape(imgNum, pixels_per_img)

    return imgs, head


def loadLabelSet(filename):
    """Load an IDX1-style label file (e.g. MNIST labels) into a 1-D array.

    The file is expected to start with two big-endian unsigned 32-bit
    integers, followed by one unsigned byte per label. The second header
    field is used as the label count.

    Args:
        filename: Path of the label file to read.

    Returns:
        A tuple ``(labels, head)`` where ``labels`` is an integer array of
        shape ``(label_count,)`` and ``head`` is the 2-tuple of header
        integers.

    Raises:
        OSError: If the file cannot be opened or read.
        struct.error: If the file is shorter than the 8-byte header.
        ValueError: If the file holds fewer label bytes than the header
            announces.
    """
    header_fmt = '>II'

    # The context manager closes the file even when reading fails.
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()

    head = struct.unpack_from(header_fmt, buffers, 0)
    labelNum = head[1]
    offset = struct.calcsize(header_fmt)  # label data starts after the header

    # Read the label bytes directly rather than unpacking them into a tuple
    # of Python ints first.
    raw_labels = np.frombuffer(
        buffers, dtype=np.uint8, count=labelNum, offset=offset
    )

    # Widen to NumPy's default integer dtype (what the earlier tuple-based
    # conversion produced) so callers keep getting the same dtype.
    labels = raw_labels.astype(int).reshape(labelNum)

    return labels, head


# Script entry point: load the training and test image/label files, print a
# quick sanity check, convert everything to float and display one sample.
if __name__ == "__main__":
    # Paths are relative to the working directory. Note the numbering:
    # file3 is the test *labels* and file4 the test *images*.
    file1 = 'Mnist/train-images-idx3-ubyte/train-images.idx3-ubyte'
    file2 = 'Mnist/train-labels-idx1-ubyte/train-labels.idx1-ubyte'
    file3 = 'Mnist/t10k-labels-idx1-ubyte/t10k-labels.idx1-ubyte'
    file4 = 'Mnist/t10k-images-idx3-ubyte/t10k-images.idx3-ubyte'

    # Each loader returns (data array, header tuple).
    imgs_train, data_head_train = loadImageSet(file1)
    imgs_test, data_head_test = loadImageSet(file4)

    labels_train, labels_head_train = loadLabelSet(file2)
    labels_test, labels_head_test = loadLabelSet(file3)
    # Images are stored flattened, so this prints the length of one row
    # (e.g. (784,)), not a 2-D shape.
    print(imgs_train[0].shape)  # flattened image vector, not 28*28
    # Keep one sample and restore its 2-D layout (row-major) for display;
    # this is done before the float conversion below, so `img` keeps the
    # integer dtype.
    img = imgs_train[3]
    img = np.reshape(img, [28, 28], order='C')
    print(labels_train)

    # Convert data and labels to float.
    # NOTE(review): presumably for the scikit-learn steps imported at the top
    # of the file; casting class labels to float looks unnecessary — confirm.
    imgs_train = imgs_train.astype(float)
    imgs_test = imgs_test.astype(float)
    labels_train = labels_train.astype(float)
    labels_test = labels_test.astype(float)

    # Show the sample image.
    plt.figure()
    plt.imshow(img)
    plt.show()


(784,)
[5 0 4 ... 5 6 8]


<Figure size 640x480 with 1 Axes>

这是没有降维结果：
训练开始时间:  00:13:03
训练结束时间:  00:22:26
