In [None]:
import struct
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime
from sklearn.metrics import accuracy_score, \
    precision_score, recall_score, f1_score, cohen_kappa_score
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler



def loadImageSet(filename):
    """Read an IDX3 image file (the MNIST image format) into a 2-D array.

    The file is parsed as four big-endian unsigned 32-bit header integers
    followed by one unsigned byte per pixel. Header fields 1..3 are used as
    the image count and the two image dimensions.

    Args:
        filename: Path of the IDX3 image file.

    Returns:
        A tuple ``(imgs, head)``: ``imgs`` is an integer array of shape
        ``[imgNum, width * height]`` (one flattened image per row) and
        ``head`` is the 4-tuple of header integers as read from the file.

    Raises:
        struct.error: If the file is too short to hold the header or the
            pixel payload announced by the header.
    """
    # The context manager guarantees the handle is closed even when the
    # read fails part-way.
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()

    head = struct.unpack_from('>IIII', buffers, 0)  # four header integers
    offset = struct.calcsize('>IIII')  # pixel data starts right after the header

    imgNum = head[1]
    # NOTE(review): the IDX layout is documented as rows-then-columns, so
    # these two names may be swapped; only their product is used here.
    width = head[2]
    height = head[3]

    bits = imgNum * width * height  # total number of pixel bytes expected
    if len(buffers) - offset < bits:
        # Keep the exception type the struct-based parsing used to raise.
        raise struct.error(
            'image file %r is truncated: expected %d pixel bytes, found %d'
            % (filename, bits, max(len(buffers) - offset, 0)))

    # View the payload directly instead of unpacking every pixel into a
    # Python tuple; astype(int) yields a writable array with the platform's
    # default integer dtype.
    imgs = np.frombuffer(buffers, dtype=np.uint8, count=bits, offset=offset)
    imgs = imgs.astype(int).reshape([imgNum, width * height])

    return imgs, head


def loadLabelSet(filename):
    """Read an IDX1 label file (the MNIST label format) into a 1-D array.

    The file is parsed as two big-endian unsigned 32-bit header integers
    followed by one unsigned byte per label. Header field 1 is used as the
    number of labels.

    Args:
        filename: Path of the IDX1 label file.

    Returns:
        A tuple ``(labels, head)``: ``labels`` is an integer array of shape
        ``[labelNum]`` and ``head`` is the 2-tuple of header integers as read
        from the file.

    Raises:
        struct.error: If the file is too short to hold the header or the
            number of labels announced by the header.
    """
    # The context manager guarantees the handle is closed even when the
    # read fails part-way.
    with open(filename, 'rb') as binfile:
        buffers = binfile.read()

    head = struct.unpack_from('>II', buffers, 0)  # two header integers

    labelNum = head[1]
    offset = struct.calcsize('>II')  # label data starts right after the header

    if len(buffers) - offset < labelNum:
        # Keep the exception type the struct-based parsing used to raise.
        raise struct.error(
            'label file %r is truncated: expected %d label bytes, found %d'
            % (filename, labelNum, max(len(buffers) - offset, 0)))

    # View the payload directly instead of unpacking every label into a
    # Python tuple; astype(int) yields a writable array with the platform's
    # default integer dtype.
    labels = np.frombuffer(buffers, dtype=np.uint8, count=labelNum, offset=offset)
    labels = labels.astype(int).reshape([labelNum])

    return labels, head


if __name__ == "__main__":
    # Paths of the four MNIST IDX files, relative to the working directory.
    file1 = 'Mnist/train-images-idx3-ubyte/train-images.idx3-ubyte'
    file2 = 'Mnist/train-labels-idx1-ubyte/train-labels.idx1-ubyte'
    file3 = 'Mnist/t10k-labels-idx1-ubyte/t10k-labels.idx1-ubyte'
    file4 = 'Mnist/t10k-images-idx3-ubyte/t10k-images.idx3-ubyte'

    imgs_train, data_head_train = loadImageSet(file1)
    imgs_test, data_head_test = loadImageSet(file4)

    labels_train, labels_head_train = loadLabelSet(file2)
    labels_test, labels_head_test = loadLabelSet(file3)

    # Each row returned by loadImageSet is one flattened image, so this
    # prints a 1-D shape rather than a 2-D one.
    print(imgs_train[0].shape)

    # Restore one sample to 2-D for display, taking the image dimensions
    # from the file header instead of hard-coding them.
    img = imgs_train[3]
    img = np.reshape(img, [data_head_train[2], data_head_train[3]], order='C')
    print(labels_train)

    # Later cells consume these arrays as floats (labels included).
    imgs_train = imgs_train.astype(float)
    imgs_test = imgs_test.astype(float)
    labels_train = labels_train.astype(float)
    labels_test = labels_test.astype(float)

    plt.figure()
    plt.imshow(img)
    plt.show()






In [None]:
# Standardize the pixel features; the scaler is fitted on the training set
# only and then applied to both splits.
stdScaler = StandardScaler().fit(imgs_train)
imgs_trainStd = stdScaler.transform(imgs_train)
imgs_testStd = stdScaler.transform(imgs_test)

# Reduce the standardized features to 154 principal components.
# random_state pins the result when a randomized SVD solver is selected.
print("这是降为154维的结果：")
pcaModel = PCA(n_components=154, random_state=0).fit(imgs_trainStd)
imgs_trainPca = pcaModel.transform(imgs_trainStd)
imgs_testPca = pcaModel.transform(imgs_testStd)

# Train on the PCA-projected features: the prediction cell calls
# svm.predict(imgs_testPca), so the classifier has to be fitted on inputs
# of the same dimensionality. (Fitting on imgs_trainStd, as before, makes
# that predict call fail on a fresh kernel.)
print("训练开始时间: ", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
svm = SVC().fit(imgs_trainPca, labels_train)
print("训练结束时间: ", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

In [6]:
 # 预测结果
# NOTE(review): svm must have been fitted on features with the same number
# of columns as imgs_testPca for this call to succeed.
imgs_pred = svm.predict(imgs_testPca)
total = labels_test.shape[0]
true = np.sum(imgs_pred == labels_test)  # number of exact label matches
print("预测结果正确的数目: ", true)
print("预测结果错误的数目: ", total - true)
print("预测结果准确率: ", true/total)

# Summary metrics: each row is (caption, scoring function, extra keyword
# arguments). Scores are computed one at a time, right before printing.
print("\n\n##########  评价部分  ############\n\n")
weighted = {"average": "weighted"}
scorers = [
    ('使用SVM预测的数据准确率为: ', accuracy_score, {}),
    ('使用SVM预测的数据精确率为: ', precision_score, weighted),
    ('使用SVM预测的数据召回率为: ', recall_score, weighted),
    ('使用SVM预测的数据的F1值为: ', f1_score, weighted),
    ("使用SVM预测的数据的Cohen's Kappa系数为: ", cohen_kappa_score, {}),
]
for caption, scorer, options in scorers:
    print(caption, scorer(labels_test, imgs_pred, **options))

# Per-class classification report.
print("\n\n##########  报告部分  ############\n\n")
report = classification_report(labels_test, imgs_pred)
print('使用SVM预测的数据分类报告: \n', report)


预测结果正确的数目:  9554
预测结果错误的数目:  446
预测结果准确率:  0.9554


##########  评价部分  ############


使用SVM预测的数据准确率为:  0.9554
使用SVM预测的数据精确率为:  0.9600173588857184
使用SVM预测的数据召回率为:  0.9554
使用SVM预测的数据的F1值为:  0.9564501666426586
使用SVM预测的数据的Cohen's Kappa系数为:  0.9504184890743805


##########  报告部分  ############


使用SVM预测的数据分类报告: 
              precision    recall  f1-score   support

        0.0       0.99      0.97      0.98       980
        1.0       0.99      0.99      0.99      1135
        2.0       0.79      0.98      0.88      1032
        3.0       0.98      0.95      0.96      1010
        4.0       0.98      0.95      0.96       982
        5.0       0.97      0.95      0.96       892
        6.0       0.99      0.94      0.96       958
        7.0       0.97      0.94      0.95      1028
        8.0       0.98      0.95      0.96       974
        9.0       0.98      0.93      0.96      1009

avg / total       0.96      0.96      0.96     10000

