In [None]:
import os
import struct
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.keras.layers import Input,Dense
from tensorflow.python.keras.models import Model
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline

In [None]:
# 定义处理本地数据集MNIST的加载函数
def load_mnist(path,kind = 'train'):
    """Load one MNIST split from uncompressed IDX files stored under `path`.

    The function reads `<kind>-labels.idx1-ubyte` and `<kind>-images.idx3-ubyte`.
    The original `.gz` archives must be decompressed beforehand; this loader
    does not read gzip data.

    Args:
        path: Directory that contains the two IDX files.
        kind: File-name prefix of the split, e.g. 'train' or 't10k'.

    Returns:
        A tuple `(images, labels)`. `images` is a uint8 array of shape
        (n_samples, rows * cols) with one flattened image per row (784 columns
        for standard 28x28 MNIST); `labels` is a uint8 array of shape
        (n_samples,).

    Raises:
        ValueError: If a header is truncated, a magic number is wrong, or the
            amount of data does not match the counts declared in the headers.
        OSError: If either file cannot be opened.
    """
    labels_path = os.path.join(path,'%s-labels.idx1-ubyte' % kind)
    images_path = os.path.join(path,'%s-images.idx3-ubyte' % kind)

    # Label file layout: 8-byte big-endian header (magic 2049, item count),
    # followed by one unsigned byte per label.
    with open(labels_path,'rb') as lbpath:
        header = lbpath.read(8)
        if len(header) != 8:
            raise ValueError('%s: truncated IDX label header' % labels_path)
        magic, num_labels = struct.unpack('>II', header)
        if magic != 2049:
            raise ValueError('%s: bad label magic number %d (expected 2049)'
                             % (labels_path, magic))
        # np.fromfile continues from the current file position, i.e. right
        # after the header. uint8 matches the on-disk byte values.
        labels = np.fromfile(lbpath, dtype = np.uint8)
    if len(labels) != num_labels:
        raise ValueError('%s: header declares %d labels but %d were read'
                         % (labels_path, num_labels, len(labels)))

    # Image file layout: 16-byte big-endian header (magic 2051, image count,
    # rows, cols), followed by one unsigned byte per pixel in [0, 255].
    with open(images_path,'rb') as imgpath:
        header = imgpath.read(16)
        if len(header) != 16:
            raise ValueError('%s: truncated IDX image header' % images_path)
        magic, num_images, rows, cols = struct.unpack('>IIII', header)
        if magic != 2051:
            raise ValueError('%s: bad image magic number %d (expected 2051)'
                             % (images_path, magic))
        pixels = np.fromfile(imgpath, dtype = np.uint8)
    if num_images != len(labels):
        raise ValueError('image count %d does not match label count %d'
                         % (num_images, len(labels)))
    if pixels.size != num_images * rows * cols:
        raise ValueError('%s: expected %d pixel bytes but read %d'
                         % (images_path, num_images * rows * cols, pixels.size))

    # One flattened image per row; the row width comes from the header instead
    # of being hard-coded to 784.
    images = pixels.reshape(num_images, rows * cols)

    return images,labels

# Load the local training and test splits (uncompressed IDX files).
x_train, y_train = load_mnist('./MNIST_data', kind = 'train')
x_test, y_test = load_mnist('./MNIST_data', kind = 't10k')

# load_mnist returns one flattened image per row, so no reshape is needed:
# cast to float32 and scale the uint8 pixel values from [0, 255] into [0, 1].
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.

# The autoencoder is fed 784-dimensional vectors (28 * 28 pixels per image);
# fail early if the data does not have that layout.
assert x_train.shape[1] == x_test.shape[1] == 784, 'expected flattened 28x28 images'

# Architecture: 784-dimensional input -> 32-unit bottleneck -> 784-dimensional output.
encoding_dim = 32  # number of units in the encoded (bottleneck) representation
input_img = Input(shape=(784,))  # each sample is expected to be a 784-value vector

# Keras functional API: Dense(...) builds a fully connected layer, and calling
# that layer on a tensor makes the tensor its input.
# Kernel shapes: encoder 784 x 32, decoder 32 x 784.
encoded = Dense(units=encoding_dim, activation='relu')(input_img)
decoded = Dense(units=784, activation='sigmoid')(encoded)

# Full autoencoder: image -> reconstruction.
autoencoder = Model(
    inputs=input_img,
    outputs=decoded,
)

# Encoder only: image -> 32-dimensional code.
encoder = Model(
    inputs=input_img,
    outputs=encoded,
)

# Stand-alone decoder: code -> reconstruction. It reuses the autoencoder's last
# layer, so it shares the same weights as the autoencoder.
encoded_input = Input(shape=(encoding_dim,))
decoder_layer = autoencoder.layers[-1]
decoder = Model(
    inputs=encoded_input,
    outputs=decoder_layer(encoded_input),
)

# Compile the autoencoder.
# NOTE(review): 'accuracy' is presumably not a meaningful measure for
# reconstructing continuous pixel intensities -- confirm whether it is wanted.
autoencoder.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the model to reproduce its own input; the test split serves as
# validation data. Keras prints the per-epoch results by itself.
# Signature, for reference:
#   fit(x, y, batch_size=32, epochs=10, verbose=1, callbacks=None,
#       validation_split=0.0, validation_data=None, shuffle=True,
#       class_weight=None, sample_weight=None, initial_epoch=0)
# Parameter meanings are explained at
# https://blog.csdn.net/qq_41814556/article/details/82559841
autoencoder.fit(
    x_train,
    x_train,
    batch_size=256,
    epochs=50,
    shuffle=True,
    validation_data=(x_test, x_test),
)

# Encode the test images, then decode the codes back into reconstructions.
encoded_imgs = encoder.predict(x_test)
decoded_imgs = decoder.predict(encoded_imgs)

# Show the first n test digits (top row) above their reconstructions (bottom row).
n = 10
# squeeze=False keeps `axes` two-dimensional even when n == 1.
fig, axes = plt.subplots(2, n, figsize = (20,4), squeeze = False)
for i in range(n):
    panels = (
        (axes[0, i], x_test[i]),        # original input
        (axes[1, i], decoded_imgs[i]),  # autoencoder output
    )
    for ax, img in panels:
        # Pass the colormap per image instead of calling plt.gray(), which
        # changes the global default colormap for every later plot.
        ax.imshow(img.reshape(28,28), cmap = 'gray')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()