In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets,layers,optimizers
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Load Fashion-MNIST as two (images, labels) pairs: one for training and one
# for testing.
(train_images, train_labels), (test_images, test_labels) = (
    datasets.fashion_mnist.load_data()
)

In [9]:
# Scale pixel values into [0, 1] and append a channel axis so each sample has
# the (28, 28, 1) shape expected by the Conv2D models defined later.
# The images are cast to float32 *before* dividing: dividing an integer array
# by 255 would otherwise promote it to float64 (NumPy's default), doubling the
# memory footprint for no benefit.
# NOTE(review): assumes the loader returns integer (uint8) images - see the
# Keras dataset documentation.
X_train = (train_images.astype("float32") / 255.0).reshape((-1, 28, 28, 1))
X_test = (test_images.astype("float32") / 255.0).reshape((-1, 28, 28, 1))
print(X_train.shape)
print(X_test.shape)

(60000, 28, 28, 1)
(10000, 28, 28, 1)


In [6]:
print(train_labels.shape)
# One-hot encode the integer class labels for use with categorical
# cross-entropy. `num_classes` is given explicitly so both matrices are always
# 10 columns wide; without it the width is inferred from the largest label
# present, which would silently differ between splits if one of them lacked
# the highest class id.
Y_train = keras.utils.to_categorical(train_labels, num_classes=10)
Y_test = keras.utils.to_categorical(test_labels, num_classes=10)
print(Y_train.shape)
print(Y_test.shape)

(60000,)
(60000, 10)
(10000, 10)


In [49]:
# Teacher model: the larger network whose predictions are later used as
# training targets for the student.
def TeacherModel(input_shape=(28, 28, 1), num_classes=10):
    """Build the (uncompiled) teacher CNN with the Keras functional API.

    Architecture: Conv2D(32) -> Conv2D(64) -> MaxPool(2x2) -> Dropout(0.35)
    -> Flatten -> Dense(128) -> Dropout(0.5) -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape of one input sample, without the batch axis.
            Defaults to (28, 28, 1), the shape used throughout this notebook.
        num_classes: Number of units in the softmax output layer.
            Defaults to 10.

    Returns:
        A `keras.models.Model` mapping a batch of inputs to per-class
        probabilities. The caller is responsible for compiling it.
    """
    inputs = keras.layers.Input(shape=input_shape)

    # The input shape is fixed by the Input layer above, so the convolution
    # layers do not need (and should not be given) an `input_shape` argument.
    features = keras.layers.Conv2D(filters=32,
                                   kernel_size=(3, 3),
                                   padding='same',
                                   activation='relu')(inputs)
    features = keras.layers.Conv2D(filters=64,
                                   kernel_size=(3, 3),
                                   padding='same',
                                   activation='relu')(features)
    features = keras.layers.MaxPool2D(pool_size=(2, 2))(features)
    features = keras.layers.Dropout(0.35)(features)

    # Classifier head.
    flat = keras.layers.Flatten()(features)
    hidden = keras.layers.Dense(128, activation='relu')(flat)
    hidden = keras.layers.Dropout(0.5)(hidden)
    outputs = keras.layers.Dense(num_classes, activation='softmax')(hidden)

    return keras.models.Model(inputs=inputs, outputs=outputs)
# Instantiate the teacher, print its layer table, then attach the optimizer,
# loss and accuracy metric used for training.
teacher_model = TeacherModel()
teacher_model.summary()

teacher_model.compile(
    optimizer=optimizers.Adam(),
    loss="categorical_crossentropy",
    metrics=['acc'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_17 (InputLayer)        (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
conv2d_25 (Conv2D)           (None, 28, 28, 64)        18496     
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 14, 14, 64)        0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 14, 14, 64)        0         
_________________________________________________________________
flatten_16 (Flatten)         (None, 12544)             0         
_________________________________________________________________
dense_32 (Dense)             (None, 128)               1605760   
__________

In [50]:
# Train the teacher on the one-hot labels for 20 epochs; the test split is
# passed as validation data so its metrics are reported after every epoch.
teacher_model.fit(
    x=X_train,
    y=Y_train,
    batch_size=512,
    epochs=20,
    validation_data=(X_test, Y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7febe681e898>

In [52]:
# Teacher's predicted class probabilities for every training image; these are
# used further down as the training targets for the distilled student.
Y_teacher_train = teacher_model.predict(x=X_train)

In [53]:
# Sanity check: one row of class probabilities per training sample.
np.shape(Y_teacher_train)

(60000, 10)

In [37]:
# Student model: a deliberately tiny network used both as the baseline and as
# the model trained on the teacher's predictions.
def StudentModel(input_shape=(28, 28, 1), num_classes=10):
    """Build the (uncompiled) student CNN with the Keras functional API.

    Architecture: Conv2D(2 filters, 4x4) -> MaxPool(2x2, stride 2) -> Flatten
    -> Dense(16) -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape of one input sample, without the batch axis.
            Defaults to (28, 28, 1), the shape used throughout this notebook.
        num_classes: Number of units in the softmax output layer.
            Defaults to 10.

    Returns:
        A `keras.models.Model` mapping a batch of inputs to per-class
        probabilities. The caller is responsible for compiling it.
    """
    inputs = keras.layers.Input(shape=input_shape)

    # The input shape is fixed by the Input layer above, so the convolution
    # layer does not need (and should not be given) an `input_shape` argument.
    features = keras.layers.Conv2D(filters=2,
                                   kernel_size=(4, 4),
                                   padding='same',
                                   activation='relu')(inputs)
    features = keras.layers.MaxPool2D(pool_size=(2, 2),
                                      strides=2,  # stride of the pooling window
                                      )(features)

    # Classifier head.
    flat = keras.layers.Flatten()(features)
    hidden = keras.layers.Dense(16, activation='relu')(flat)
    outputs = keras.layers.Dense(num_classes, activation='softmax')(hidden)

    return keras.models.Model(inputs=inputs, outputs=outputs)
# Baseline student: same architecture as the distilled student below, compiled
# with the same optimizer, loss and accuracy metric as the teacher.
student_model = StudentModel()
student_model.summary()
student_model.compile(
    optimizer=optimizers.Adam(),
    loss="categorical_crossentropy",
    metrics=['acc'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_11 (InputLayer)        (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 28, 28, 2)         34        
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 14, 14, 2)         0         
_________________________________________________________________
flatten_10 (Flatten)         (None, 392)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 16)                6288      
_________________________________________________________________
dense_21 (Dense)             (None, 10)                170       
Total params: 6,492
Trainable params: 6,492
Non-trainable params: 0
_________________________________________________________________


In [38]:
# Baseline result: train the student directly on the original one-hot labels.
student_model.fit(
    x=X_train,
    y=Y_train,
    batch_size=512,
    epochs=10,
    validation_data=(X_test, Y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7febe8c70710>

In [54]:
# Create the model that learns by following the teacher. It is a fresh
# StudentModel instance, so it starts from its own newly initialised weights
# rather than sharing any with the baseline student.
floow_teacher_model = StudentModel()
floow_teacher_model.summary()
floow_teacher_model.compile(
    optimizer=optimizers.Adam(),
    loss="categorical_crossentropy",
    metrics=['acc'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_18 (InputLayer)        (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 28, 28, 2)         34        
_________________________________________________________________
max_pooling2d_24 (MaxPooling (None, 14, 14, 2)         0         
_________________________________________________________________
flatten_17 (Flatten)         (None, 392)               0         
_________________________________________________________________
dense_34 (Dense)             (None, 16)                6288      
_________________________________________________________________
dense_35 (Dense)             (None, 10)                170       
Total params: 6,492
Trainable params: 6,492
Non-trainable params: 0
_________________________________________________________________


In [55]:
# Train on the teacher's predictions instead of the one-hot labels; validation
# still uses the true test labels.
# Original author's observation: the result on the validation set improves
# noticeably; on the test set there is no clear advantage because the teacher
# has not learned well enough, but a better-trained teacher would bring a
# larger gain.
floow_teacher_model.fit(
    x=X_train,
    y=Y_teacher_train,
    batch_size=512,
    epochs=10,
    validation_data=(X_test, Y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7febe6177cf8>