## 使用keras训练notMNIST数据集，尝试之前作业的几种全连接网络结构

- 由于keras和tensorflow版本的问题，这里通过tensorflow调用keras，而不是直接import keras

In [35]:
from tensorflow.keras.models import Sequential  
from tensorflow.keras.layers import Dense, Dropout, Activation  
from tensorflow.keras.optimizers import SGD  
from tensorflow.keras.callbacks import Callback
from tensorflow.keras import regularizers
import numpy as np
from sklearn.metrics import roc_auc_score

In [2]:
import pickle

In [3]:
pickle_file = 'notMNIST.pickle'

# Read the pickled dict, then pull the six arrays out of it once the file
# handle is closed.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # hint to help gc free up memory

# Report the shape of every split as a quick sanity check.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


In [4]:
image_size = 28
num_labels = 10


def reformat(dataset, labels):
    """Flatten images to row vectors and one-hot encode the labels.

    The function is idempotent: feeding its own output back in returns the
    same arrays, so re-running the notebook cell that calls it is harmless.

    Args:
        dataset: array that can be reshaped to (-1, image_size * image_size).
        labels: either a 1-D array of integer class ids, or a 2-D array with
            num_labels columns that is already one-hot encoded.

    Returns:
        A (dataset, labels) tuple of float32 arrays; dataset has
        image_size * image_size columns and labels has num_labels columns.

    Raises:
        ValueError: if labels is neither 1-D nor of shape (n, num_labels).
    """
    dataset = dataset.reshape(-1, image_size * image_size).astype(np.float32)
    labels = np.asarray(labels)
    if labels.ndim == 1:
        # Broadcasting (n, 1) against (num_labels,) marks, for each row,
        # the single column whose index equals that row's class id.
        one_hot = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    elif labels.ndim == 2 and labels.shape[1] == num_labels:
        # Already encoded (e.g. the calling cell was executed twice).
        one_hot = labels.astype(np.float32)
    else:
        raise ValueError(
            'labels must be 1-D class ids or a (n, %d) one-hot array, got shape %s'
            % (num_labels, labels.shape))
    return dataset, one_hot
# Flatten the images and one-hot encode the labels of every split.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

# Print the post-reformat shapes, one line per split.
for split_name, split_x, split_y in (
        ('Training set', train_dataset, train_labels),
        ('Validation set', valid_dataset, valid_labels),
        ('Test set', test_dataset, test_labels)):
    print(split_name, split_x.shape, split_y.shape)

Training set (200000, 784) (200000, 10)
Validation set (10000, 784) (10000, 10)
Test set (10000, 784) (10000, 10)


In [22]:
def accuracy(predictions, labels):
    """Return the percentage of rows whose arg-max class matches the labels.

    Args:
        predictions: 2-D array of per-class scores, one row per sample.
        labels: 2-D array of the same layout holding the target classes.

    Returns:
        Accuracy as a float percentage (100.0 means every row matched).
    """
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(labels, axis=1)
    num_correct = np.sum(predicted_classes == true_classes)
    return 100.0 * num_correct / predictions.shape[0]

In [41]:
class RocAucEvaluation(Callback):
    """Keras callback that prints ROC-AUC and accuracy on held-out data.

    Args:
        validation_data: (X_val, y_val) pair; X_val is passed to
            model.predict and y_val is compared against the predictions.
        interval: the metrics are computed on every epoch whose zero-based
            index is a multiple of this value.

    Raises:
        ValueError: if validation_data cannot be unpacked into two items.
    """

    def __init__(self, validation_data=(), interval=1):
        # Zero-argument super() resolves to Callback.__init__. The previous
        # super(Callback, self) started the lookup *after* Callback and so
        # skipped the base-class initialisation entirely.
        super().__init__()

        self.interval = interval
        try:
            self.X_val, self.y_val = validation_data
        except ValueError:
            raise ValueError(
                'validation_data must be an (X_val, y_val) pair')

    def on_epoch_end(self, epoch, logs=None):
        """Score the validation set and print ROC-AUC and accuracy.

        Args:
            epoch: zero-based epoch index supplied by Keras.
            logs: metrics dict supplied by Keras; not used here. Defaults to
                None rather than a shared mutable dict.
        """
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
            score = roc_auc_score(self.y_val, y_pred)
            accur = accuracy(y_pred, self.y_val)
            # epoch + 1 so the printed number matches Keras' 1-based "Epoch k/N".
            print("\n ROC-AUC - epoch: %d - score: %.6f - accuracy:  %.1f%% \n" % (epoch+1, score,accur))

In [42]:
# One callback instance, scored on the validation split after every epoch.
RocAuc = RocAucEvaluation(
    validation_data=(valid_dataset, valid_labels),
    interval=1,
)

### fully-connected

In [43]:
# Baseline network: one 1024-unit ReLU hidden layer fed by the 28*28=784
# flattened pixels, followed by a 10-way softmax output.
model = Sequential([
    Dense(1024, input_shape=(784,)),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])
# `learning_rate` is the current argument name; `lr` is a deprecated alias.
sgd = SGD(learning_rate=0.1)
model.compile(loss='categorical_crossentropy', optimizer=sgd)
# The RocAuc callback prints validation ROC-AUC and accuracy per epoch.
model.fit(train_dataset, train_labels, batch_size=128, epochs=5,
          callbacks=[RocAuc], verbose=2,
          validation_data=(valid_dataset, valid_labels))

Train on 200000 samples, validate on 10000 samples
Epoch 1/5

 ROC-AUC - epoch: 1 - score: 0.985811 - accuracy:  86.4% 

200000/200000 - 6s - loss: 0.5407 - val_loss: 0.4672
Epoch 2/5

 ROC-AUC - epoch: 2 - score: 0.988514 - accuracy:  87.5% 

200000/200000 - 6s - loss: 0.4236 - val_loss: 0.4207
Epoch 3/5

 ROC-AUC - epoch: 3 - score: 0.989543 - accuracy:  88.2% 

200000/200000 - 6s - loss: 0.3758 - val_loss: 0.4029
Epoch 4/5

 ROC-AUC - epoch: 4 - score: 0.990551 - accuracy:  89.0% 

200000/200000 - 7s - loss: 0.3428 - val_loss: 0.3795
Epoch 5/5

 ROC-AUC - epoch: 5 - score: 0.991231 - accuracy:  89.1% 

200000/200000 - 6s - loss: 0.3183 - val_loss: 0.3697


<tensorflow.python.keras.callbacks.History at 0x24efad0eb70>

### fully-connected with L2-regularization

In [45]:
# Same 784 -> 1024 (ReLU) -> 10 (softmax) network as the baseline, with an
# L2 penalty of 0.01 on the kernel of both Dense layers.
model = Sequential([
    Dense(1024, input_shape=(784,), kernel_regularizer=regularizers.l2(0.01)),
    Activation('relu'),
    Dense(10, kernel_regularizer=regularizers.l2(0.01)),
    Activation('softmax'),
])
# `learning_rate` is the current argument name; `lr` is a deprecated alias.
sgd = SGD(learning_rate=0.1)
model.compile(loss='categorical_crossentropy', optimizer=sgd)
# The RocAuc callback prints validation ROC-AUC and accuracy per epoch.
model.fit(train_dataset, train_labels, batch_size=128, epochs=5,
          callbacks=[RocAuc], verbose=2,
          validation_data=(valid_dataset, valid_labels))

Train on 200000 samples, validate on 10000 samples
Epoch 1/5

 ROC-AUC - epoch: 1 - score: 0.976101 - accuracy:  82.8% 

200000/200000 - 8s - loss: 2.2818 - val_loss: 0.8521
Epoch 2/5

 ROC-AUC - epoch: 2 - score: 0.976243 - accuracy:  83.0% 

200000/200000 - 8s - loss: 0.8272 - val_loss: 0.8285
Epoch 3/5

 ROC-AUC - epoch: 3 - score: 0.976279 - accuracy:  82.9% 

200000/200000 - 8s - loss: 0.8202 - val_loss: 0.8275
Epoch 4/5

 ROC-AUC - epoch: 4 - score: 0.976584 - accuracy:  83.0% 

200000/200000 - 8s - loss: 0.8186 - val_loss: 0.8233
Epoch 5/5

 ROC-AUC - epoch: 5 - score: 0.976518 - accuracy:  83.1% 

200000/200000 - 8s - loss: 0.8175 - val_loss: 0.8245


<tensorflow.python.keras.callbacks.History at 0x24ef282f5f8>

- 结果不是很好，或许是由于本来没有出现过拟合问题，强行惩罚反而不好

### fully-connected with dropout

In [46]:
# 784 -> 1024 (ReLU) -> 10 (softmax) network with Dropout(0.5) applied to
# the hidden activations.
model = Sequential([
    Dense(1024, input_shape=(784,)),
    Activation('relu'),
    Dropout(0.5),
    Dense(10),
    Activation('softmax'),
])
# `learning_rate` is the current argument name; `lr` is a deprecated alias.
sgd = SGD(learning_rate=0.1)
model.compile(loss='categorical_crossentropy', optimizer=sgd)
# The RocAuc callback prints validation ROC-AUC and accuracy per epoch.
model.fit(train_dataset, train_labels, batch_size=128, epochs=5,
          callbacks=[RocAuc], verbose=2,
          validation_data=(valid_dataset, valid_labels))

Train on 200000 samples, validate on 10000 samples
Epoch 1/5

 ROC-AUC - epoch: 1 - score: 0.984641 - accuracy:  85.6% 

200000/200000 - 7s - loss: 0.5867 - val_loss: 0.4881
Epoch 2/5

 ROC-AUC - epoch: 2 - score: 0.987165 - accuracy:  86.8% 

200000/200000 - 7s - loss: 0.4816 - val_loss: 0.4454
Epoch 3/5

 ROC-AUC - epoch: 3 - score: 0.988784 - accuracy:  87.9% 

200000/200000 - 7s - loss: 0.4448 - val_loss: 0.4156
Epoch 4/5

 ROC-AUC - epoch: 4 - score: 0.989589 - accuracy:  88.2% 

200000/200000 - 7s - loss: 0.4190 - val_loss: 0.3999
Epoch 5/5

 ROC-AUC - epoch: 5 - score: 0.990023 - accuracy:  88.7% 

200000/200000 - 7s - loss: 0.4014 - val_loss: 0.3894


<tensorflow.python.keras.callbacks.History at 0x24efcc65320>

- 结果一般

### fully-connected with dropout and learning rate decay

In [52]:
# Dropout network (784 -> 1024 ReLU -> Dropout(0.5) -> 10 softmax) trained
# with a decaying learning rate.
model = Sequential([
    Dense(1024, input_shape=(784,)),
    Activation('relu'),
    Dropout(0.5),
    Dense(10),
    Activation('softmax'),
])
# `learning_rate` is the current argument name; `lr` is a deprecated alias.
# decay=1e-04 turns on the optimizer's built-in learning-rate decay.
sgd = SGD(learning_rate=0.1, decay=1e-04)
model.compile(loss='categorical_crossentropy', optimizer=sgd)
# The RocAuc callback prints validation ROC-AUC and accuracy per epoch.
model.fit(train_dataset, train_labels, batch_size=128, epochs=5,
          callbacks=[RocAuc], verbose=2,
          validation_data=(valid_dataset, valid_labels))

Train on 200000 samples, validate on 10000 samples
Epoch 1/5

 ROC-AUC - epoch: 1 - score: 0.984523 - accuracy:  85.6% 

200000/200000 - 7s - loss: 0.5895 - val_loss: 0.4889
Epoch 2/5

 ROC-AUC - epoch: 2 - score: 0.987085 - accuracy:  86.8% 

200000/200000 - 7s - loss: 0.4850 - val_loss: 0.4463
Epoch 3/5

 ROC-AUC - epoch: 3 - score: 0.988424 - accuracy:  87.5% 

200000/200000 - 7s - loss: 0.4479 - val_loss: 0.4214
Epoch 4/5

 ROC-AUC - epoch: 4 - score: 0.989213 - accuracy:  87.9% 

200000/200000 - 7s - loss: 0.4252 - val_loss: 0.4072
Epoch 5/5

 ROC-AUC - epoch: 5 - score: 0.989656 - accuracy:  88.2% 

200000/200000 - 7s - loss: 0.4093 - val_loss: 0.3973


<tensorflow.python.keras.callbacks.History at 0x24eff770860>