# MNIST数据集

## 1. 加载数据集

In [None]:
from keras.datasets import mnist

# Fetch MNIST through the Keras dataset helper and unpack the result into
# (images, labels) pairs for the training split and the test split.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

## 2. 数据探索

In [None]:
import matplotlib.pyplot as plt

def plot_figure(im, interp = False):
    """Show a single image in grayscale on a new 3x6-inch figure.

    Parameters
    ----------
    im : array-like
        Image data accepted by ``plt.imshow`` (for example one MNIST digit).
    interp : bool, optional
        If False (the default) the pixels are drawn without interpolation
        (``interpolation='none'``). If True, ``interpolation=None`` is passed
        so Matplotlib falls back to its configured default interpolation.

    Returns
    -------
    None
        The image is drawn on the current pyplot figure as a side effect.
    """
    # The figure handle is not used afterwards, so it is not bound to a name.
    plt.figure(figsize = (3, 6))
    # Ask for the gray colormap for this image only. plt.gray() would also
    # switch the global default colormap for every plot drawn later.
    plt.imshow(im, cmap = 'gray', interpolation = None if interp else 'none')

In [None]:
# Preview the first training image with the plotting helper.
plot_figure(train_images[0])

In [None]:
# Dimensions of the training image array.
train_images.shape

In [None]:
# Number of training labels.
len(train_labels)

In [None]:
# Display the training labels as loaded (before the one-hot encoding step).
train_labels

In [None]:
# Dimensions of the test image array.
test_images.shape

In [None]:
# Number of test labels.
len(test_labels)

In [None]:
# Display the test labels as loaded (before the one-hot encoding step).
test_labels

## 3. 数据预处理

### 3.1 数据标准化(Normalization)

In [None]:
# Inputs for the dense network: flatten every 28x28 image into one row of
# 784 values and scale by 1/255 (assuming 8-bit pixels, this maps them into
# [0, 1]). The number of samples is read from each array instead of being
# hard-coded, so the cell also works on a subset of the data.
train_images_dense = train_images.reshape((train_images.shape[0], 28 * 28))
train_images_dense = train_images_dense.astype('float32') / 255

test_images_dense = test_images.reshape((test_images.shape[0], 28 * 28))
test_images_dense = test_images_dense.astype('float32') / 255

In [None]:
# Inputs for the convolutional network: append a trailing channel axis of
# size 1 to every 28x28 image and scale by 1/255 (assuming 8-bit pixels, this
# maps them into [0, 1]). The number of samples is read from each array
# instead of being hard-coded, so the cell also works on a subset of the data.
train_images_conv = train_images.reshape((train_images.shape[0], 28, 28, 1))
train_images_conv = train_images_conv.astype('float32') / 255

test_images_conv = test_images.reshape((test_images.shape[0], 28, 28, 1))
test_images_conv = test_images_conv.astype('float32') / 255

### 3.2 one-hot encoding

In [None]:
from keras.utils import to_categorical

# One-hot encode the integer class labels. The names are rebound because the
# later training cells expect `train_labels` / `test_labels` to be one-hot.
# The ndim guard keeps the cell safe to run more than once: encoding labels
# that are already one-hot would silently produce a 3-D array.
if train_labels.ndim == 1:
    train_labels = to_categorical(train_labels)
if test_labels.ndim == 1:
    test_labels = to_categorical(test_labels)

In [None]:
# Inspect the encoded label of the first training sample.
train_labels[0]

In [None]:
# Shape of the label array after one-hot encoding.
print(train_labels.shape)

## 4. 构建模型

### 4.1 构建全连接网络

In [None]:
from keras import models
from keras import layers

# Fully connected classifier: a 512-unit ReLU hidden layer over the flattened
# 784-value input, followed by a 10-way softmax output layer.
model = models.Sequential([
    layers.Dense(512, activation='relu', input_shape=(28 * 28,)),
    layers.Dense(10, activation='softmax'),
])

In [None]:
# Configure training: categorical cross-entropy loss (the labels are one-hot
# encoded), the RMSprop optimizer, and accuracy reported as a metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the dense model.
model.summary()

### 4.1.1 训练网络

In [None]:
# Train the dense network for 10 epochs in mini-batches of 128 samples and
# keep the object returned by fit() for later inspection.
history = model.fit(
    train_images_dense,
    train_labels,
    batch_size=128,
    epochs=10,
)

### 4.1.2 验证训练好的模型

In [None]:
# Evaluate the trained dense model on the test data; the result is unpacked
# into the loss and the single compiled metric (accuracy).
test_loss, test_acc = model.evaluate(test_images_dense, test_labels)

In [None]:
# Report the accuracy measured on the test data.
print('test_acc:', test_acc)

### 4.1.3 在训练时加入验证集（这里直接使用测试集作为验证数据）

In [None]:
# Fit again, this time passing the test data as validation data so that
# validation loss and accuracy are recorded after every epoch.
# NOTE(review): `model` is the same object that was already fitted for 10
# epochs, so this call appears to continue training rather than start from
# fresh weights - confirm that this is intended before reading the curves.
history = model.fit(
    train_images_dense,
    train_labels,
    validation_data=(test_images_dense, test_labels),
    batch_size=128,
    epochs=10,
)

Our test set accuracy turns out to be 97.8% -- that's quite a bit lower than the training set accuracy. 
This gap between training accuracy and test accuracy is an example of "overfitting", 
the fact that machine learning models tend to perform worse on new data than on their training data. 

### 4.1.4 画出训练集和验证集上的表现

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Plot per-epoch accuracy and loss for the most recent `history`.
# Keras releases disagree on the name of the accuracy entry ('acc' in older
# versions, 'accuracy' in newer ones), so pick whichever this run recorded
# instead of failing with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']

# 0-based epoch positions, shared by both figures.
epochs = range(len(acc))

# Figure 1: accuracy (dots = training, line = validation).
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.legend()

# Start a second figure so the loss curves do not share axes with accuracy.
plt.figure()

# Figure 2: loss (dots = training, line = validation).
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.legend()

plt.show()

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Loss and accuracy curves of the most recent `history`, one figure each.
# The accuracy entry is stored as 'acc' by older Keras releases and as
# 'accuracy' by newer ones; resolve the key once so either version works
# instead of raising a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# Figure 1: training loss vs. loss on the validation data.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.show()

# Figure 2: training accuracy vs. accuracy on the validation data.
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.show()

In [None]:
# Positions (0-based indices into the history lists) of the epoch with the
# lowest validation loss and of the epoch with the highest validation accuracy.
val_loss_min = val_loss.index(min(val_loss))
val_acc_max = val_acc.index(max(val_acc))
# Print the metric value together with the 1-based epoch number. Printing only
# the index under a "min loss" label reads as if the index were the loss.
print('validation set min loss: ', val_loss[val_loss_min], 'at epoch', val_loss_min + 1)
print('validation set max accuracy: ', val_acc[val_acc_max], 'at epoch', val_acc_max + 1)

### 4.2 构建卷积神经网络

In [None]:
from keras import layers
from keras import models

# Convolutional classifier for 28x28x1 inputs: three 3x3 ReLU convolutions
# (32, 64 and 64 filters) with 2x2 max pooling after the first two, then a
# flattened 64-unit ReLU layer and a 10-way softmax output.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

In [None]:
# Configure training: categorical cross-entropy loss (the labels are one-hot
# encoded), the RMSprop optimizer, and accuracy reported as a metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the convolutional model.
model.summary()

### 4.2.1 训练模型(有验证集)

In [None]:
# Train the convolutional network for 10 epochs in mini-batches of 64,
# passing the test data as validation data so per-epoch validation metrics
# are recorded in the returned history.
history = model.fit(
    train_images_conv,
    train_labels,
    validation_data=(test_images_conv, test_labels),
    batch_size=64,
    epochs=10,
)

### 4.2.2 验证模型

In [None]:
# Evaluate the trained convolutional model on the test data; the result is
# unpacked into the loss and the single compiled metric (accuracy).
test_loss, test_acc = model.evaluate(test_images_conv, test_labels)

In [None]:
# Show the accuracy measured on the test data.
test_acc

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Plot per-epoch accuracy and loss for the most recent `history`.
# Keras releases disagree on the name of the accuracy entry ('acc' in older
# versions, 'accuracy' in newer ones), so pick whichever this run recorded
# instead of failing with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']

# 0-based epoch positions, shared by both figures.
epochs = range(len(acc))

# Figure 1: accuracy (dots = training, line = validation).
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.legend()

# Start a second figure so the loss curves do not share axes with accuracy.
plt.figure()

# Figure 2: loss (dots = training, line = validation).
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.legend()

plt.show()

In [None]:
# Positions (0-based indices into the history lists) of the epoch with the
# lowest validation loss and of the epoch with the highest validation accuracy.
val_loss_min = val_loss.index(min(val_loss))
val_acc_max = val_acc.index(max(val_acc))
# Print the metric value together with the 1-based epoch number. Printing only
# the index under a "min loss" label reads as if the index were the loss.
print('validation set min loss: ', val_loss[val_loss_min], 'at epoch', val_loss_min + 1)
print('validation set max accuracy: ', val_acc[val_acc_max], 'at epoch', val_acc_max + 1)

In [None]:
import itertools

import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.datasets import mnist
import keras.backend as K

import matplotlib.pyplot as plt
from matplotlib.colors import colorConverter, ListedColormap

# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1)
%matplotlib inline

In [None]:
# Load MNIST again, this time bound to the X_* / y_* names used from here on.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [None]:
# Dimensions of the training images as loaded.
X_train.shape

In [None]:
# Flatten each image into a single feature vector (28 * 28 = 784 values).
# The per-image size is the product of every non-batch dimension, so the cell
# still works when it is run again after X_train has already been flattened;
# indexing shape[2] on the resulting 2-D array would raise an IndexError.
num_pixels = int(np.prod(X_train.shape[1:]))
X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32')
X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32')

In [None]:
# Number of values per flattened image.
num_pixels

In [None]:
# Dimensions of the training data after flattening.
X_train.shape

In [None]:
# Rescale pixel intensities from the 0-255 range down to 0-1.
# NOTE: X_train / X_test are rebound to the scaled arrays, so running this
# cell a second time divides by 255 again.
X_train, X_test = (pixels / 255.0 for pixels in (X_train, X_test))

In [None]:
# Peek at the first ten training labels before encoding.
y_train[:10]

In [None]:
# One-hot encode the class labels and record the number of classes.
# The ndim guard makes the cell safe to run more than once: on a second run
# the labels are already 2-D, where set() would raise a TypeError and another
# to_categorical() call would add a third axis.
if y_train.ndim == 1:
    num_classes = len(np.unique(y_train))
    # Pass the class count explicitly so the train and test encodings always
    # have the same width instead of each being inferred from its own labels.
    y_train = np_utils.to_categorical(y_train, num_classes)
    y_test = np_utils.to_categorical(y_test, num_classes)
else:
    # Labels are already one-hot; recover the class count from their width.
    num_classes = y_train.shape[1]

In [None]:
# Display the training labels after one-hot encoding.
y_train

### 全连接网络

In [None]:
# Dense baseline: one hidden ReLU layer as wide as the input (num_pixels
# units) followed by a softmax layer with one unit per class.
model = Sequential([
    Dense(num_pixels, input_dim = num_pixels, activation = 'relu'),
    Dense(num_classes, activation = 'softmax'),
])

# Adam optimizer, categorical cross-entropy loss, accuracy as the metric.
model.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy'],
)

In [None]:
# Train the dense baseline for 10 epochs in mini-batches of 128, with the
# test data passed as validation data and progress output enabled.
model.fit(
    X_train,
    y_train,
    batch_size = 128,
    epochs = 10,
    verbose = True,
    validation_data = (X_test, y_test),
)

In [None]:
# Print the layer-by-layer summary of the dense baseline.
model.summary()

输出层的参数量：784*10 个权重，再加上 10 个偏置（常数项），一共 7850 个参数。

### 卷积网络

In [None]:
# Give every sample the 28x28x1 layout expected by the convolutional model
# (height, width, single channel), keeping float32 values.
X_train, X_test = (
    images.reshape(images.shape[0], 28, 28, 1).astype('float32')
    for images in (X_train, X_test)
)

In [None]:
# Dimensions of the training data after adding the channel axis.
X_train.shape

In [None]:
# Larger CNN: two stacks of paired 3x3 ReLU convolutions (32 filters, then
# 64 filters), each stack followed by 2x2 max pooling, then a flattened
# 128-unit ReLU layer and a softmax layer with one unit per class.
model = Sequential([
    Conv2D(32, kernel_size = 3, input_shape = (28, 28, 1), activation = 'relu'),
    Conv2D(32, kernel_size = 3, activation = 'relu'),
    MaxPooling2D(pool_size = 2),
    Conv2D(64, kernel_size = 3, activation = 'relu'),
    Conv2D(64, kernel_size = 3, activation = 'relu'),
    MaxPooling2D(pool_size = 2),
    Flatten(),
    Dense(128, activation = 'relu'),
    Dense(num_classes, activation = 'softmax'),
])

# Adam optimizer, categorical cross-entropy loss, accuracy as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the CNN for 10 epochs in mini-batches of 128, with the test data
# passed as validation data.
model.fit(
    X_train,
    y_train,
    batch_size = 128,
    epochs = 10,
    validation_data=(X_test, y_test),
)

# Score the trained model silently on the test data. Index 1 of the result
# is the compiled accuracy metric, reported here as an error percentage.
scores = model.evaluate(X_test, y_test, verbose=0)
error_pct = 100 - scores[1] * 100
print("Large CNN Error: %.2f%%" % error_pct)

In [None]:
# Print the layer-by-layer summary of the CNN.
model.summary()

卷积网络的层数更深，但参数量反而比全连接网络更少。