# 入门｜深度学习实战2：改进上篇的MLP

In [1]:
# 读写csv
import pandas as pd
# 借助sklearn的StratifiedShuffleSplit进行分层抽样
from sklearn.model_selection import StratifiedShuffleSplit
# 导入Keras
from tensorflow import keras
import numpy as np

In [2]:
# Load the training set and the test set, in that order, from CSV files
# located in the current working directory.
data, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [3]:
# Stratified hold-out: one shuffled split that sets aside 10% of the rows for
# validation, stratified on the `label` column; seeded so it is repeatable.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)

# n_splits=1, so the generator yields exactly one (train, valid) pair of index
# arrays; take it directly instead of looping.
train_index, valid_index = next(split.split(data, data['label']))
train, valid = data.loc[train_index], data.loc[valid_index]

In [4]:
# Separate the features (every column except `label`) from the target column,
# for the training subset and then for the validation subset.
train_X, train_Y = train.drop(columns='label'), train['label']
valid_X, valid_Y = valid.drop(columns='label'), valid['label']

In [5]:
# Baseline reference model: 784 inputs -> two ReLU hidden layers (300, 100)
# -> 10-way softmax output.
model1 = keras.models.Sequential(
    [keras.layers.InputLayer(input_shape=[784])]
    + [keras.layers.Dense(width, activation='relu') for width in (300, 100)]
    + [keras.layers.Dense(10, activation='softmax')]
)

2022-01-20 23:40:54.253403: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-20 23:40:54.257237: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-20 23:40:54.257665: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-20 23:40:54.258520: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [6]:
# Compile the baseline model: sparse categorical cross-entropy (targets are
# passed as class ids rather than one-hot vectors), plain SGD with a small
# learning rate, and accuracy reported as the metric.
model1.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=keras.optimizers.SGD(learning_rate=0.0001),
    metrics=['accuracy']
)

# Keep only the best epoch on disk. With save_best_only=True the callback ranks
# epochs by `monitor`; 'val_loss' is the Keras default, spelled out here so the
# selection criterion is visible.
checkpoint = keras.callbacks.ModelCheckpoint('model1_best.h5',
                                             monitor='val_loss',
                                             save_best_only=True)

# Early stopping must watch a validation metric. Monitoring plain 'accuracy'
# tracks the training set, which can keep improving while the model overfits,
# and would make restore_best_weights pick the epoch with the best *training*
# fit. 'val_accuracy' keeps min_delta in the same (accuracy) units.
early_stop = keras.callbacks.EarlyStopping(patience=2, monitor='val_accuracy',
                                           min_delta=0.005,
                                           restore_best_weights=True)

# Train for at most 100 epochs; the validation split feeds both callbacks.
history = model1.fit(train_X, train_Y, epochs=100,
                     validation_data=(valid_X, valid_Y),
                     callbacks=[checkpoint, early_stop],
                     batch_size=32)

Epoch 1/100
 194/1182 [===>..........................] - ETA: 0s - loss: 13.3061 - accuracy: 0.6551 

2022-01-20 23:40:55.384763: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100


In [7]:
# Model 2: a deeper variant with six ReLU hidden layers whose widths shrink
# from 600 to 100 in steps of 100, followed by a 10-way softmax output.
model2 = keras.models.Sequential(
    [keras.layers.InputLayer(input_shape=[784])]
    + [keras.layers.Dense(width, activation='relu')
       for width in (600, 500, 400, 300, 200, 100)]
    + [keras.layers.Dense(10, activation='softmax')]
)

In [8]:
# Compile the deeper model: sparse categorical cross-entropy (targets are
# passed as class ids rather than one-hot vectors), plain SGD with a small
# learning rate, and accuracy reported as the metric.
model2.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=keras.optimizers.SGD(learning_rate=0.0001),
    metrics=['accuracy']
)

# Keep only the best epoch on disk. With save_best_only=True the callback ranks
# epochs by `monitor`; 'val_loss' is the Keras default, spelled out here so the
# selection criterion is visible.
checkpoint = keras.callbacks.ModelCheckpoint('model2_best.h5',
                                             monitor='val_loss',
                                             save_best_only=True)

# Early stopping must watch a validation metric. Monitoring plain 'accuracy'
# tracks the training set, which can keep improving while the model overfits,
# and would make restore_best_weights pick the epoch with the best *training*
# fit. 'val_accuracy' keeps min_delta in the same (accuracy) units.
early_stop = keras.callbacks.EarlyStopping(patience=10, monitor='val_accuracy',
                                           min_delta=0.001,
                                           restore_best_weights=True)

# Train for at most 100 epochs; the validation split feeds both callbacks.
history = model2.fit(train_X, train_Y, epochs=100,
                     validation_data=(valid_X, valid_Y),
                     callbacks=[checkpoint, early_stop],
                     batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100


In [9]:
# Reload the checkpointed baseline model and score the test set.
# NOTE(review): assumes `test` holds only the feature columns the model was
# trained on (no label / id column) - confirm against test.csv.
model1_best = keras.models.load_model('model1_best.h5')
model1_probs = model1_best.predict(test)

# Collapse each row of per-class scores to the index of the highest score.
model1_results = pd.Series(np.argmax(model1_probs, axis=1), name='Label')

# Build the submission file. ImageId is a 1-based row number derived from the
# number of predictions rather than a hard-coded 28000, so the two columns
# always line up even if the test file has a different size.
model1_image_ids = pd.Series(range(1, len(model1_results) + 1), name="ImageId")
model1_submission = pd.concat([model1_image_ids, model1_results], axis=1)
model1_submission.to_csv('model1_submission.csv', index=False)

In [10]:
# Reload the checkpointed deeper model and score the test set.
# NOTE(review): assumes `test` holds only the feature columns the model was
# trained on (no label / id column) - confirm against test.csv.
model2_best = keras.models.load_model('model2_best.h5')
model2_probs = model2_best.predict(test)

# Collapse each row of per-class scores to the index of the highest score.
model2_results = pd.Series(np.argmax(model2_probs, axis=1), name='Label')

# Build the submission file from model2's own predictions. The previous
# version concatenated `model1_results` here, so model2_submission.csv
# silently contained the baseline model's labels.
# ImageId is a 1-based row number derived from the number of predictions
# rather than a hard-coded 28000.
model2_image_ids = pd.Series(range(1, len(model2_results) + 1), name="ImageId")
model2_submission = pd.concat([model2_image_ids, model2_results], axis=1)
model2_submission.to_csv('model2_submission.csv', index=False)