In [None]:
from utils import *


# Load the image and label files for the training and test ("t10k") splits.
train_imgs, train_labels = read_image("data/train-images.idx3-ubyte"), read_label("data/train-labels.idx1-ubyte")
test_imgs, test_labels = read_image("data/t10k-images.idx3-ubyte"), read_label("data/t10k-labels.idx1-ubyte")

# Split the data: the first 1000 training samples become the validation set,
# the following 10000 stay as the training set, and only the first 1000 test
# samples are kept.
valid_imgs, valid_labels = train_imgs[:1000], train_labels[:1000]
train_imgs, train_labels = train_imgs[1000:11000], train_labels[1000:11000]
test_imgs, test_labels = test_imgs[:1000], test_labels[:1000]

# Data augmentation on the training images (noise, translation, crop-and-pad).
# These are derived from the raw pixel values, i.e. before binarization below.
shape = (28, 28)
noised_imgs = add_noise(train_imgs, noise_rate=0.2)
trans_imgs = image_translation(train_imgs, shape)
cropped_imgs = random_crop_and_pad(train_imgs, shape)

# Binarize every image set: divide by 255, then map values > 0.5 to 1 and the
# rest to 0 (assumes pixel values are in 0..255 -- TODO confirm in utils).
# The tuple on the right of `in` is built before any name is rebound, so each
# set is processed from its current (raw) contents.
train_imgs, valid_imgs, test_imgs = (
    np.where(imgs / 255 > 0.5, 1, 0)
    for imgs in (train_imgs, valid_imgs, test_imgs)
)

noised_imgs, trans_imgs, cropped_imgs = (
    np.where(imgs / 255 > 0.5, 1, 0)
    for imgs in (noised_imgs, trans_imgs, cropped_imgs)
)

In [None]:
from model import *
import matplotlib.pyplot as plt

# Compare different model architectures.
num_labels = 10
# One-hot encode the training labels by picking rows of the identity matrix.
train_label_vectors = np.eye(num_labels)[train_labels]

# Layer sizes of the three candidates: 28*28 inputs, hidden layer(s), num_labels outputs.
layers_1 = [28 * 28, 256, 128, num_labels]
layers_2 = [28 * 28, 512, 128, num_labels]
layers_3 = [28 * 28, 256, num_labels]

# Dropout value handed to every model; it is set to 0 for this experiment.
dropout = 0
bp_net_1, bp_net_2, bp_net_3 = (
    back_propagation(layers=spec, dropout=dropout, classifacation=True)
    for spec in (layers_1, layers_2, layers_3)
)

# Train the models; all three share the same hyper-parameters.
batch_size = 32
epochs = 20
learning_rate = 0.01
l2_reg = 0
momentum = 0
train_args = (
    train_imgs, train_label_vectors, valid_imgs, valid_labels,
    batch_size, epochs, learning_rate, l2_reg, momentum,
)
_, valid_accs_1, train_losses_1 = bp_net_1.train(*train_args)
_, valid_accs_2, train_losses_2 = bp_net_2.train(*train_args)
_, valid_accs_3, train_losses_3 = bp_net_3.train(*train_args)

# Test accuracy: fraction of samples whose arg-max prediction equals the label.
acc_1, acc_2, acc_3 = (
    (np.argmax(np.array(net.predict(test_imgs)), axis=1) == test_labels).mean()
    for net in (bp_net_1, bp_net_2, bp_net_3)
)

for tag, value in zip(("Accuracy_1:", "Accuracy_2:", "Accuracy_3:"), (acc_1, acc_2, acc_3)):
    print(tag, value)


# Show the training curves, one line per model.
x = np.arange(len(train_losses_1))
curve_styles = (('red', 'model 1'), ('blue', 'model 2'), ('green', 'model 3'))

plt.figure(1)
for curve, (color, label) in zip((train_losses_1, train_losses_2, train_losses_3), curve_styles):
    plt.plot(x, curve, color=color, label=label)
plt.xlabel('epoch')
plt.ylabel('train_loss')
plt.title('loss')
plt.legend()

plt.figure(2)
for curve, (color, label) in zip((valid_accs_1, valid_accs_2, valid_accs_3), curve_styles):
    plt.plot(x, curve, color=color, label=label)
plt.xlabel('epoch')
plt.ylabel('valid_acc')
plt.title('accuracy')
plt.legend()
plt.show()

# Drop the trained networks; later cells build fresh ones under the same names.
del bp_net_1, bp_net_2, bp_net_3

In [None]:
# Compare the effect of the momentum strength.
layers = [28 * 28, 256, 128, num_labels]
# Three networks built with identical settings; only the momentum differs at train time.
bp_net_1, bp_net_2, bp_net_3 = (
    back_propagation(layers=layers, dropout=dropout, classifacation=True)
    for _ in range(3)
)

momentum_1 = 0
momentum_2 = 0.5
momentum_3 = 0.9
# Positional arguments common to all runs; the momentum value is appended per run.
shared_args = (
    train_imgs, train_label_vectors, valid_imgs, valid_labels,
    batch_size, epochs, learning_rate, l2_reg,
)
_, valid_accs_1, train_losses_1 = bp_net_1.train(*shared_args, momentum_1)
_, valid_accs_2, train_losses_2 = bp_net_2.train(*shared_args, momentum_2)
_, valid_accs_3, train_losses_3 = bp_net_3.train(*shared_args, momentum_3)

# Test accuracy: fraction of samples whose arg-max prediction equals the label.
acc_1, acc_2, acc_3 = (
    (np.argmax(np.array(net.predict(test_imgs)), axis=1) == test_labels).mean()
    for net in (bp_net_1, bp_net_2, bp_net_3)
)

for tag, value in zip(("Accuracy_1:", "Accuracy_2:", "Accuracy_3:"), (acc_1, acc_2, acc_3)):
    print(tag, value)


# Show the training curves, one line per momentum value.
x = np.arange(len(train_losses_1))
curve_styles = (
    ('red', 'momentum = 0'),
    ('blue', 'momentum = 0.5'),
    ('green', 'momentum = 0.9'),
)

plt.figure(1)
for curve, (color, label) in zip((train_losses_1, train_losses_2, train_losses_3), curve_styles):
    plt.plot(x, curve, color=color, label=label)
plt.xlabel('epoch')
plt.ylabel('train_loss')
plt.title('loss')
plt.legend()

plt.figure(2)
for curve, (color, label) in zip((valid_accs_1, valid_accs_2, valid_accs_3), curve_styles):
    plt.plot(x, curve, color=color, label=label)
plt.xlabel('epoch')
plt.ylabel('valid_acc')
plt.title('accuracy')
plt.legend()
plt.show()

# Drop the trained networks; later cells build fresh ones under the same names.
del bp_net_1, bp_net_2, bp_net_3

In [None]:
# Compare the effect of the L2 regularization term.
# Three networks built with identical settings; only the L2 coefficient differs at train time.
bp_net_1, bp_net_2, bp_net_3 = (
    back_propagation(layers=layers, dropout=dropout, classifacation=True)
    for _ in range(3)
)

l2_reg_1 = 0
l2_reg_2 = 0.001
l2_reg_3 = 0.01
# Leading positional arguments common to all runs; each call then appends its
# own L2 coefficient followed by the shared momentum.
leading_args = (
    train_imgs, train_label_vectors, valid_imgs, valid_labels,
    batch_size, epochs, learning_rate,
)
_, valid_accs_1, train_losses_1 = bp_net_1.train(*leading_args, l2_reg_1, momentum)
_, valid_accs_2, train_losses_2 = bp_net_2.train(*leading_args, l2_reg_2, momentum)
_, valid_accs_3, train_losses_3 = bp_net_3.train(*leading_args, l2_reg_3, momentum)

# Test accuracy: fraction of samples whose arg-max prediction equals the label.
acc_1, acc_2, acc_3 = (
    (np.argmax(np.array(net.predict(test_imgs)), axis=1) == test_labels).mean()
    for net in (bp_net_1, bp_net_2, bp_net_3)
)

for tag, value in zip(("Accuracy_1:", "Accuracy_2:", "Accuracy_3:"), (acc_1, acc_2, acc_3)):
    print(tag, value)


# Show the training curves, one line per L2 coefficient.
x = np.arange(len(train_losses_1))
curve_styles = (
    ('red', 'l2_reg = 0'),
    ('blue', 'l2_reg = 0.001'),
    ('green', 'l2_reg = 0.01'),
)

plt.figure(1)
for curve, (color, label) in zip((train_losses_1, train_losses_2, train_losses_3), curve_styles):
    plt.plot(x, curve, color=color, label=label)
plt.xlabel('epoch')
plt.ylabel('train_loss')
plt.title('loss')
plt.legend()

plt.figure(2)
for curve, (color, label) in zip((valid_accs_1, valid_accs_2, valid_accs_3), curve_styles):
    plt.plot(x, curve, color=color, label=label)
plt.xlabel('epoch')
plt.ylabel('valid_acc')
plt.title('accuracy')
plt.legend()
plt.show()

# Drop the trained networks; later cells build fresh ones under the same names.
del bp_net_1, bp_net_2, bp_net_3

In [None]:
# Compare the two loss functions.
# The networks differ only in the `classifacation` flag passed to the constructor
# (presumably this selects the loss function -- confirm in model.py).
bp_net_1, bp_net_2 = (
    back_propagation(layers=layers, dropout=dropout, classifacation=flag)
    for flag in (True, False)
)

# Both networks are trained with exactly the same arguments.
train_args = (
    train_imgs, train_label_vectors, valid_imgs, valid_labels,
    batch_size, epochs, learning_rate, l2_reg, momentum,
)
_, valid_accs_1, train_losses_1 = bp_net_1.train(*train_args)
_, valid_accs_2, train_losses_2 = bp_net_2.train(*train_args)

# Test accuracy: fraction of samples whose arg-max prediction equals the label.
acc_1, acc_2 = (
    (np.argmax(np.array(net.predict(test_imgs)), axis=1) == test_labels).mean()
    for net in (bp_net_1, bp_net_2)
)

for tag, value in zip(("Accuracy_1:", "Accuracy_2:"), (acc_1, acc_2)):
    print(tag, value)


# Show the training curves, one line per model.
x = np.arange(len(train_losses_1))
curve_styles = (('red', 'model 1'), ('blue', 'model 2'))

plt.figure(1)
for curve, (color, label) in zip((train_losses_1, train_losses_2), curve_styles):
    plt.plot(x, curve, color=color, label=label)
plt.xlabel('epoch')
plt.ylabel('train_loss')
plt.title('loss')
plt.legend()

plt.figure(2)
for curve, (color, label) in zip((valid_accs_1, valid_accs_2), curve_styles):
    plt.plot(x, curve, color=color, label=label)
plt.xlabel('epoch')
plt.ylabel('valid_acc')
plt.title('accuracy')
plt.legend()
plt.show()

# Drop the trained networks; later cells build fresh ones under the same names.
del bp_net_1, bp_net_2

In [None]:
# Compare the effect of the bias setting (`full_bias`).
# Both networks are constructed identically so that `full_bias` is the only
# variable in this experiment. (The second network used to be built with
# classifacation=False, a leftover from the loss-function comparison, which
# mixed a second change into the result.)
bp_net_1 = back_propagation(layers=layers, dropout=dropout, classifacation=True)
bp_net_2 = back_propagation(layers=layers, dropout=dropout, classifacation=True)

# Model 1 trains with the default `full_bias`; model 2 trains with full_bias=False.
_, valid_accs_1, train_losses_1 = bp_net_1.train(train_imgs, train_label_vectors, valid_imgs, valid_labels, batch_size, epochs, learning_rate, l2_reg, momentum)
_, valid_accs_2, train_losses_2 = bp_net_2.train(train_imgs, train_label_vectors, valid_imgs, valid_labels, batch_size, epochs, learning_rate, l2_reg, momentum, full_bias=False)

# Test accuracy: fraction of samples whose arg-max prediction equals the label.
labels_pred_1 = np.argmax(np.array(bp_net_1.predict(test_imgs)), axis=1)
acc_1 = (labels_pred_1 == test_labels).mean()
labels_pred_2 = np.argmax(np.array(bp_net_2.predict(test_imgs)), axis=1)
acc_2 = (labels_pred_2 == test_labels).mean()

print("Accuracy_1:", acc_1)
print("Accuracy_2:", acc_2)


# Show the training curves (model 1: default full_bias, model 2: full_bias=False).
x = np.arange(len(train_losses_1))
plt.figure(1)
plt.plot(x, train_losses_1, color='red', label='model 1')
plt.plot(x, train_losses_2, color='blue', label='model 2')
plt.xlabel('epoch')
plt.ylabel('train_loss')
plt.title('loss')
plt.legend()

plt.figure(2)
plt.plot(x, valid_accs_1, color='red', label='model 1')
plt.plot(x, valid_accs_2, color='blue', label='model 2')
plt.xlabel('epoch')
plt.ylabel('valid_acc')
plt.title('accuracy')
plt.legend()
plt.show()

# Drop the trained networks; later cells build fresh ones under the same names.
del bp_net_1, bp_net_2

In [None]:
# Compare the effect of dropout.
dropout_1 = 0
dropout_2 = 0.1
dropout_3 = 0.5

# The networks differ only in the dropout value given to the constructor.
bp_net_1 = back_propagation(layers=layers, dropout=dropout_1, classifacation=True)
bp_net_2 = back_propagation(layers=layers, dropout=dropout_2, classifacation=True)
bp_net_3 = back_propagation(layers=layers, dropout=dropout_3, classifacation=True)

# All three runs use the same L2 coefficient (`l2_reg`). Passing the per-model
# l2_reg_1/2/3 values from the L2 experiment here would change two things at
# once and make the dropout comparison meaningless.
_, valid_accs_1, train_losses_1 = bp_net_1.train(train_imgs, train_label_vectors, valid_imgs, valid_labels, batch_size, epochs, learning_rate, l2_reg, momentum)
_, valid_accs_2, train_losses_2 = bp_net_2.train(train_imgs, train_label_vectors, valid_imgs, valid_labels, batch_size, epochs, learning_rate, l2_reg, momentum)
_, valid_accs_3, train_losses_3 = bp_net_3.train(train_imgs, train_label_vectors, valid_imgs, valid_labels, batch_size, epochs, learning_rate, l2_reg, momentum)

# Test accuracy: fraction of samples whose arg-max prediction equals the label.
labels_pred_1 = np.argmax(np.array(bp_net_1.predict(test_imgs)), axis=1)
acc_1 = (labels_pred_1 == test_labels).mean()
labels_pred_2 = np.argmax(np.array(bp_net_2.predict(test_imgs)), axis=1)
acc_2 = (labels_pred_2 == test_labels).mean()
labels_pred_3 = np.argmax(np.array(bp_net_3.predict(test_imgs)), axis=1)
acc_3 = (labels_pred_3 == test_labels).mean()

print("Accuracy_1:", acc_1)
print("Accuracy_2:", acc_2)
print("Accuracy_3:", acc_3)


# Show the training curves, one line per dropout value.
x = np.arange(len(train_losses_1))
plt.figure(1)
plt.plot(x, train_losses_1, color='red', label='dropout = 0')
plt.plot(x, train_losses_2, color='blue', label='dropout = 0.1')
plt.plot(x, train_losses_3, color='green', label='dropout = 0.5')
plt.xlabel('epoch')
plt.ylabel('train_loss')
plt.title('loss')
plt.legend()

plt.figure(2)
plt.plot(x, valid_accs_1, color='red', label='dropout = 0')
plt.plot(x, valid_accs_2, color='blue', label='dropout = 0.1')
plt.plot(x, valid_accs_3, color='green', label='dropout = 0.5')
plt.xlabel('epoch')
plt.ylabel('valid_acc')
plt.title('accuracy')
plt.legend()
plt.show()

# Drop the trained networks; later cells build fresh ones under the same names.
del bp_net_1, bp_net_2, bp_net_3

In [None]:
# Train the baseline model, then fine-tune it.
bp_net = back_propagation(layers=layers, dropout=dropout, classifacation=True)
# The same positional arguments are used for training and for fine-tuning below.
run_args = (
    train_imgs, train_label_vectors, valid_imgs, valid_labels,
    batch_size, epochs, learning_rate, l2_reg, momentum,
)
_, valid_accs, train_losses = bp_net.train(*run_args)

# Plot the training loss (figure 1) and the validation accuracy (figure 2).
x = np.arange(len(train_losses))
for fig_id, series, y_name, heading in (
    (1, train_losses, 'loss', 'train loss'),
    (2, valid_accs, 'valid acc', 'train accuracy'),
):
    plt.figure(fig_id)
    plt.plot(x, series, color='red', label='train')
    plt.xlabel('epoch')
    plt.ylabel(y_name)
    plt.title(heading)
    plt.legend()
plt.show()

# Test accuracy of the trained model: matching predictions divided by sample count.
labels_pred = np.argmax(np.array(bp_net.predict(test_imgs)), axis=1)
acc = (labels_pred == test_labels).sum() / len(test_labels)

print("Accuracy:", acc)

# Keep the baseline curves and accuracy; later cells plot against them.
normal_valid_accs, normal_train_losses, normal_test_acc = valid_accs, train_losses, acc

# Fine-tune the last layer of bp_net (per the original note; the behavior is
# implemented by `fine_tune` in model.py).
_, valid_accs, ft_losses = bp_net.fine_tune(*run_args)

# Show the fine-tuning curves: loss (figure 1) and validation accuracy (figure 2).
x = np.arange(len(ft_losses))
for fig_id, series, y_name, heading in (
    (1, ft_losses, 'loss', 'fine tune loss'),
    (2, valid_accs, 'valid acc', 'fine tune accuracy'),
):
    plt.figure(fig_id)
    plt.plot(x, series, color='red', label='fine tune')
    plt.xlabel('epoch')
    plt.ylabel(y_name)
    plt.title(heading)
    plt.legend()
plt.show()

# Evaluate the fine-tuned model on the test set.
labels_pred = np.argmax(np.array(bp_net.predict(test_imgs)), axis=1)
acc = (labels_pred == test_labels).mean()

print("Accuracy:", acc)

del bp_net

In [None]:
# Effect of adding a convolutional layer.
in_channels = 1
out_channels = 3
kernel_size = (5, 5)
# The first dense layer is sized for out_channels maps of 28x28 values
# (presumably the 5x5 kernel with padding=2 keeps the 28x28 size -- confirm in Conv_BP).
layers = [out_channels * 28 * 28, 256, 128, num_labels]
conv_bp = Conv_BP(
    in_channels=in_channels,
    out_channels=out_channels,
    kernel_size=kernel_size,
    layers=layers,
    padding=2,
    dropout=dropout,
    classifacation=True,
)

# Train the model. Every image set is first reshaped to (N, 1, 28, 28).
train_imgs = train_imgs.reshape((train_imgs.shape[0], 1, 28, 28))
valid_imgs = valid_imgs.reshape((valid_imgs.shape[0], 1, 28, 28))
test_imgs = test_imgs.reshape((test_imgs.shape[0], 1, 28, 28))
_, valid_accs, train_losses = conv_bp.train(
    train_imgs,
    train_label_vectors,
    valid_imgs,
    valid_labels,
    batch_size=batch_size,
    epochs=epochs,
    lr=learning_rate,
    l2_regularization=l2_reg,
    momentum_strength=momentum,
)

# Plot the conv model against the stored baseline ("normal") curves.
x = np.arange(len(train_losses))
curve_styles = (('red', 'with conv'), ('blue', 'without conv'))

plt.figure(1)
for curve, (color, label) in zip((train_losses, normal_train_losses), curve_styles):
    plt.plot(x, curve, color=color, label=label)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('train loss')
plt.legend()

plt.figure(2)
for curve, (color, label) in zip((valid_accs, normal_valid_accs), curve_styles):
    plt.plot(x, curve, color=color, label=label)
plt.xlabel('epoch')
plt.ylabel('valid acc')
plt.title('train accuracy')
plt.legend()
plt.show()

# Test accuracy: fraction of samples whose arg-max prediction equals the label.
labels_pred = np.argmax(np.array(conv_bp.predict(test_imgs)), axis=1)
acc = (labels_pred == test_labels).mean()
print("Accuracy:", acc)

del conv_bp

In [None]:
# Merge the augmented images into the training set.
# Flatten every image set back to one row per image. test_imgs must be
# flattened as well: the conv experiment reshaped it to (N, 1, 28, 28), and the
# fully connected network evaluated at the end of this cell takes 28 * 28 inputs.
train_imgs = train_imgs.reshape((train_imgs.shape[0], -1))
valid_imgs = valid_imgs.reshape((valid_imgs.shape[0], -1))
test_imgs = test_imgs.reshape((test_imgs.shape[0], -1))
print(train_imgs.shape, train_labels.shape)
# Stack original + noised + translated + cropped images; the labels are repeated
# four times in the same order so they stay aligned with the stacked images.
train_imgs = np.concatenate((train_imgs, noised_imgs, trans_imgs, cropped_imgs), axis=0)
train_labels = np.concatenate((train_labels, train_labels, train_labels, train_labels), axis=0)
# One-hot encode the enlarged label array.
train_label_vectors = np.eye(num_labels)[train_labels]
print(train_imgs.shape, train_labels.shape)

# Train the model on the augmented data.
layers = [28 * 28, 256, 128, num_labels]
bp_net = back_propagation(layers=layers, dropout=dropout, classifacation=True)
_, valid_accs, train_losses = bp_net.train(train_imgs, train_label_vectors, valid_imgs, valid_labels, batch_size, epochs, learning_rate, l2_reg, momentum)

# Plot against the stored baseline ("normal") curves trained without augmentation.
# Legend text corrected from "argumentation" to "augmentation".
x = np.arange(len(train_losses))
plt.figure(1)
plt.plot(x, train_losses, color='red', label='with augmentation')
plt.plot(x, normal_train_losses, color='blue', label='without augmentation')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('train loss')
plt.legend()

plt.figure(2)
plt.plot(x, valid_accs, color='red', label='with augmentation')
plt.plot(x, normal_valid_accs, color='blue', label='without augmentation')
plt.xlabel('epoch')
plt.ylabel('valid acc')
plt.title('train accuracy')
plt.legend()
plt.show()

# Test accuracy: matching predictions divided by sample count.
labels_pred = np.argmax(np.array(bp_net.predict(test_imgs)), axis=1)
acc = np.sum(labels_pred == test_labels) / len(test_labels)

print("Accuracy:", acc)
