In [1]:
# Mount Google Drive inside the Colab runtime; the Drive contents then
# appear under /content/drive/ (see the "Mounted at" output below).
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
# List the project folder on the mounted Drive (the same directory that is
# appended to sys.path in the next cell).
!ls "/content/drive/My Drive/Deep Learning based on python/"

1.ipynb  4.5.3_tarin_nn_epoch.ipynb  gradient.py  mnist.py
2.ipynb  functions.py		     mnist.pkl	  __pycache__


In [3]:
import sys, os
sys.path.append('/content/drive/My Drive/Deep Learning based on python/')
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from functions import *
from gradient import numerical_gradient

class TwolayerNet(object):
	"""Two-layer fully connected classifier: affine -> sigmoid -> affine -> softmax.

	All learnable parameters are stored in the ``params`` dict under the keys
	``'W1'``, ``'b1'`` (first layer) and ``'W2'``, ``'b2'`` (second layer).
	``sigmoid``, ``softmax`` and ``cross_entropy_error`` are expected to be in
	module scope (this notebook star-imports them from ``functions``).
	"""

	def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
		"""Initialise the parameters.

		Args:
			input_size: number of input features.
			hidden_size: number of hidden units.
			output_size: number of output classes.
			weight_init_std: scale applied to the standard-normal initial weights.
		"""
		self.params = {}
		# Weights: standard-normal samples scaled by weight_init_std.
		# Biases: zeros.
		self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
		self.params['b1'] = np.zeros(hidden_size)
		self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
		self.params['b2'] = np.zeros(output_size)

	def predict(self, x):
		"""Run the forward pass on ``x`` and return the softmax output."""
		W1, W2 = self.params['W1'], self.params['W2']
		b1, b2 = self.params['b1'], self.params['b2']

		hidden = sigmoid(np.dot(x, W1) + b1)
		return softmax(np.dot(hidden, W2) + b2)

	def loss(self, x, t):
		"""Return ``cross_entropy_error`` between the prediction for ``x`` and ``t``."""
		return cross_entropy_error(self.predict(x), t)

	def accuracy(self, x, t):
		"""Return the fraction of rows of ``x`` whose predicted class matches ``t``.

		Args:
			x: 2-D batch of inputs, one sample per row.
			t: targets, either one-hot / score rows (2-D, reduced with argmax
				along axis 1) or a 1-D array of class indices (used as-is).
		"""
		predicted = np.argmax(self.predict(x), axis=1)

		# Accept both one-hot targets and plain index labels; the original
		# code raised on 1-D labels because of the unconditional argmax.
		t = np.asarray(t)
		labels = t if t.ndim == 1 else np.argmax(t, axis=1)

		return np.sum(predicted == labels) / float(x.shape[0])

	def numerical_gradient(self, x, t):
		"""Estimate the loss gradient for every parameter by numerical differentiation.

		Returns:
			dict with the same keys as ``params`` holding the gradient arrays.
		"""
		# The callable ignores its argument and always re-evaluates the loss
		# from self.params. NOTE(review): this presumably relies on the
		# module-level numerical_gradient() perturbing the array it is handed
		# in place -- confirm against gradient.py.
		loss_W = lambda W: self.loss(x, t)

		grads = {}
		# Inside a method the bare name resolves to the module-level
		# numerical_gradient function, not to this method.
		for key in ('W1', 'b1', 'W2', 'b2'):
			grads[key] = numerical_gradient(loss_W, self.params[key])

		return grads

# Load the MNIST train/test split through the project's load_mnist helper
# (normalised inputs, one-hot labels requested).
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Hyperparameters
train_size = x_train.shape[0]	# number of training samples (60000 per the original note)
batch_size = 100 		# samples drawn at random for each mini-batch
iters_num = 10000		# number of gradient updates
learning_rate = 0.1

# Training history
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Iterations per epoch. Integer floor division keeps this an int, so the
# `i % iter_per_epoch == 0` check below fires once per epoch even when
# train_size is not an exact multiple of batch_size (true division gave a
# float, for which the check only works on exact multiples).
iter_per_epoch = max(train_size // batch_size, 1)

network = TwolayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
	# Draw a random mini-batch of batch_size indices from the training set.
	batch_mask = np.random.choice(train_size, batch_size)
	x_batch = x_train[batch_mask]
	t_batch = t_train[batch_mask]

	# Compute the gradient by numerical differentiation. This is very slow
	# for a network of this size; the original note mentions a faster
	# `network.gradient(x_batch, t_batch)` variant, which TwolayerNet does
	# not define in this notebook.
	grad = network.numerical_gradient(x_batch, t_batch)

	# Gradient-descent step; stochastic (SGD) because the batch is random.
	for key in ('W1', 'b1', 'W2', 'b2'):
		network.params[key] -= learning_rate * grad[key]

	# Record the mini-batch loss after the update.
	loss = network.loss(x_batch, t_batch)
	train_loss_list.append(loss)

	# Once per epoch, evaluate accuracy on the full train and test sets.
	if i % iter_per_epoch == 0:
		train_acc = network.accuracy(x_train, t_train)
		test_acc = network.accuracy(x_test, t_test)
		train_acc_list.append(train_acc)
		test_acc_list.append(test_acc)
		print("train acc, test acc | " + str(train_acc) + "," + str(test_acc))

# Plot the per-epoch train and test accuracy curves.
markers = {'train': 'o', 'test': 's'}	# defined as in the original; not used by the plot calls
x = np.arange(len(train_acc_list))	# one x position per recorded epoch

fig, ax = plt.subplots()
ax.plot(x, train_acc_list, label='train acc')
ax.plot(x, test_acc_list, label='test acc', linestyle='--')
ax.set_xlabel("epochs")
ax.set_ylabel("accuracy")
ax.set_ylim(0, 1.0)
ax.legend(loc='lower right')
plt.show()

KeyboardInterrupt: ignored