In [24]:
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets
from tqdm import tqdm

In [25]:
train_dataset = datasets.MNIST(root = r'./data',train = True, download = True)
test_dataset = datasets.MNIST(root = r'./data',train = False, download=True)

train_imgs = train_dataset.data.numpy()
train_labels = train_dataset.targets.numpy()
test_imgs = test_dataset.data.numpy()
test_labels = test_dataset.targets.numpy()

In [26]:
# 将图片展开为多组一维数组,并归一化
def normalize_images(imgs):
    imgs = imgs.reshape(-1,28*28)
    return imgs/255

# 判断是否为可迭代对象
def is_iterable(obj):
    try:
        iter(obj)
        return True
    except TypeError:
        return False
    
# 将标签转换为one-hot编码
def one_hot_labels(labels):
    '''
    labels: 标签（一维数组）

    numpy 高级索引功能
    以train_labels为行索引，以train_labels为列索引，将one_hot中的元素赋值为1
    '''
    one_hot = np.zeros((labels.size,labels.max()+1))
    one_hot[np.arange(labels.size),labels] = 1

    return one_hot

# 显示图片
def show_img(img,num=0):
    if is_iterable(img):
        plt.imshow(img[num].reshape(28,28),cmap='gray')
        plt.colorbar()
        plt.show()
    else:
        plt.imshow(img.reshape(28,28),cmap='gray')
        plt.colorbar()
        plt.show()

def sigmoid(x):
    return 1/(1+np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x)*(1-sigmoid(x))

def print_shape(*args):
    for arg in args:
        name = get_arg_name(arg)
        print(f'{name}',arg.shape)

# 获取变量名
def get_arg_name(arg):
    for name in globals().keys():
        if globals()[name] is arg:
            return name

In [27]:
def forward_propagation(imgs,w1,b1,w2,b2):
    '''
    Args:
        imgs: 图片数据
        w1: 第一层权重
        b1: 第一层偏置
        w2: 第二层权重
        b2: 第二层偏置
    Return:
        z1: 第一层线性输出
        a1: 第一层激活输出
        z2: 第二层线性输出
        a2: 第二层激活输出
    '''
    z1 = np.dot(w1,imgs.T)+b1
    a1 = sigmoid(z1)
    z2 = np.dot(w2,a1) + b2
    a2 = sigmoid(z2)
    print_shape(z1,a1,z2,a2)
    return z1,a1,z2,a2

def backward_propagation(imgs,labels,z1,a1,w2,a2,):
    size = imgs.shape[0]
    dz2 = a2 - one_hot_labels(labels[:size]).T
    dw2 = np.dot(dz2,a1.T)
    db2 = dz2.copy()
    da1 = np.dot(dz2.T,w2).T
    dz1 = da1*sigmoid_derivative(z1)
    dw1 = np.dot(dz1,imgs)
    db1 = dz1.copy()
    return dw1,db1,dw2,db2

def update_parameters(w1,b1,w2,b2,dw1,db1,dw2,db2,lr=0.01):
    learning_rate = lr
    w1 = w1 - learning_rate*dw1
    b1 = b1 - learning_rate*db1
    w2 = w2 - learning_rate*dw2
    b2 = b2 - learning_rate*db2
    return w1,b1,w2,b2

In [28]:
train_imgs = normalize_images(train_imgs)
test_imgs = normalize_images(test_imgs)

In [29]:
imgs = train_imgs[:1000]
labels = train_labels[:1000]

In [30]:
n_hidden = 100
n_input = imgs.shape[1]
n_output = labels.max()+1

w1 = np.random.randn(n_hidden,n_input)
b1 = np.zeros((n_hidden,1))
w2 = np.random.randn(n_output,n_hidden)
b2 = np.zeros((n_output,1))

In [32]:
z1,a1,z2,a2 = forward_propagation(imgs,w1,b1,w2,b2)

None (100, 1000)
None (100, 1000)
None (10, 1000)
None (10, 1000)


In [None]:
img = train_imgs[0].reshape(-1,784)

In [None]:
show_img(train_imgs,35)