In [1]:
import struct
import numpy as np
import matplotlib.pyplot as plt

# 读取标签数据集
with open(r'dataset\MNIST\raw\train-labels-idx1-ubyte', 'rb') as lbpath:
    labels_magic, labels_num = struct.unpack('>II', lbpath.read(8))
    labels = np.fromfile(lbpath, dtype=np.uint8)
 
# 读取图片数据集
with open(r'dataset\MNIST\raw\train-images.idx3-ubyte', 'rb') as imgpath:
    images_magic, images_num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
    images = np.fromfile(imgpath, dtype=np.uint8).reshape(images_num, rows * cols) 
 
# # 打印数据信息
# print('labels_magic is {} \n'.format(labels_magic),
#       'labels_num is {} \n'.format(labels_num),
#       'labels is {} \n'.format(labels))
#
# print('images_magic is {} \n'.format(images_magic),
#       'images_num is {} \n'.format(images_num),
#       'rows is {} \n'.format(rows),
#       'cols is {} \n'.format(cols),
#       'images is {} \n'.format(images))

# # 假设images是包含所有图像数据的numpy数组
# # 取出第一张图像的像素值（这是一个长度为784的一维数组）
# first_image_flattened = images[0]
#
# # 将这个一维数组重新塑形为28x28的二维矩阵
# first_image_matrix = first_image_flattened.reshape((rows, cols))
#
# # 打印第一张图片的28x28矩阵
# print(first_image_matrix)
#
# # 测试取出一张图片和对应标签
# choose_num = 0 # 指定一个编号，你可以修改这里
# label = labels[choose_num]
# plt.imshow(first_image_matrix, cmap='gray')
#
# plt.title('the label is : {}'.format(label))
# plt.savefig('dataset/train-labels-idx1-ubyte.png')
# plt.show()

In [2]:
import struct
import numpy as np

# 读取标签数据集
with open(r'dataset\MNIST\raw\train-labels-idx1-ubyte', 'rb') as lbpath:
    labels_magic, labels_num = struct.unpack('>II', lbpath.read(8))
    labels = np.fromfile(lbpath, dtype=np.uint8)

# 读取图片数据集
with open(r'dataset\MNIST\raw\train-images.idx3-ubyte', 'rb') as imgpath:
    images_magic, images_num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
    images = np.fromfile(imgpath, dtype=np.uint8).reshape(images_num, rows, cols)

# 定义卷积核
kernel = np.array([
    [1, 0, 1],
    [0, 1, 0],
    [1, 0, 1]
])

def pad_with_zeros(matrix, pad_width):
    return np.pad(matrix, pad_width, mode='constant', constant_values=0)

def max_pooling(matrix, pool_size=2):
    output_shape = (matrix.shape[0] // pool_size, matrix.shape[1] // pool_size)
    pooled_matrix = np.zeros(output_shape)
    for i in range(0, matrix.shape[0], pool_size):
        for j in range(0, matrix.shape[1], pool_size):
            pooled_matrix[i // pool_size, j // pool_size] = np.max(matrix[i:i + pool_size, j:j + pool_size])
    return pooled_matrix

def conv_single_step(matrix, kernel):
    # 进行零填充
    padded_matrix = pad_with_zeros(matrix, 1)
    print("padded_matrix:"+str(padded_matrix.shape))
    print(padded_matrix)
    # 获取矩阵尺寸
    output_dim = matrix.shape[0] + 2  # 填充后大小
    # 初始化输出矩阵
    output_matrix = np.zeros((output_dim - 2, output_dim - 2))  # 移除填充后大小

    # 进行卷积操作
    for k in range(output_dim - 2):
        for j in range(output_dim - 2):
            region = padded_matrix[k:k+3, j:j+3]
            output_matrix[k, j] = np.sum(region * kernel)
    return output_matrix

def conv(images, kernel, rows=28, cols=28):
    original_matrix = images.reshape((rows, cols))

    # 第一次卷积和池化
    conv1_output = conv_single_step(original_matrix, kernel)
    print("conv1_output:"+str(conv1_output.shape))
    print(conv1_output)
    pool1_output = max_pooling(conv1_output)
    print("pool1_output:"+str(pool1_output.shape))
    print(pool1_output)
    # 第二次卷积和池化
    conv2_output = conv_single_step(pool1_output, kernel)
    print("conv2_output:"+str(conv2_output.shape))
    print(conv2_output)
    pool2_output = max_pooling(conv2_output)
    print("pool2_output:"+str(pool2_output.shape))
    print(pool2_output)

    return pool2_output

# main
# CNN-pooling-flatten
flatten_lst = []
for i in range(3):
    # 设置打印选项以避免科学计数法
    np.set_printoptions(suppress=True, formatter={'all':lambda x: str(int(x))})
    print("第{}张图片：".format(i+1))

    # 取出第一张图像的像素值（这是一个长度为784的一维数组）
    first_image_flattened = images[i]
    # 将这个一维数组重新塑形为28x28的二维矩阵
    first_image_matrix = first_image_flattened.reshape((rows, cols))

    # 打印第一张图片的28x28矩阵
    print("origin_matrix"+str(first_image_matrix.shape))
    print(first_image_matrix)
    pooled_matrix = conv(images[i], kernel)

    # 展成一维向量
    flattened_vector = pooled_matrix.flatten()
    flatten_lst.append(flattened_vector)
    print("flattened_vector:"+str(flattened_vector.shape))
    print(flattened_vector)
    print()

第1张图片：
origin_matrix(28, 28)
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 3 18 18 18 126 136 175 26 166 255 247 127 0 0 0
  0]
 [0 0 0 0 0 0 0 0 30 36 94 154 170 253 253 253 253 253 225 172 253 242
  195 64 0 0 0 0]
 [0 0 0 0 0 0 0 49 238 253 253 253 253 253 253 253 253 251 93 82 82 56 39
  0 0 0 0 0]
 [0 0 0 0 0 0 0 18 219 253 253 253 253 253 198 182 247 241 0 0 0 0 0 0 0
  0 0 0]
 [0 0 0 0 0 0 0 0 80 156 107 253 253 205 11 0 43 154 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 14 1 154 253 90 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 139 253 190 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 11 190 253 70 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 35 241 225 160 108 1 0 0 0 0 0 0 0 0 0 0]
 [

In [3]:
def batch_normalization(X, gamma, beta, epsilon=1e-5):
    # 计算批量均值
    print(X)
    mu = np.mean(X, axis=0)

    # 设置打印选项以避免科学计数法并设置小数点后精度为2
    np.set_printoptions(suppress=True, precision=2)
    print("mu")
    print(mu)

    # 计算批量方差
    var = np.var(X, axis=0)
    print("var")
    print(var)

    # 归一化
    X_hat = (X - mu) / np.sqrt(var + epsilon)

    # 缩放和平移
    Y = gamma * X_hat + beta
    return Y

# BN
X = np.array(flatten_lst)
gamma = np.ones(49)  # 缩放参数
beta = np.zeros(49)   # 平移参数

output = batch_normalization(X, gamma, beta)
print("Batch normalized output:")


[[0 124 565 1073 1335 1153 553 0 1353 3736 4399 3702 2472 760 0 1386 4424
  4606 3417 1684 633 0 201 1926 3285 3298 1458 0 18 580 1921 4092 4797
  2352 0 688 2787 3779 4077 3677 1370 0 715 2380 2778 1902 544 2 0]
 [0 0 227 1441 2240 1102 6 0 238 1826 4666 5228 3186 416 0 1339 3816 4927
  4534 3478 1110 170 2747 3691 2235 3083 3581 1360 341 3153 3683 2669
  3028 2878 771 256 3145 4215 3598 2790 835 71 85 1327 2626 2143 630 0 0]
 [81 143 81 0 391 391 232 991 1379 407 0 1541 2001 567 2024 2430 749 1201
  3184 2810 635 2276 3034 2253 2892 3596 2734 154 1120 2096 1934 2333
  3015 2159 0 0 0 0 1356 2695 1874 0 0 0 0 678 1560 994 0]]
mu
[  27.     89.    291.    838.   1322.    882.    263.67  330.33  990.
 1989.67 3021.67 3490.33 2553.    581.    674.67 1718.33 2996.33 3578.
 3711.67 2657.33  792.67  815.33 1994.   2623.33 2804.   3325.67 2591.
  504.67  493.   1943.   2512.67 3031.33 3613.33 2463.    257.    314.67
 1977.33 2664.67 3010.33 3054.   1359.67   23.67  266.67 1235.67 1801.33
 15

In [13]:
# FC(ReLU+softmax)
def relu(x):
    return np.maximum(0, x)

def softmax(x):
    # 减去每行的最大值以提高数值稳定性
    e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    # 计算每行的指数总和
    sum_exp = np.sum(e_x, axis=1, keepdims=True)
    # 计算Softmax
    return e_x / sum_exp

def predict_and_find_max_prob(y):
    """对矩阵y应用Softmax，预测类别索引，并找到最大概率值"""
    predicted_classes = np.argmax(y, axis=1)
    max_probabilities = np.max(y, axis=1)
    return predicted_classes, max_probabilities

print("x:"+str(output.shape))
print(output)

w = np.random.uniform(-1, 1, size=(49,10))
print("w:"+str(w.shape))
print(w)

b = np.random.uniform(0, 1, size=(3,10))
print("b:"+str(b.shape))
print(b)

z = np.dot(output, w) + b
print("z:"+str(z.shape))
print(z)

a = relu(z)
print("a:"+str(a.shape))
print(a)

y = softmax(a)
print("softmax结果："+str(y.shape))
# 设置打印选项以避免科学计数法并设置小数点后精度为2
np.set_printoptions(suppress=True, precision=6)
print(y)

predicted_classes, max_probabilities = predict_and_find_max_prob(y)
print("预测的类别索引:"+str(predicted_classes)+"\n最大概率值:"+str(max_probabilities))


x:(3, 49)
[[-0.71  0.55  1.35  0.38  0.02  0.78  1.29 -0.71  0.68  1.28  0.64  0.14
  -0.17  1.27 -0.71 -0.66  0.89  0.61 -0.5  -1.31 -0.71 -0.79 -1.41 -0.91
   1.11 -0.13 -1.3  -0.83 -1.03 -1.29 -0.71  1.39  1.41 -0.37 -0.71  1.31
   0.58  0.59  0.9   1.41  0.02 -0.71  1.41  1.18  0.77  0.51 -0.8  -0.7
   0.  ]
 [-0.71 -1.4  -0.32  0.99  1.22  0.63 -1.15 -0.71 -1.41 -0.12  0.77  1.15
   1.3  -1.17 -0.71 -0.75  0.51  0.8   1.4   1.11  1.41 -0.62  0.59  1.39
  -1.31 -1.15  1.13  1.41 -0.33  1.15  1.41 -0.48 -0.7   1.37  1.41 -0.21
   0.83  0.82  0.5  -0.6  -1.24  1.41 -0.57  0.09  0.65  0.89 -0.61 -0.71
   0.  ]
 [ 1.41  0.85 -1.04 -1.37 -1.23 -1.41 -0.14  1.41  0.73 -1.16 -1.41 -1.29
  -1.13 -0.1   1.41  1.41 -1.4  -1.41 -0.9   0.21 -0.7   1.41  0.82 -0.48
   0.2   1.29  0.16 -0.58  1.36  0.14 -0.7  -0.92 -0.71 -1.   -0.71 -1.11
  -1.41 -1.41 -1.39 -0.81  1.21 -0.71 -0.84 -1.27 -1.41 -1.4   1.41  1.41
   0.  ]]
w:(49, 10)
[[-0.72  0.1   0.4   0.97 -0.12 -0.36  0.76  0.92 -0.49  0.21]
 

In [14]:
# 计算损失函数

# 实际one-hot值（5、0、4）
Y = np.array([
    [0,0,0,0,0,1,0,0,0,0],
    [1,0,0,0,0,0,0,0,0,0],
    [0,0,0,0,1,0,0,0,0,0]
])

# 计算交叉熵损失的标量值,y为预测值
cross_entropy_loss = -np.sum(Y * np.log(y)) / Y.shape[0]
print(cross_entropy_loss)

# 计算每个样本的交叉熵损失，并输出一个向量
cross_entropy_loss_vector = -np.sum(Y * np.log(y), axis=1)
cross_entropy_loss_vector

5.287122189073737


array([1.266355, 7.176183, 7.418829])