In [8]:
"""Day 4 Question a:
1.感知机的层数和维数均不是越大越好,没有一个固定的规则来确定感知机的层数和维度。
2.层数变化：会影响网络的表示能力以及学习能力，增加层数可以提高网络的学习能力，使其能够处理更复杂的模式和输入特征。
  但相应地，如果网络的输入模型本身较简单，特征较少，层数过多可能会导致过拟合，是模型效果下降，也可能导致梯度消失或爆炸
3.维度变化：维度是指各层的神经元数量，大小一般<=数据最初输入特征，>=数据最终输出特征
  增加感知机的维度同样可以增加网络的学习能力和表达能力。高维度可以提供更多的参数和自由度，使得感知机能够学习到更复杂的特征和模式。
  但增加维度也会增加网络的计算复杂度和存储需求，可能需要更长的训练时间和更高的设备性能，过高的维度同样可能导致过拟合
4.决定感知机的层数和维度要根据具体的任务需求和数据集来进行权衡。一般来说，如果数据集较简单或者维度较低，较低的层数和维度就足够了；
  而对于复杂的数据集和任务，适当地增加层数和维度可以提升感知机的性能。
5.以下实验中逐步增加网络层数，从结果中可以看出准确率先升后降，在本题中层数为2较合适
"""
import torch
import matplotlib.pyplot as plt
import numpy as np
from torch.utils import data
from torchvision import transforms
from torch import nn
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
%matplotlib inline
%config InlineBackend.figure_format = 'svg'  #输出svg矢量图格式

class MLP_01(nn.Module):
    def __init__(self):
        super(MLP_01, self).__init__()
        self.fc1 = nn.Linear(784, 10)
    
    def forward(self, x):
        x = self.fc1(x)
        return x

class MLP_02(nn.Module):
    def __init__(self):
        super(MLP_02, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

class MLP_03(nn.Module):
    def __init__(self):
        super(MLP_03, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

class MLP_04(nn.Module):
    def __init__(self):
        super(MLP_04, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32,10)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        return x

def load_mnist(path, kind='train'):  
    import os
    import gzip
    import numpy as np

    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte.gz'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte.gz'
                               % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    return images, labels

def net(X):
    X = X.reshape((-1, num_inputs))
    H = relu(X@W1 + b1)  # 这里“@”代表矩阵乘法
    return (H@W2 + b2)

def get_FashionMNIST(batch_size_input):
    image_train, label_train = load_mnist('D:\DataSet', kind='train')  #FashionMNIST数据集文件存放在D盘DataSet文件夹中
    image_test, label_test = load_mnist('D:\DataSet', kind='t10k')
    
    X_train = next(iter(data.DataLoader(image_train, batch_size=batch_size_input)))
    X_train = X_train.to(torch.float)
    X_train = X_train / 255.0   # X_train =(X_train+0.5)/0.5
    y_train = next(iter(data.DataLoader(label_train, batch_size=batch_size_input)))

    X_test = next(iter(data.DataLoader(image_test, batch_size=batch_size_input)))
    X_test = X_test.to(torch.float)
    X_test = X_test / 255.0   # X_test =(X_test+0.5)/0.5
    y_test = next(iter(data.DataLoader(label_test, batch_size=batch_size_input)))

    train_loader=[X_train,y_train]
    test_loader=[X_test,y_test]
    return train_loader,test_loader

def main_func(batch_size_input,model):
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    train_loader,test_loader=get_FashionMNIST(batch_size_input)
    for epoch in range(num_epochs):
        for images, labels in zip(train_loader[0],train_loader[1]):
            # 前向传播
            outputs = model(images)
            loss = criterion(outputs, labels)        
            # 反向传播和优化
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}')
    model.eval()  # 设置模型为评估模式
    with torch.no_grad():
        correct = 0
        total = batch_size_input
        for images, labels in zip(test_loader[0],test_loader[1]):
            outputs = model(images)
            predicted = torch.argmax(outputs)
            if predicted.item() == labels.item():
                correct += 1

        print(f'Accuracy on the test set: {100 * correct / total}%')

criterion = nn.CrossEntropyLoss()

num_epochs = 10
batch_size_input = int(input("请输入批量大小："))

print("单层网络：/n")
model_01 = MLP_01()
main_func(batch_size_input,model_01)

print("双层网络：/n")
model_02 = MLP_02()
main_func(batch_size_input,model_02)

print("三层网络：/n")
model_03 = MLP_03()
main_func(batch_size_input,model_03)

print("四层网络：/n")
model_04 = MLP_04()
main_func(batch_size_input,model_04)

请输入批量大小：1000
单层网络：/n
Epoch [1/10], Loss: 0.13173384964466095
Epoch [2/10], Loss: 0.09708321839570999
Epoch [3/10], Loss: 0.0790652260184288
Epoch [4/10], Loss: 0.06418188661336899
Epoch [5/10], Loss: 0.051808591932058334
Epoch [6/10], Loss: 0.04192375764250755
Epoch [7/10], Loss: 0.03434714302420616
Epoch [8/10], Loss: 0.028638239949941635
Epoch [9/10], Loss: 0.024294991046190262
Epoch [10/10], Loss: 0.020915543660521507
Accuracy on the test set: 77.6%
双层网络：/n
Epoch [1/10], Loss: 0.22282391786575317
Epoch [2/10], Loss: 0.19752736389636993
Epoch [3/10], Loss: 0.18238328397274017
Epoch [4/10], Loss: 0.13184240460395813
Epoch [5/10], Loss: 0.08392530679702759
Epoch [6/10], Loss: 0.04860299825668335
Epoch [7/10], Loss: 0.041925475001335144
Epoch [8/10], Loss: 0.027425896376371384
Epoch [9/10], Loss: 0.012908351607620716
Epoch [10/10], Loss: 0.01015567034482956
Accuracy on the test set: 80.1%
三层网络：/n
Epoch [1/10], Loss: 0.2749575674533844
Epoch [2/10], Loss: 0.21666288375854492
Epoch [3/10]

In [11]:
"""为更好看出各层维度对网络的影响，以下实验网络层数定为四
   从实验结果可以看出，网络维度之间的跨度不要太大也不要太小，应该根据层数大小适当调整
"""
class MLP_05(nn.Module):
    def __init__(self):
        super(MLP_05, self).__init__()
        self.fc1 = nn.Linear(784, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32,10)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        return x
    
class MLP_06(nn.Module):
    def __init__(self):
        super(MLP_06, self).__init__()
        self.fc1 = nn.Linear(784, 512)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 64)
        self.fc4 = nn.Linear(64,10)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        return x

class MLP_07(nn.Module):
    def __init__(self):
        super(MLP_07, self).__init__()
        self.fc1 = nn.Linear(784, 512)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 32)
        self.fc4 = nn.Linear(32,10)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        return x

batch_size_input = 500
print("网络维度784-128-64-62-10：/n")
model_05 = MLP_05()
main_func(batch_size_input,model_01)

print("网络维度784-512-256-64-10：/n")
model_06 = MLP_06()
main_func(batch_size_input,model_02)

print("网络维度784-512-128-32-10：/n")
model_07 = MLP_07()
main_func(batch_size_input,model_07)


网络维度784-128-64-62-10：/n
Epoch [1/10], Loss: 0.19011160731315613
Epoch [2/10], Loss: 0.12320341169834137
Epoch [3/10], Loss: 0.09708138555288315
Epoch [4/10], Loss: 0.08416499942541122
Epoch [5/10], Loss: 0.07708911597728729
Epoch [6/10], Loss: 0.07311637699604034
Epoch [7/10], Loss: 0.07099033147096634
Epoch [8/10], Loss: 0.07005282491445541
Epoch [9/10], Loss: 0.0699198916554451
Epoch [10/10], Loss: 0.07034587115049362
Accuracy on the test set: 79.4%
网络维度784-512-256-64-10：/n
Epoch [1/10], Loss: 0.03482205048203468
Epoch [2/10], Loss: 0.08203566074371338
Epoch [3/10], Loss: 0.025936244055628777
Epoch [4/10], Loss: 0.03734412044286728
Epoch [5/10], Loss: 0.021937096491456032
Epoch [6/10], Loss: 0.02395077422261238
Epoch [7/10], Loss: 0.013306856155395508
Epoch [8/10], Loss: 0.02083335444331169
Epoch [9/10], Loss: 0.014951078221201897
Epoch [10/10], Loss: 0.012693679891526699
Accuracy on the test set: 80.0%
网络维度784-512-128-32-10：/n
Epoch [1/10], Loss: 1.9411110877990723
Epoch [2/10], Los

In [14]:
"""Question b
   以下实验中设训练轮数为自变量，其他条件不变
   通过对训练轮数的逐步增加可以看出，本次实验的最佳训练轮数约为：20层左右
"""
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

model = MLP()
while True:
    num_epochs = input("请输入训练次数，退出循环请输入Q：")
    if num_epochs == "Q":
        break
    num_epochs = int(num_epochs)
    main_func(batch_size_input,model)

请输入训练次数，退出循环请输入Q：5
Epoch [1/5], Loss: 0.9476597309112549
Epoch [2/5], Loss: 0.41452911496162415
Epoch [3/5], Loss: 0.19183017313480377
Epoch [4/5], Loss: 0.08031967282295227
Epoch [5/5], Loss: 0.038602314889431
Accuracy on the test set: 71.8%
请输入训练次数，退出循环请输入Q：10
Epoch [1/10], Loss: 0.028441285714507103
Epoch [2/10], Loss: 0.02254311740398407
Epoch [3/10], Loss: 0.014861349947750568
Epoch [4/10], Loss: 0.011741576716303825
Epoch [5/10], Loss: 0.0069843437522649765
Epoch [6/10], Loss: 0.004871167708188295
Epoch [7/10], Loss: 0.0024530577939003706
Epoch [8/10], Loss: 0.002004520269110799
Epoch [9/10], Loss: 0.001327943871729076
Epoch [10/10], Loss: 0.0010353925172239542
Accuracy on the test set: 72.2%
请输入训练次数，退出循环请输入Q：15
Epoch [1/15], Loss: 0.002173206303268671
Epoch [2/15], Loss: 0.005254743155092001
Epoch [3/15], Loss: 0.007638887036591768
Epoch [4/15], Loss: 0.020374532788991928
Epoch [5/15], Loss: 0.015342430211603642
Epoch [6/15], Loss: 0.011067785322666168
Epoch [7/15], Loss: 0.0162

In [21]:
"""Question c
   增加训练轮数而减少过拟合的发生，可以通过增大数据集容量或者减小神经网络层数或维度
   以下实验在Question b实验次数为25的基础上，逐步增加数据集容量，可以看出准确率有所上升，说明在一定程度上减少了过拟合的发生
"""
num_epochs = 35
while True:
    batch_size_input = input("请输入批量大小，退出循环请输入Q：")
    if batch_size_input == "Q":
        break
    batch_size_input = int(batch_size_input)
    main_func(batch_size_input,model)

请输入批量大小，退出循环请输入Q：64
Epoch [1/35], Loss: 0.020091824233531952
Epoch [2/35], Loss: 0.01933489553630352
Epoch [3/35], Loss: 0.018669946119189262
Epoch [4/35], Loss: 0.018077963963150978
Epoch [5/35], Loss: 0.01753084547817707
Epoch [6/35], Loss: 0.01703898049890995
Epoch [7/35], Loss: 0.016570089384913445
Epoch [8/35], Loss: 0.01613253355026245
Epoch [9/35], Loss: 0.015721073374152184
Epoch [10/35], Loss: 0.015331629663705826
Epoch [11/35], Loss: 0.014969516545534134
Epoch [12/35], Loss: 0.01462324894964695
Epoch [13/35], Loss: 0.014295547269284725
Epoch [14/35], Loss: 0.01398701686412096
Epoch [15/35], Loss: 0.01368897408246994
Epoch [16/35], Loss: 0.013401546515524387
Epoch [17/35], Loss: 0.013135567307472229
Epoch [18/35], Loss: 0.012876107357442379
Epoch [19/35], Loss: 0.012627055868506432
Epoch [20/35], Loss: 0.01239160168915987
Epoch [21/35], Loss: 0.012160449288785458
Epoch [22/35], Loss: 0.011945733800530434
Epoch [23/35], Loss: 0.011737335473299026
Epoch [24/35], Loss: 0.01153631

In [19]:
"""以下实验在Question b实验次数为25的基础上
   通过逐步减小神经网络层数，可以看出准确率有所上升，说明在一定程度上减少了过拟合的发生
"""

num_epochs = 35
batch_size_input = 128
print("四层网络：/n")
model_04 = MLP_04()
main_func(batch_size_input,model_04)

print("三层网络：/n")
model_03 = MLP_03()
main_func(batch_size_input,model_03)

print("双层网络：/n")
model_02 = MLP_02()
main_func(batch_size_input,model_02)


四层网络：/n
Epoch [1/35], Loss: 2.184281826019287
Epoch [2/35], Loss: 2.163968801498413
Epoch [3/35], Loss: 2.129973888397217
Epoch [4/35], Loss: 2.0471205711364746
Epoch [5/35], Loss: 1.8337873220443726
Epoch [6/35], Loss: 1.5935473442077637
Epoch [7/35], Loss: 1.5717135667800903
Epoch [8/35], Loss: 1.5681190490722656
Epoch [9/35], Loss: 1.4821841716766357
Epoch [10/35], Loss: 1.3865034580230713
Epoch [11/35], Loss: 1.321047067642212
Epoch [12/35], Loss: 1.2842506170272827
Epoch [13/35], Loss: 1.2176593542099
Epoch [14/35], Loss: 1.1150157451629639
Epoch [15/35], Loss: 0.987224817276001
Epoch [16/35], Loss: 0.9322144985198975
Epoch [17/35], Loss: 0.7739132642745972
Epoch [18/35], Loss: 0.8028359413146973
Epoch [19/35], Loss: 0.7161833047866821
Epoch [20/35], Loss: 0.6651313900947571
Epoch [21/35], Loss: 0.45327290892601013
Epoch [22/35], Loss: 0.305682510137558
Epoch [23/35], Loss: 0.23097744584083557
Epoch [24/35], Loss: 0.12011606991291046
Epoch [25/35], Loss: 0.09048458188772202
Epoch 

In [28]:
"""Question d
   以下网络分别使用不同的激活函数，可以看到不同的激活函数对本次实验有一定影响
   从准确率来看，本次实验最适合的激活函数为sigmoid函数"""
import math

num_epochs = 20
batch_size_input = 512

class MLP_08(nn.Module):
    def __init__(self):
        super(MLP_08, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x
    
class MLP_09(nn.Module):
    def __init__(self):
        super(MLP_09, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.relu = torch.sigmoid
        self.fc2 = nn.Linear(256, 10)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x
    
class MLP_10(nn.Module):
    def __init__(self):
        super(MLP_10, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.relu = torch.tanh
        self.fc2 = nn.Linear(256, 10)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x
    
print(" ReLU函数：/n")
model_08 = MLP_08()
main_func(batch_size_input,model_08)

print("sigmoid函数：/n")
model_09 = MLP_09()
main_func(batch_size_input,model_09)

print("tanh函数：/n")
model_10 = MLP_10()
main_func(batch_size_input,model_10)

 ReLU函数：/n
Epoch [1/20], Loss: 0.5295756459236145
Epoch [2/20], Loss: 0.157632976770401
Epoch [3/20], Loss: 0.0640597939491272
Epoch [4/20], Loss: 0.0363403782248497
Epoch [5/20], Loss: 0.024771438911557198
Epoch [6/20], Loss: 0.015076849609613419
Epoch [7/20], Loss: 0.010905900038778782
Epoch [8/20], Loss: 0.00753264594823122
Epoch [9/20], Loss: 0.00516259903088212
Epoch [10/20], Loss: 0.0031724858563393354
Epoch [11/20], Loss: 0.0014078239910304546
Epoch [12/20], Loss: 0.0007182164117693901
Epoch [13/20], Loss: 0.00039915222441777587
Epoch [14/20], Loss: 0.0002585315378382802
Epoch [15/20], Loss: 0.00014983485743869096
Epoch [16/20], Loss: 0.00010990492592100054
Epoch [17/20], Loss: 0.00010179955279454589
Epoch [18/20], Loss: 6.8662193370983e-05
Epoch [19/20], Loss: 4.708655978902243e-05
Epoch [20/20], Loss: 3.361645576660521e-05
Accuracy on the test set: 73.046875%
sigmoid函数：/n
Epoch [1/20], Loss: 1.8782379627227783
Epoch [2/20], Loss: 1.0648036003112793
Epoch [3/20], Loss: 0.728518

In [29]:
"""Question e
   不同的batch大小，可能会对训练过程产生不同的影响
   数据方面：使用大批次训练可以在一次运算中处理更多的样本数据（但对设备性能要求高）
             使用小批次训练可以提供更多的样本差异性，因为每个批次中的样本可能具有不同的特点。这可以帮助模型更好地泛化到未见样本，并减少过拟合的风险。
   优化方面：大批次训练可以减少参数更新的频率，因为每个批次的梯度平均值更稳定。
             小批次训练可以提供更频繁的参数更新，可能帮助模型更快地收敛。
   """

'Question e\n   不同的batch大小，可能会对训练过程产生不同的影响\n   数据方面：使用大批次训练可以在一次运算中处理更多的样本数据（但对设备性能要求高）\n             使用小批次训练可以提供更多的样本差异性，因为每个批次中的样本可能具有不同的特点。这可以帮助模型更好地泛化到未见样本，并减少过拟合的风险。\n   优化方面：大批次训练可以减少参数更新的频率，因为每个批次的梯度平均值更稳定。\n             小批次训练可以提供更频繁的参数更新，可能帮助模型更快地收敛。\n   '