In [46]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
from tqdm.auto import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
from torchvision.transforms import ToTensor
from modeltraning import load_fashion_mnist
from modeltraning import EarlyStopCallback
from modeltraning import training
from modeltraning import evaluating
from torchvision.transforms import Normalize


    



## 搭建模型

In [50]:
# Preprocessing pipeline applied inside the model: single-channel normalization.
# The mean and standard deviation were computed from train_ds.
transforms = nn.Sequential(Normalize(mean=[0.2860], std=[0.3205]))

class NeuralNetwork(nn.Module):
    """Fully connected classifier producing 10 logits per sample.

    The input is passed through the module-level ``transforms`` (normalization),
    flattened to 28 * 28 = 784 features, and fed through hidden blocks of
    Linear -> BatchNorm1d -> ReLU (100 units each), followed by a final
    Linear(100, 10) output layer.

    Args:
        layers_num: Number of hidden Linear/BatchNorm1d/ReLU blocks. The first
            block is always built; the loop appends ``layers_num - 1`` more, so
            values below 1 behave like 1.
    """

    def __init__(self, layers_num=2):
        super().__init__()
        self.transforms = transforms  # preprocessing layer: normalization
        self.flatten = nn.Flatten()
        # First hidden block: 784 flattened pixels -> 100 units.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(),
        )
        # Append the remaining (layers_num - 1) hidden blocks.
        for i in range(1, layers_num):
            self.linear_relu_stack.add_module(f"Linear_{i}", nn.Linear(100, 100))
            self.linear_relu_stack.add_module(f"batchnorm_{i}", nn.BatchNorm1d(100))
            # The activation needs a unique name per block: add_module() replaces
            # an existing child registered under the same name, so a constant
            # name would leave only one ReLU in the whole stack and every later
            # block would be Linear -> BatchNorm with no non-linearity.
            self.linear_relu_stack.add_module(f"relu_{i}", nn.ReLU())
        # Output layer: 100 units -> 10 class logits.
        self.linear_relu_stack.add_module("Output Layer", nn.Linear(100, 10))

        # Initialize the weights.
        self.init_weights()

    def init_weights(self):
        """Initialize every nn.Linear: Kaiming-uniform weights, zero biases."""
        for m in self.modules():
            if isinstance(m, nn.Linear):  # only fully connected layers are touched
                # https://pytorch.org/docs/stable/nn.init.html
                nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
                nn.init.zeros_(m.bias)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: Input batch; expected shape [batch size, 1, 28, 28] (it must
                flatten to 784 features per sample).

        Returns:
            Tensor of logits with shape [batch size, 10].
        """
        x = self.transforms(x)  # normalization
        x = self.flatten(x)
        # After flattening, x.shape is [batch size, 28 * 28].
        logits = self.linear_relu_stack(x)
        # logits.shape is [batch size, 10].
        return logits
# Add up the element count of every parameter tensor of a 20-block network.
total = sum(
    np.prod(param.shape)
    for _name, param in NeuralNetwork(20).named_parameters()
)
total  # number of model parameters

np.int64(275410)

In [42]:
# Training / validation loaders from the project helper, batch size 16.
train_loader, val_loader = load_fashion_mnist(batch_size=16)

# Number of epochs handed to training().
epoch = 5

# Model, loss on the raw logits, and SGD with momentum.
model = NeuralNetwork(layers_num=2)
loss_fct = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# NOTE(review): patience/min_delta semantics live in modeltraning.EarlyStopCallback — confirm there.
early_stop_callback = EarlyStopCallback(min_delta=0.001, patience=10)

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model = model.to(device)

cpu


In [51]:
# Run the project training loop and keep whatever it returns in `record`.
# eval_step is set to the number of batches per epoch; presumably this means
# validation runs once per epoch — confirm in modeltraning.training.
record = training(
    model, train_loader, val_loader, epoch, loss_fct, optimizer, device,
    early_stop_callback=early_stop_callback,
    eval_step=len(train_loader),
)

100%|██████████| 18750/18750 [02:03<00:00, 151.62it/s, epoch=4]


In [53]:

# Put the model in evaluation mode before scoring it on the validation loader.
model.eval()
# evaluating() is a project helper; it is unpacked here as (loss, accuracy).
loss, acc = evaluating(model, val_loader, loss_fct, device)
print(f"loss:     {loss:.4f}\naccuracy: {acc:.4f}")

loss:     0.3505
accuracy: 0.8781
