In [15]:
import torch
from torch import nn
from torchsummary import summary
from torchvision.datasets import FashionMNIST
from torchvision import transforms
import torch.utils.data as Data
import numpy as np

In [16]:
class LeNet(nn.Module):
    def __init__(self):  # 初始化
        super(LeNet, self).__init__()
        self.con1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        self.sig = nn.Sigmoid()
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.con2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()

        self.fc1 = nn.Linear(5*5*16, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x): # 前向傳播定義
        x = self.sig(self.con1(x))
        x = self.pool1(x)
        x = self.sig(self.con2(x))
        x = self.pool2(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

if __name__ =="__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = LeNet().to(device)
    print(summary(model, (1, 28, 28)))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
           Sigmoid-2            [-1, 6, 28, 28]               0
         AvgPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 16, 10, 10]           2,416
           Sigmoid-5           [-1, 16, 10, 10]               0
         AvgPool2d-6             [-1, 16, 5, 5]               0
           Flatten-7                  [-1, 400]               0
            Linear-8                  [-1, 120]          48,120
            Linear-9                   [-1, 84]          10,164
           Linear-10                   [-1, 10]             850
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.11
Params size (MB): 0.24
Estimated Tot

In [17]:
def data_preprocessing():
    data = FashionMNIST(root='./data',
                    train=True,
                    transform=transforms.Compose([transforms.Resize(size=224), transforms.ToTensor()]),
                    download=True)
    train, val = Data.random_split(data, [round(0.8*len(data)), round(0.2*len(data))])
    train_loader = Data.DataLoader(dataset=train,
                                  batch_size=128,
                                  shuffle=True,
                                  num_workers=0)
    val_loader = Data.DataLoader(dataset=val,
                                  batch_size=128,
                                  shuffle=True,
                                  num_workers=0)
    return train_loader, val_loader
train, val = data_preprocessing()

In [18]:
def train_model_process(model, train, val, num_epochs):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # 優化器
    criterion = nn.CrossEntropyLoss() # 損失函數

    model = model.to(device)
    best_model_wts = copy.deepcopy(model.state_dict()) #複製當前模型參數
    # 初始化參數
    best_acc = 0.0
    train_loss_all = []
    train_acc_all = []
    val_loss_all = []
    val_acc_all = []

    since = time.time()

    for epoch in range(num_epochs):
        print("Epoch {}/{}".format(epoch, num_epochs-1))
        print("-"*10)

        train_loss = 0.0
        train_accuracy = 0
        val_loss = 0.0
        val_accuracy = 0

        train_num = 0
        val_num = 0

        for step, (feature, label) in enumerate(train):
            # 將資料放到訓練設備中
            feature = feature.to(device)
            label = label.to(device)
            # 將模型設為訓練模式
            model.train()
    
            # 前向傳播
            output = model(feature)
            predict = torch.argmax(output, dim=1)
            # 計算每個batch的損失函數
            loss = criterion(output, label)
            
            # 初始化梯度
            optimizer.zero_grad()
            #反向傳播
            loss.backword()
            # 透過優化器更新參數
            optimizer.step()
            # 累加損失函數
            train_loss += loss.item() * feature.size()
            # 若預測正確 準確度增加
            train_accuracy += torch.sum(predict == label.data)
            train_num += feature.size