## 函数定义

In [19]:
import sys
sys.path.append("./python")
from tic_toc_timer import tic, toc
import numpy as np
import mpt
import mpt.nn as nn

# Wrtie acc to csv file
def writeAccToFile(train_acc, time, model_name):
    with open("./result/"+model_name+".csv", "w") as f:
        f.write("n*100batches, train_acc, time\n")
        for i in range(len(train_acc)):
            # using %
            f.write("{:}, {:.2f}%, {:} seconds\n".format(
                i+1,train_acc[i]*100, time[i]))
    print("Write to file successfully")


# Load Data
def loadData():
    data_dir = "./data/"
    batch_size = 100
    train_dataset = mpt.data.MNISTDataset(
        data_dir+"/train-images-idx3-ubyte.gz",
        data_dir+"/train-labels-idx1-ubyte.gz"
    )
    test_dataset = mpt.data.MNISTDataset(
        data_dir+"/t10k-images-idx3-ubyte.gz",
        data_dir+"/t10k-labels-idx1-ubyte.gz"
    )
    train_dataloader = mpt.data.DataLoader(
        dataset=train_dataset,
        batch_size=batch_size,
        shuffle=True
    )
    test_dataloader = mpt.data.DataLoader(
        dataset=test_dataset,
        batch_size=batch_size,
        shuffle=True
    )
    return train_dataset, train_dataloader, test_dataset, test_dataloader

# return: loss, error
def loss_err(h, y):
    lossModule = nn.SoftmaxLoss()
    return (
        lossModule.forward(h, y),
        np.sum(h.numpy().argmax(axis=1) != y.numpy(), dtype=np.float32)
    )

# epoch


def epoch(dataloader: mpt.data.DataLoader,
          model: mpt.nn.Module,
          opt: mpt.optim.Optimizer = None,
          # record acc, time
          accList=None,timeList=None):
    np.random.seed(4)

    if opt is None:
        model.eval()
    else:
        model.train()
    loss = 0
    err = 0
    num_sample = 0

    tic()
    for i, data in enumerate(dataloader):
        imgs = data[0]
        labels = data[1]
        forwardRes = model.forward(imgs)
        iLoss, iError = loss_err(forwardRes, labels)
        loss += iLoss.numpy()[0]
        err += iError
        num_sample += labels.shape[0]
        if opt is not None:
            iLoss.backward()
            opt.step()
        # every 50 batches, push acc and time to list
        if i % 50 == 0:
            time=toc().seconds
            accList.append(err/num_sample)
            timeList.append(time)
            tic()

    return (err/num_sample, loss/i)


# model
# Simple NN
def simple_nn():
    return nn.Sequential(
        nn.Linear(784, 20),
        nn.Linear(20, 10)
    ), "simple_nn"

# ResNet
def ResidualBlock(dim, hidden_dim, norm=nn.BatchNorm1d, drop_prob=0.1):
    return nn.Sequential(
        nn.Residual(
            nn.Sequential(
                nn.Linear(dim, hidden_dim),
                norm(hidden_dim),
                nn.ReLU(),
                nn.Dropout(drop_prob),
                nn.Linear(hidden_dim, dim),
                norm(dim))
        ),
        nn.ReLU()
    )


def MLPResNet(dim, hidden_dim=100, num_blocks=3, num_classes=10, norm=nn.BatchNorm1d, drop_prob=0.1):
    ls = [nn.Linear(dim, hidden_dim), nn.ReLU()]
    for _ in range(num_blocks):
        ls.append(
            ResidualBlock(hidden_dim, hidden_dim//2,
                          norm, drop_prob)
        )
    ls.append(nn.Linear(hidden_dim, num_classes))  # 分类层
    return nn.Sequential(*ls), "MLPResNet"


## 训练对比

In [20]:
train_acc, train_loss = None, None

modelSimpleNN = simple_nn()
modelResNet = MLPResNet(784, hidden_dim=100)
optSGD = mpt.optim.SGD
optAdam = mpt.optim.Adam


# train 2*2, and visualize
modelList = [modelSimpleNN]
# modelList = [modelSimpleNN, modelResNet]
optList = [optSGD, optAdam]

accList = [1,2,3]
timeList = [1,2,3]
for (model, opt) in [(model, opt) for model in modelList for opt in optList]:
    [train_dataset, train_dataloader, test_dataset, test_dataloader] = loadData()
    thisOpt = opt(model[0].parameters(), lr=0.1, weight_decay=0.001)
    print(model[1], type(thisOpt).__name__)
    print("TRAIN")
    print("Err | Loss")
    for _ in range(10):
        train_acc, train_loss = epoch(
            train_dataloader, model=model[0], opt=thisOpt,
            accList=accList, timeList=timeList)
    print("TEST")
    test_acc, test_loss = epoch(test_dataloader, model=model[0], opt=None,
                                accList=accList, timeList=timeList)
    
    writeAccToFile(accList, timeList, model[1]+"_"+type(thisOpt).__name__)


simple_nn SGD
TRAIN
Err | Loss


In [None]:
# use different learning rate
train_acc, train_loss = None, None

modelSimpleNN = simple_nn()
modelResNet = MLPResNet(784, hidden_dim=100)
optSGD = mpt.optim.SGD
optAdam = mpt.optim.Adam


# train 2*2, and visualize
modelList = [modelSimpleNN, modelResNet]
optList = [optSGD, optAdam]

train_accList = []
train_lossList = []

test_accList = []
test_lossList = []
timeList = []
for (model, opt) in [(model, opt) for model in modelList for opt in optList]:
    [train_dataset, train_dataloader, test_dataset, test_dataloader] = loadData()
    thisOpt = opt(model[0].parameters(), lr=0.001, weight_decay=0.001)
    print(model[1], opt)
    print("Err | Loss")
    tic()
    for _ in range(10):
        train_acc, train_loss = epoch(
            train_dataloader, model=model[0], opt=thisOpt)
    print("TEST")
    test_acc, test_loss = epoch(test_dataloader, model=model[0])
    t = toc()

    timeList.append(t)
    train_lossList.append(train_loss)
    test_lossList.append(test_loss)
    train_accList.append(train_acc)
    test_accList.append(test_acc)
    print("time: ", t, " seconds")


NameError: name 'simple_nn' is not defined

In [None]:
print("train_accList = ",train_accList)
print("train_lossList = ",train_lossList)
print("test_accList = ",test_accList)
print("test_lossList = ",test_lossList)
print("timeList = ",timeList)

train_accList =  [0.15671666666666667, 0.0758, 0.11516666666666667, 0.014366666666666666]
train_lossList =  [0.5967742698391278, 0.27257578710714975, 0.3863513117780288, 0.045549460020071514]
test_accList =  [0.1456, 0.0763, 0.0895, 0.0297]
test_lossList =  [0.5570591828227043, 0.27045953899621966, 0.2983111513406038, 0.09459613444283604]
timeList =  [datetime.timedelta(seconds=64, microseconds=939863), datetime.timedelta(seconds=118, microseconds=871126), datetime.timedelta(seconds=711, microseconds=642097), datetime.timedelta(seconds=840, microseconds=721068)]


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import torch.nn as nn

# 定义残差块


class ResidualBlock(nn.Module):
    def __init__(self, dim, hidden_dim, norm=nn.BatchNorm1d, drop_prob=0.1):
        super(ResidualBlock, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            norm(hidden_dim),
            nn.ReLU(),
            nn.Dropout(drop_prob),
            nn.Linear(hidden_dim, dim),
            norm(dim),
            nn.ReLU()
        )

    def forward(self, x):
        return self.net(x) + x

# 定义MLPResNet模型


class MLPResNet(nn.Module):
    def __init__(self, dim, hidden_dim=100, num_blocks=3, num_classes=10, norm=nn.BatchNorm1d, drop_prob=0.1):
        super(MLPResNet, self).__init__()
        # 定义模型结构
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.ReLU()
        )
        for _ in range(num_blocks):
            self.net.add_module(
                'residual_block{}'.format(_),
                ResidualBlock(hidden_dim, hidden_dim // 2, norm, drop_prob)
            )
        self.net.add_module(
            'fc',
            nn.Linear(hidden_dim, num_classes)
        )
        self.net.add_module(
            'log_softmax',
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.net(x)
        return x


# 定义超参数
batch_size = 64
learning_rate = 0.1
num_epochs = 10

# 加载MNIST数据集
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True,
                   transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True)

# 初始化模型并定义优化器和损失函数
model = MLPResNet(dim=784)
device = torch.device("cpu")
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

lossListPytorch = []

# 训练模型
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        # 将数据加载到CPU
        data, target = data.to(device), target.to(device)

        # 前向传播
        output = model(data.view(-1, 784))

        # 计算损失
        loss = criterion(output, target)

        # 反向传播
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    lossListPytorch.append(loss.item)

# 在测试集上计算模型准确率
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        # 将数据加载到CPU
        data, target = data.to(device), target.to(device)

        # 前向传播
        output = model(data.view(-1, 784))

        # 计算损失
        test_loss += criterion(output, target).item()

        # 统计预测正确的样本数量
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

# 打印测试集上的结果
test_loss /= len(test_loader.dataset)
print('Epoch: {} Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
    epoch, test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

print("lossListPytorch", lossListPytorch)


Epoch: 9 Test set: Average loss: 0.0014, Accuracy: 9754/10000 (98%)
lossListPytorch [<built-in method item of Tensor object at 0x7f5f294b9170>, <built-in method item of Tensor object at 0x7f5f294253a0>, <built-in method item of Tensor object at 0x7f5f276bff60>, <built-in method item of Tensor object at 0x7f5f276bf0b0>, <built-in method item of Tensor object at 0x7f5f276bff10>, <built-in method item of Tensor object at 0x7f5f27636cf0>, <built-in method item of Tensor object at 0x7f5f276bf740>, <built-in method item of Tensor object at 0x7f5f812e6b60>, <built-in method item of Tensor object at 0x7f5f276be340>, <built-in method item of Tensor object at 0x7f5f276be7a0>]


## 训练对比

In [None]:
train_acc, train_loss = None, None

modelSimpleNN = simple_nn()
modelResNet = MLPResNet(784, hidden_dim=100)
optSGD = mpt.optim.SGD
optAdam = mpt.optim.Adam


# train 2*2, and visualize
modelList = [modelSimpleNN, modelResNet]
optList = [optSGD, optAdam]

train_accList = []
train_lossList = []

test_accList = []
test_lossList = []
timeList = []
for (model, opt) in [(model, opt) for model in modelList for opt in optList]:
    [train_dataset, train_dataloader, test_dataset, test_dataloader] = loadData()
    thisOpt = opt(model[0].parameters(), lr=0.1, weight_decay=0.001)
    print(model[1], opt)
    print("Err | Loss")
    tic()
    for _ in range(10):
        train_acc, train_loss = epoch(
            train_dataloader, model=model[0], opt=thisOpt)
    print("TEST")
    test_acc, test_loss = epoch(test_dataloader, model=model[0])
    t = toc()

    timeList.append(t)
    train_lossList.append(train_loss)
    test_lossList.append(test_loss)
    train_accList.append(train_acc)
    test_accList.append(test_acc)
    print("time: ", t)


simple_nn <class 'mpt.optim.SGD'>
Err | Loss
12.98% |   0.45212
9.01% |   0.31810
8.47% |   0.30048
8.21% |   0.29172
8.07% |   0.28632
7.96% |   0.28259
7.88% |   0.27984
7.81% |   0.27772
7.72% |   0.27603
7.67% |   0.27465
7.85% |   0.27633
0.07850 |   0.27633 TEST
time:  0:01:12.062722  seconds
simple_nn <class 'mpt.optim.Adam'>
Err | Loss
16.08% |   0.94952
19.38% |   0.90033
18.37% |   0.97569
18.77% |   0.83938
20.31% |   1.36762
16.59% |   0.76471
19.85% |   1.21961
17.31% |   1.24014
17.76% |   0.78394
18.03% |   1.14910
16.88% |   0.63192
0.16880 |   0.63192 TEST
time:  0:01:59.496490  seconds
MLPResNet <class 'mpt.optim.SGD'>
Err | Loss
10.24% |   0.33304
4.81% |   0.15718
3.42% |   0.11381
2.58% |   0.08759
1.99% |   0.06944
1.56% |   0.05647
1.27% |   0.04624
1.03% |   0.03872
0.87% |   0.03335
0.75% |   0.03009
2.60% |   0.09090
0.02600 |   0.09090 TEST
time:  0:11:15.112961  seconds
MLPResNet <class 'mpt.optim.Adam'>
Err | Loss
19.14% |   0.80925
21.85% |   0.80273
19.65

In [None]:
print("train_accList:\t",train_accList)
print("train_lossList:\t",train_lossList)
print("test_accList:\t",test_accList)
print("test_lossList:\t",test_lossList)
print("timeList:\t",timeList)

train_accList:	 [0.07673333333333333, 0.1803, 0.007533333333333334, 0.2472]
train_lossList:	 [0.2746484576165676, 1.1491027573744457, 0.030090059110273917, 0.7930528383950393]
test_accList:	 [0.0785, 0.1688, 0.026, 0.2113]
test_lossList:	 [0.2763303181529045, 0.6319184058904648, 0.09090447120834141, 0.6953959873318672]
timeList:	 [datetime.timedelta(seconds=72, microseconds=62722), datetime.timedelta(seconds=119, microseconds=496490), datetime.timedelta(seconds=675, microseconds=112961), datetime.timedelta(seconds=693, microseconds=241430)]


In [None]:
# use different learning rate
train_acc, train_loss = None, None

modelSimpleNN = simple_nn()
modelResNet = MLPResNet(784, hidden_dim=100)
optSGD = mpt.optim.SGD
optAdam = mpt.optim.Adam


# train 2*2, and visualize
modelList = [modelSimpleNN, modelResNet]
optList = [optSGD, optAdam]

train_accList = []
train_lossList = []

test_accList = []
test_lossList = []
timeList = []
for (model, opt) in [(model, opt) for model in modelList for opt in optList]:
    [train_dataset, train_dataloader, test_dataset, test_dataloader] = loadData()
    thisOpt = opt(model[0].parameters(), lr=0.001, weight_decay=0.001)
    print(model[1], opt)
    print("Err | Loss")
    tic()
    for _ in range(10):
        train_acc, train_loss = epoch(
            train_dataloader, model=model[0], opt=thisOpt)
    print("TEST")
    test_acc, test_loss = epoch(test_dataloader, model=model[0])
    t = toc()

    timeList.append(t)
    train_lossList.append(train_loss)
    test_lossList.append(test_loss)
    train_accList.append(train_acc)
    test_accList.append(test_acc)
    print("time: ", t, " seconds")


NameError: name 'simple_nn' is not defined

In [None]:
print("train_accList = ",train_accList)
print("train_lossList = ",train_lossList)
print("test_accList = ",test_accList)
print("test_lossList = ",test_lossList)
print("timeList = ",timeList)

train_accList =  [0.15671666666666667, 0.0758, 0.11516666666666667, 0.014366666666666666]
train_lossList =  [0.5967742698391278, 0.27257578710714975, 0.3863513117780288, 0.045549460020071514]
test_accList =  [0.1456, 0.0763, 0.0895, 0.0297]
test_lossList =  [0.5570591828227043, 0.27045953899621966, 0.2983111513406038, 0.09459613444283604]
timeList =  [datetime.timedelta(seconds=64, microseconds=939863), datetime.timedelta(seconds=118, microseconds=871126), datetime.timedelta(seconds=711, microseconds=642097), datetime.timedelta(seconds=840, microseconds=721068)]


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import torch.nn as nn

# 定义残差块


class ResidualBlock(nn.Module):
    def __init__(self, dim, hidden_dim, norm=nn.BatchNorm1d, drop_prob=0.1):
        super(ResidualBlock, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            norm(hidden_dim),
            nn.ReLU(),
            nn.Dropout(drop_prob),
            nn.Linear(hidden_dim, dim),
            norm(dim),
            nn.ReLU()
        )

    def forward(self, x):
        return self.net(x) + x

# 定义MLPResNet模型


class MLPResNet(nn.Module):
    def __init__(self, dim, hidden_dim=100, num_blocks=3, num_classes=10, norm=nn.BatchNorm1d, drop_prob=0.1):
        super(MLPResNet, self).__init__()
        # 定义模型结构
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.ReLU()
        )
        for _ in range(num_blocks):
            self.net.add_module(
                'residual_block{}'.format(_),
                ResidualBlock(hidden_dim, hidden_dim // 2, norm, drop_prob)
            )
        self.net.add_module(
            'fc',
            nn.Linear(hidden_dim, num_classes)
        )
        self.net.add_module(
            'log_softmax',
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.net(x)
        return x


# 定义超参数
batch_size = 64
learning_rate = 0.1
num_epochs = 10

# 加载MNIST数据集
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True,
                   transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True)

# 初始化模型并定义优化器和损失函数
model = MLPResNet(dim=784)
device = torch.device("cpu")
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

lossListPytorch = []

# 训练模型
for epoch in range(num_epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        # 将数据加载到CPU
        data, target = data.to(device), target.to(device)

        # 前向传播
        output = model(data.view(-1, 784))

        # 计算损失
        loss = criterion(output, target)

        # 反向传播
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    lossListPytorch.append(loss.item)

# 在测试集上计算模型准确率
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        # 将数据加载到CPU
        data, target = data.to(device), target.to(device)

        # 前向传播
        output = model(data.view(-1, 784))

        # 计算损失
        test_loss += criterion(output, target).item()

        # 统计预测正确的样本数量
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

# 打印测试集上的结果
test_loss /= len(test_loader.dataset)
print('Epoch: {} Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
    epoch, test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

print("lossListPytorch", lossListPytorch)


Epoch: 9 Test set: Average loss: 0.0014, Accuracy: 9754/10000 (98%)
lossListPytorch [<built-in method item of Tensor object at 0x7f5f294b9170>, <built-in method item of Tensor object at 0x7f5f294253a0>, <built-in method item of Tensor object at 0x7f5f276bff60>, <built-in method item of Tensor object at 0x7f5f276bf0b0>, <built-in method item of Tensor object at 0x7f5f276bff10>, <built-in method item of Tensor object at 0x7f5f27636cf0>, <built-in method item of Tensor object at 0x7f5f276bf740>, <built-in method item of Tensor object at 0x7f5f812e6b60>, <built-in method item of Tensor object at 0x7f5f276be340>, <built-in method item of Tensor object at 0x7f5f276be7a0>]
