# Pytorch构建神经网络(三)(29-33节)

## 4.1&4.2 使用tensorboard可视化CNN训练指标

* pytorch1.1.0以上的版本已经自动增加了tensorboard
* 在终端输入“tensorboard --version”可查看tensorboard的版本
* 在终端输入“tensorboard --logdir=runs”进入tensorboard(在写了tensorboard数据的路径下)
* 此tensorboard和tensorflow的是同一个，pytorch需要1.15版本以后，tensorflow1.13需要tensorboard<=1.14

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from torch.utils.tensorboard import SummaryWriter

torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)


<torch.autograd.grad_mode.set_grad_enabled at 0x7f62cc639198>

In [2]:
print(torch.__version__)
# print(torchvision.__version__)

1.2.0+cu92


In [3]:
def get_num_correct(preds,labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

In [4]:
class Network(nn.Module):
    def __init__(self):
        super(Network,self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)
    
    def forward(self, t):
        t = t 
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        t = t.reshape(-1, 12*4*4)  # t.flatten(start_dim=1)
        t = F.relu(self.fc1(t))
        
        t = F.relu(self.fc2(t))
        
        t = self.out(t)
        return t
        

In [5]:
train_set = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train = True,
    download = True,
    transform = transforms.Compose([
        transforms.ToTensor()
    ])
)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)

In [8]:
tb = SummaryWriter()
nextwork = Network()
images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)
tb.add_image('images', grid)
tb.add_graph(nextwork, images)
tb.close
# 难用

<bound method SummaryWriter.close of <torch.utils.tensorboard.writer.SummaryWriter object at 0x7fac0c5eefd0>>

### Starting out with TensorBoard (Network Graph and Images)

In [11]:
#batch_size = 100
#lr =0.01
# 对不同的batchsize，lr的训练情况进行比较
# 方法1：但此方法需要多层for循环
batch_size_list = [100, 1000, 10000]
lr_list = [.01, .001, .0001, .00001]
for batch_size in batch_size_list:
    for lr in lr_list:
        network = Network()
        train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
        images, labels = next(iter(train_loader))
        grid = torchvision.utils.make_grid(images)   # 创建能在tensorboard中查看的图像网格

        comment = f'batch_size={batch_size} lr ={lr}'
        tb = SummaryWriter(comment=comment)   # 在Summary Writer添加该注释，可帮助我们在tensorboard中唯一地识别该表示
        tb.add_image('images', grid)  # 将一批图像放在grid中进行显示
        tb.add_graph(network, images)   # 在tensorboard中看见网络结构的可视化图
        optimizer = optim.Adam(network.parameters(), lr=lr)

        for epoch in range(5):
    
            total_loss = 0
            total_correct = 0
    
            for batch in train_loader:    # Get Batch
                images, labels = batch
        
                preds = network(images) # Pass Batch
                loss = F.cross_entropy(preds, labels)  # Calculate loss
        
                optimizer.zero_grad()    # 梯度清零，否则会累加
                loss.backward()     # Calculate Gradients
                optimizer.step()    # Update Weights
        
                #total_loss += loss.item()
                total_loss += loss.item()*batch_size # 在对不同批次下的训练进行比较时，这样做可使结果更具有可比性
                total_correct += get_num_correct(preds, labels)
        
            tb.add_scalar("Loss", total_loss, epoch)
            tb.add_scalar("Number Correct", total_correct, epoch)
            tb.add_scalar("Accuracy", total_correct/len(train_set), epoch)
            '''
            这种表达方式只能看单个层的偏置，权重，及其梯度的变化趋势，无法看到全部的
            tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
            tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
            tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)
            '''
            for name, weight in network.named_parameters():
                tb.add_histogram(name, weight, epoch)
                tb.add_histogram(f'{name}.grad', weight.grad, epoch)
            print("epoch:", epoch, "total_correct:", total_correct, "loss", total_loss)

tb.close()

epoch: 0 total_correct: 45971 loss 36999.32823777199
epoch: 1 total_correct: 50668 loss 25344.60060596466
epoch: 2 total_correct: 51639 loss 22977.645768225193
epoch: 3 total_correct: 51812 loss 22043.522529304028
epoch: 4 total_correct: 52007 loss 21628.90024483204
epoch: 0 total_correct: 42332 loss 47667.41223335266
epoch: 1 total_correct: 48750 loss 30770.001539587975
epoch: 2 total_correct: 50425 loss 26484.86860394478
epoch: 3 total_correct: 51401 loss 23829.722325503826
epoch: 4 total_correct: 51848 loss 22179.082918167114
epoch: 0 total_correct: 32193 loss 83383.92381668091
epoch: 1 total_correct: 43321 loss 44253.26535701752
epoch: 2 total_correct: 44664 loss 39717.6597237587
epoch: 3 total_correct: 45693 loss 36959.0406537056
epoch: 4 total_correct: 46526 loss 34960.721066594124
epoch: 0 total_correct: 6031 loss 137695.96047401428
epoch: 1 total_correct: 17646 loss 131961.98852062225
epoch: 2 total_correct: 27459 loss 113171.54141664505
epoch: 3 total_correct: 35936 loss 88529

#### 对多层的偏置，权重及其梯度进行访问的原理

In [6]:
nextwork = Network()
for name,weight in network.named_parameters():
    print(name, weight.shape)

NameError: name 'network' is not defined

In [7]:
for name,weight in network.named_parameters():
    print(f'{name}.grad', weight.grad.shape)

NameError: name 'network' is not defined

#### 更简单的方法对要更改的参数进行访问

In [8]:
from itertools import product  

In [9]:
parameters = dict(
    lr = [.01, .001],
    batc_size = [10, 100, 1000],
    shuffle = [True, False]
)

In [10]:
param_values = [v for v in parameters.values()]
param_values

[[0.01, 0.001], [10, 100, 1000], [True, False]]

In [11]:
for lr, batch_size, shuffle in product(*param_values):
    print(lr, batch_size, shuffle)

0.01 10 True
0.01 10 False
0.01 100 True
0.01 100 False
0.01 1000 True
0.01 1000 False
0.001 10 True
0.001 10 False
0.001 100 True
0.001 100 False
0.001 1000 True
0.001 1000 False


In [18]:
#batch_size = 100
#lr =0.01
# 对不同的batchsize，lr的训练情况进行比较
# 方法2：只需一层循环
from itertools import product
parameters = dict(
    lr = [.01, .001],
    batch_size = [10, 100],
    shuffle = [True, False]
)
param_values = [v for v in parameters.values()]
print(param_values)
for lr, batch_size, shuffle in product(*param_values):
    network = Network()
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)   # 创建能在tensorboard中查看的图像网格

    comment = f'batch_size={batch_size} lr ={lr} shuffle={shuffle}'
    tb = SummaryWriter(comment=comment)   # 在Summary Writer添加该注释，可帮助我们在tensorboard中唯一地识别该表示
    tb.add_image('images', grid)  # 将一批图像放在grid中进行显示
    tb.add_graph(network, images)   # 在tensorboard中看见网络结构的可视化图
    optimizer = optim.Adam(network.parameters(), lr=lr)

    for epoch in range(5):
    
        total_loss = 0
        total_correct = 0
    
        for batch in train_loader:    # Get Batch
            images, labels = batch
        
            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels)  # Calculate loss
        
            optimizer.zero_grad()    # 梯度清零，否则会累加
            loss.backward()     # Calculate Gradients
            optimizer.step()    # Update Weights
        
            #total_loss += loss.item()
            total_loss += loss.item()*batch_size # 在对不同批次下的训练进行比较时，这样做可使结果更具有可比性
            total_correct += get_num_correct(preds, labels)
        
        tb.add_scalar("Loss", total_loss, epoch)
        tb.add_scalar("Number Correct", total_correct, epoch)
        tb.add_scalar("Accuracy", total_correct/len(train_set), epoch)
        '''
            这种表达方式只能看单个层的偏置，权重，及其梯度的变化趋势，无法看到全部的
            tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
            tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
            tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)
        '''
        for name, weight in network.named_parameters():
            tb.add_histogram(name, weight, epoch)
            tb.add_histogram(f'{name}.grad', weight.grad, epoch)
        print("epoch:", epoch, "total_correct:", total_correct, "loss", total_loss)

tb.close()

[[0.01, 0.001], [10, 100], [True, False]]
epoch: 0 total_correct: 45904 loss 37692.4475050997
epoch: 1 total_correct: 48537 loss 31444.695247542113
epoch: 2 total_correct: 48885 loss 30522.319433526136
epoch: 3 total_correct: 49032 loss 30967.423776197247
epoch: 4 total_correct: 48843 loss 31455.53153772489
epoch: 0 total_correct: 45678 loss 39251.742211058736
epoch: 1 total_correct: 48120 loss 33035.97664508503
epoch: 2 total_correct: 48184 loss 32827.95681891381
epoch: 3 total_correct: 48409 loss 32354.417913984507
epoch: 4 total_correct: 48635 loss 32306.34260468185
epoch: 0 total_correct: 45633 loss 37456.57370686531
epoch: 1 total_correct: 50822 loss 25002.635471522808
epoch: 2 total_correct: 51650 loss 22562.59504109621
epoch: 3 total_correct: 52130 loss 21302.11407393217
epoch: 4 total_correct: 52284 loss 20717.984664440155
epoch: 0 total_correct: 46511 loss 35531.40455186367
epoch: 1 total_correct: 51283 loss 23556.707997620106
epoch: 2 total_correct: 52074 loss 21302.570855617

## 4.3 RunBuilder类的编写
* 该类的编写允许我们使用不同的参数值生成多个运行

In [12]:
from collections import OrderedDict
from collections import namedtuple
from itertools import product

In [13]:
class RunBuilder():
    @staticmethod
    def get_runs(params):
        Run = namedtuple('Run', params.keys())
        runs = []
        for v in product(*params.values()):
            runs.append(Run(*v))
        return runs

In [14]:
params = OrderedDict(
    lr = [.01, .001],
    batch_size = [1000, 10000]
)
params

OrderedDict([('lr', [0.01, 0.001]), ('batch_size', [1000, 10000])])

In [15]:
runs = RunBuilder.get_runs(params)
runs

[Run(lr=0.01, batch_size=1000),
 Run(lr=0.01, batch_size=10000),
 Run(lr=0.001, batch_size=1000),
 Run(lr=0.001, batch_size=10000)]

In [16]:
for run in runs:
    print(run, run.lr, run.batch_size)

Run(lr=0.01, batch_size=1000) 0.01 1000
Run(lr=0.01, batch_size=10000) 0.01 10000
Run(lr=0.001, batch_size=1000) 0.001 1000
Run(lr=0.001, batch_size=10000) 0.001 10000


In [17]:
# 创建RunBuilder类以后，comment表示为：
for run in RunBuilder.get_runs(params):
    comment = f'-{run}'
    print(comment)

-Run(lr=0.01, batch_size=1000)
-Run(lr=0.01, batch_size=10000)
-Run(lr=0.001, batch_size=1000)
-Run(lr=0.001, batch_size=10000)


# 4.4 如何试验大量的超参数
* 构建RunManager类可实现对大量超参数的试验

In [76]:
import time
import pandas as pd
from IPython.display import clear_output
import json
class RunManager():
    def __init__(self):
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None
        
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None
        
        self.network = None
        self.loader = None
        self.tb = None
        
    def begin_run(self, run, network, loader):
        self.run_start_time = time.time()
        
        self.run_params = run
        self.run_count += 1
        
        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}')
        
        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)
        
        self.tb.add_image('images', grid)
        self.tb.add_graph(self.network, images.to(getattr(run, 'device','cpu')))
        
    def end_run(self):
        self.tb.close()
        self.epoch_count = 0
        
    def begin_epoch(self):
        self.epoch_start_time = time.time()
        
        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0
    
    def end_epoch(self):
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time
        
        loss = self.epoch_loss/len(self.loader.dataset)
        accuracy = self.epoch_num_correct/len(self.loader.dataset)
        
        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)
        
        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)
            
        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results["loss"] = loss
        results["accuracy"] = accuracy
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration
        for k,v in self.run_params._asdict().items(): results[k] = v
        self.run_data.append(results)
        df = pd.DataFrame.from_dict(self.run_data, orient='columns')

        clear_output(wait=True)
        display(df)
    
    def track_loss(self, loss):
        self.epoch_loss += loss.item()*self.loader.batch_size
    
    def track_num_correct(self, preds, labels):
        self.epoch_num_correct += self._get_num_correct(preds, labels)
    
    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        return preds.argmax(dim=1).eq(labels).sum().item()
    
    def save(self, fileName):
        pd.DataFrame.from_dict(
            self.run_data,
            orient='columns').to_csv(f'{fileName}.csv')
        with open(f'{fileName}.json','w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [32]:
# 使用RunManager和RunBuilder类可以使得程序更易扩展
params = OrderedDict(
    lr = [.01],
    batch_size =[1000, 2000],
    shuffle = [True, False]
)
m = RunManager()
for run in RunBuilder.get_runs(params):
    
    network = Network()
    loader = torch.utils.data.DataLoader(train_set, batch_size=run.batch_size, shuffle=run.shuffle)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    for epoch in range(5):
        m.begin_epoch()
        for batch in loader:
            images, labels = batch
            preds = network(images)
            loss = F.cross_entropy(preds, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            m.track_loss(loss)
            m.track_num_correct(preds, labels)
            
        m.end_epoch()
    m.end_run()
m.save('resuls')

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,0.932201,0.637467,7.571928,8.019249,0.01,1000,True
1,1,2,0.532753,0.787517,7.663883,15.779657,0.01,1000,True
2,1,3,0.444505,0.83335,7.880686,23.771249,0.01,1000,True
3,1,4,0.384363,0.858267,7.423034,31.302291,0.01,1000,True
4,1,5,0.353058,0.87015,7.604582,38.983821,0.01,1000,True
5,2,1,1.00195,0.609783,7.495701,8.007776,0.01,1000,False
6,2,2,0.54477,0.7892,7.610538,15.701612,0.01,1000,False
7,2,3,0.454579,0.83055,7.568527,23.409861,0.01,1000,False
8,2,4,0.398927,0.853917,7.706451,31.261828,0.01,1000,False
9,2,5,0.355898,0.87,8.088928,39.482155,0.01,1000,False


# 4.5 使用DataLoader的多进程功能加速神经网络训练

* 使用data loader类的num_workers可选属性可加速神经网络的训练
* num_workers属性告诉data loader实例有多少个单元处理器用于数据加载
* num_workers值的选择的最好方式是进行试验

In [35]:
# 使用RunManager和RunBuilder类可以使得程序更易扩展
params = OrderedDict(
    lr = [.01],
    batch_size =[1000, 2000],
    shuffle = [True, False],
    num_workers = [0,1,2,4,8,16]
)
m = RunManager()
for run in RunBuilder.get_runs(params):
    
    network = Network()
    loader = torch.utils.data.DataLoader(train_set, batch_size=run.batch_size, shuffle=run.shuffle, num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    for epoch in range(5):
        m.begin_epoch()
        for batch in loader:
            images, labels = batch
            preds = network(images)
            loss = F.cross_entropy(preds, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            m.track_loss(loss)
            m.track_num_correct(preds, labels)
            
            
        m.end_epoch()
    m.end_run()
m.save('resuls')

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers
0,1,1,1.017686,0.603217,7.153481,7.612322,0.01,1000,True,0
1,1,2,0.537553,0.792367,7.217106,14.927855,0.01,1000,True,0
2,1,3,0.446319,0.835417,7.449468,22.500605,0.01,1000,True,0
3,1,4,0.394627,0.854567,7.328318,29.944257,0.01,1000,True,0
4,1,5,0.351331,0.872133,7.525430,37.609182,0.01,1000,True,0
...,...,...,...,...,...,...,...,...,...,...
115,24,1,1.248348,0.523100,4.440034,6.248093,0.01,2000,False,16
116,24,2,0.668257,0.739717,4.897284,11.271547,0.01,2000,False,16
117,24,3,0.553055,0.785450,4.842251,16.255508,0.01,2000,False,16
118,24,4,0.489422,0.816467,4.698927,21.058644,0.01,2000,False,16


Moving to GPU


In [42]:
t = torch.ones(1,1,28,28)
t.shape

torch.Size([1, 1, 28, 28])

In [43]:
network = Network()

In [44]:
t = t.cuda()
network = network.cuda()

In [45]:
gpu_pred = network(t)
gpu_pred.device

device(type='cuda', index=0)

In [46]:
gpu_pred

tensor([[ 0.1275,  0.1232,  0.0004,  0.0912, -0.0668, -0.0127, -0.0867, -0.0178,  0.0883, -0.1317]], device='cuda:0',
       grad_fn=<AddmmBackward>)

Moving to CPU

In [47]:
t = t.cpu()
network = network.cpu()

In [48]:
cpu_pred = network(t)
cpu_pred.device

device(type='cpu')

Working with Tensors

In [51]:
t1 = torch.tensor([
    [1,2],
    [3,4]
])

t2 = torch.tensor([
    [5,6],
    [7,8]
])

In [52]:
t1.device,t2.device

(device(type='cpu'), device(type='cpu'))

In [53]:
t1 = t1.to('cuda')

In [54]:
t1.device

device(type='cuda', index=0)

In [55]:
try: t1 + t2
except Exception as e: print(e)

expected device cuda:0 and dtype Long but got device cpu and dtype Long


In [56]:
try: t2 + t1
except Exception as e: print(e)

expected device cpu and dtype Long but got device cuda:0 and dtype Long


In [57]:
t2 = t2.to('cuda')

In [58]:
t1 + t2

tensor([[ 6,  8],
        [10, 12]], device='cuda:0')

Working with Neural Network Modules

In [61]:
network = Network()

In [62]:
for name, param in network.named_parameters():
    print(name, '\t\t', param.shape)

conv1.weight 		 torch.Size([6, 1, 5, 5])
conv1.bias 		 torch.Size([6])
conv2.weight 		 torch.Size([12, 6, 5, 5])
conv2.bias 		 torch.Size([12])
fc1.weight 		 torch.Size([120, 192])
fc1.bias 		 torch.Size([120])
fc2.weight 		 torch.Size([60, 120])
fc2.bias 		 torch.Size([60])
out.weight 		 torch.Size([10, 60])
out.bias 		 torch.Size([10])


In [63]:
# 是网络的权重在gpu，不是网络在gpu
for n, p in network.named_parameters():
    print(p.device,' ', n)

cpu   conv1.weight
cpu   conv1.bias
cpu   conv2.weight
cpu   conv2.bias
cpu   fc1.weight
cpu   fc1.bias
cpu   fc2.weight
cpu   fc2.bias
cpu   out.weight
cpu   out.bias


In [64]:
network = network.to('cuda')

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

In [65]:
for n, p in network.named_parameters():
    print(p.device,' ', n)

cuda:0   conv1.weight
cuda:0   conv1.bias
cuda:0   conv2.weight
cuda:0   conv2.bias
cuda:0   fc1.weight
cuda:0   fc1.bias
cuda:0   fc2.weight
cuda:0   fc2.bias
cuda:0   out.weight
cuda:0   out.bias


In [66]:
sample = torch.ones(1,1,28,28)
sample.shape

torch.Size([1, 1, 28, 28])

In [67]:
try: network(sample)
except Exception as e: print(e)

Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same


In [68]:
try: 
    pred = network(sample.to('cuda'))
    print(pred)
except Exception as e: print(e)

tensor([[ 0.0517,  0.0428,  0.0534,  0.0659, -0.0305,  0.0815,  0.0928,  0.1212,  0.0489,  0.0225]], device='cuda:0',
       grad_fn=<AddmmBackward>)


checking for GPU

In [69]:
torch.cuda.is_available()

True

In [77]:
# 使用RunManager和RunBuilder类可以使得程序更易扩展
params = OrderedDict(
    lr = [.01],
    batch_size =[1000, 2000],
    device = ['cuda', 'cpu'],
    shuffle = [True, False],
    num_workers = [0]
)
m = RunManager()
for run in RunBuilder.get_runs(params):
    device = torch.device(run.device)
    network = Network().to(device)
    loader = torch.utils.data.DataLoader(train_set, batch_size=run.batch_size, shuffle=run.shuffle, num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    for epoch in range(1):
        m.begin_epoch()
        for batch in loader:
            images = batch[0].to(device)
            labels = batch[1].to(device)
#             images, labels = batch
            preds = network(images)
            loss = F.cross_entropy(preds, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            m.track_loss(loss)
            m.track_num_correct(preds, labels)
            
            
        m.end_epoch()
    m.end_run()
m.save('resuls')

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,device,shuffle,num_workers
0,1,1,1.038043,0.607883,3.971931,4.498997,0.01,1000,cuda,True,0
1,2,1,1.058814,0.59405,4.055964,4.416968,0.01,1000,cuda,False,0
2,3,1,1.054562,0.6052,6.243961,6.702615,0.01,1000,cpu,True,0
3,4,1,1.103798,0.572667,6.125513,6.569273,0.01,1000,cpu,False,0
4,5,1,1.33138,0.496017,4.011735,4.82145,0.01,2000,cuda,True,0
5,6,1,1.286797,0.5313,4.096471,4.846245,0.01,2000,cuda,False,0
6,7,1,1.259228,0.5265,6.829472,7.6987,0.01,2000,cpu,True,0
7,8,1,1.351675,0.4883,6.631814,7.564311,0.01,2000,cpu,False,0


In [79]:
pd.DataFrame.from_dict(m.run_data, orient='columns').sort_values('epoch duration')

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,device,shuffle,num_workers
0,1,1,1.038043,0.607883,3.971931,4.498997,0.01,1000,cuda,True,0
4,5,1,1.33138,0.496017,4.011735,4.82145,0.01,2000,cuda,True,0
1,2,1,1.058814,0.59405,4.055964,4.416968,0.01,1000,cuda,False,0
5,6,1,1.286797,0.5313,4.096471,4.846245,0.01,2000,cuda,False,0
3,4,1,1.103798,0.572667,6.125513,6.569273,0.01,1000,cpu,False,0
2,3,1,1.054562,0.6052,6.243961,6.702615,0.01,1000,cpu,True,0
7,8,1,1.351675,0.4883,6.631814,7.564311,0.01,2000,cpu,False,0
6,7,1,1.259228,0.5265,6.829472,7.6987,0.01,2000,cpu,True,0


In [80]:
network.cuda()

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

# 标准化与正则化