In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)

from torch.utils.tensorboard import SummaryWriter   # 用来将网络数据发送到tensorboard中

import matplotlib.pyplot as plt 
import math
import time
import pandas as pd
import json
from IPython.display import clear_output

from collections import OrderedDict
from collections import namedtuple
from itertools import product

In [2]:
# Fix the RNG seed so the layer's initial weights are reproducible across re-runs.
torch.manual_seed(50)
# Minimal network: a single linear layer with 2 inputs and 1 output.
network = nn.Sequential(nn.Linear(2, 1))

In [3]:
network[0].weight       # index 0 selects the first layer of the network

Parameter containing:
tensor([[ 0.1669, -0.6100]], requires_grad=True)

In [4]:
torch.save(network.state_dict(), "./network.pt")    # .pt is the PyTorch network file suffix

In [5]:
# Push one random input through the network and back-propagate its output
# so that every parameter has a gradient.
t = torch.rand(2)
o = network(t)
o.backward()
optimizer = optim.Adam(network.parameters(), lr=0.01)
optimizer.step()        # one optimizer step after back-propagation; modifies the network parameters

network[0].weight

Parameter containing:
tensor([[ 0.1569, -0.6200]], requires_grad=True)

In [6]:
network.load_state_dict(torch.load("./network.pt"))     # load the saved network parameters back in
network[0].weight

Parameter containing:
tensor([[ 0.1669, -0.6100]], requires_grad=True)

In [7]:
# Re-seeding before construction yields the same initial weights as before.
torch.manual_seed(50)
network = nn.Sequential(nn.Linear(2, 1))
network[0].weight       # index 0 selects the first layer of the network

Parameter containing:
tensor([[ 0.1669, -0.6100]], requires_grad=True)

In [8]:
# 修改一次网络参数
# Modify the network parameters once (the "# 修改一次网络参数" header above says the same).
t = torch.rand(2)
o = network(t)
o.backward()
optimizer = optim.Adam(network.parameters(), lr=0.01)
optimizer.step()
network[0].weight

Parameter containing:
tensor([[ 0.1569, -0.6200]], requires_grad=True)

In [9]:
# 重新创建网络
# Rebuild the network from the same seed instead of loading a saved state dict.
torch.manual_seed(50)
network = nn.Sequential(nn.Linear(2, 1))
network[0].weight       # index 0 selects the first layer of the network

Parameter containing:
tensor([[ 0.1669, -0.6100]], requires_grad=True)

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)

from torch.utils.tensorboard import SummaryWriter   # 用来将网络数据发送到tensorboard中

import matplotlib.pyplot as plt
import math
import time
import pandas as pd
import json
from IPython.display import clear_output

from collections import OrderedDict
from collections import namedtuple
from itertools import product

class RunBuilder():
    """Expands a hyperparameter grid into one named tuple per combination."""

    @staticmethod          # callable as RunBuilder.get_runs(...) without creating an instance
    def get_runs(params):
        """Return a list with one ``Run`` named tuple per hyperparameter combination.

        Args:
            params: mapping of hyperparameter name -> iterable of candidate values.
                The key order defines the field order of the resulting tuples.

        Returns:
            list of ``Run`` named tuples covering the Cartesian product of all
            candidate values, in ``itertools.product`` order.
        """
        Run = namedtuple("Run", params.keys())
        return [Run(*combination) for combination in product(*params.values())]
class RunManager():
    """Collects per-epoch training statistics for a sequence of runs.

    For every run it opens a TensorBoard ``SummaryWriter``, and for every epoch
    it records loss, accuracy, durations and the run's hyperparameters into
    ``run_data`` (a list of ordered dicts, one per finished epoch).
    """

    def __init__(self):
        # Per-epoch accumulators; reset in begin_epoch().
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None

        # Per-run state; populated in begin_run().
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None   # was misspelled "run_strat_time", leaving this attribute undefined

        self.network = None
        self.loader = None
        self.tb = None       # SummaryWriter of the current run

    def begin_run(self, run, network, loader):
        """Start a new run: remember its objects and log a sample batch and the graph.

        Args:
            run: named tuple holding all hyperparameters of this run.
            network: the model being trained.
            loader: the DataLoader used for training.
        """
        self.run_start_time = time.time()

        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}')    # the run's parameters name the log directory

        images, _ = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image('image', grid)

        # add_graph runs a forward pass, so the sample batch has to live on
        # the same device as the network's parameters.
        first_param = next(self.network.parameters(), None)
        if first_param is not None:
            images = images.to(first_param.device)
        self.tb.add_graph(self.network, images)

    def end_run(self):
        """Finish the current run: close the writer and reset the epoch counter."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        """Start a new epoch: start its timer and reset the per-epoch accumulators."""
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        """Finish the epoch: log scalars/histograms, store the results row and display it."""
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        num_samples = len(self.loader.dataset)
        loss = self.epoch_loss / num_samples
        accuracy = self.epoch_num_correct / num_samples

        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # Parameters that received no gradient have grad None, which
            # add_histogram cannot handle.
            if param.grad is not None:
                self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

        # Ordered dict keeps the column order of the results table stable.
        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results["loss"] = loss
        results["accuracy"] = accuracy
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration
        # Append every hyperparameter of this run as an extra column.
        for k, v in self.run_params._asdict().items():
            results[k] = v

        self.run_data.append(results)
        df = pd.DataFrame.from_dict(self.run_data, orient='columns')

        clear_output(wait=True)    # replace the previous table in the notebook output
        display(df)                # display is supplied by the IPython/Jupyter runtime, not imported here

    def track_loss(self, loss, batch_size=None):
        """Add one batch's loss to the epoch total, weighted by the batch size.

        Args:
            loss: scalar loss tensor of the batch.
            batch_size: number of samples in this batch. Defaults to
                ``self.loader.batch_size``; pass the real size for a smaller
                final batch so the epoch average is not inflated.
        """
        if batch_size is None:
            batch_size = self.loader.batch_size
        self.epoch_loss += loss.item() * batch_size

    def track_num_correct(self, preds, labels):
        """Add the number of correct predictions in this batch to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        """Count rows of ``preds`` whose argmax along dim 1 equals the label."""
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self, fileName):
        """Write all recorded results to ``{fileName}.csv`` and ``{fileName}.json``."""
        pd.DataFrame.from_dict(
            self.run_data,
            orient='columns'
        ).to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [11]:
# Raw training set: images are only converted to tensors.
train_set = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

# Load every image in one batch to compute the mean and standard deviation
# over all pixels of all images.
loader = DataLoader(train_set, batch_size=len(train_set), num_workers=1)
data = next(iter(loader))
mean = data[0].mean()
std = data[0].std()

# Normalized training set. It uses the same root as train_set so the files
# downloaded above are reused instead of being fetched again into './data'.
train_set_normal = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
)

In [13]:
class NetworkFactory():
    """Builds the two small CNN variants compared in this notebook."""

    @staticmethod
    def get_network(name):
        """Return a freshly initialised network selected by ``name``.

        Args:
            name: ``"no_batch_norm"`` or ``"batch_norm"``.

        Returns:
            An ``nn.Sequential`` model, or ``None`` when ``name`` is not recognised.
        """
        if name == "no_batch_norm":
            with_batch_norm = False
        elif name == "batch_norm":
            with_batch_norm = True
        else:
            return None

        # Seed right before construction so repeated calls give the same initial weights.
        torch.manual_seed(50)

        layers = [
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        if with_batch_norm:
            layers.append(nn.BatchNorm2d(6))        # 6 = output channels of the previous conv layer

        layers.extend([
            nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(start_dim=1),
            nn.Linear(in_features=12*4*4, out_features=120),
            nn.ReLU(),
        ])
        if with_batch_norm:
            layers.append(nn.BatchNorm1d(120))      # 120 = output features of the previous linear layer

        layers.extend([
            nn.Linear(in_features=120, out_features=60),
            nn.ReLU(),
            nn.Linear(in_features=60, out_features=10),
        ])
        return nn.Sequential(*layers)


In [14]:
# Training sets selectable through the `train_set` hyperparameter.
trainsets = {
    'not_normal':train_set,
    'normal': train_set_normal
}
# Hyperparameter grid; one run is executed for every combination of values.
params = OrderedDict(
    lr = [0.01],
    batch_size = [1000],
    shuffle=[True],
    num_workers = [1],
    device = ['cpu'],
    train_set = ['normal', 'not_normal'],
    network = ['no_batch_norm', 'batch_norm']
)

NUM_EPOCHS = 5      # epochs trained per run

m = RunManager()
for run in RunBuilder.get_runs(params):

    device = torch.device(run.device)
    network = NetworkFactory.get_network(run.network).to(device)
    loader = DataLoader(trainsets[run.train_set], batch_size=run.batch_size, shuffle=run.shuffle, num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr = run.lr)

    m.begin_run(run, network, loader)
    for epoch in range(NUM_EPOCHS):
        m.begin_epoch()
        for batch in loader:
            images, labels = batch      # get batch
            # The network was moved to `device`; the batch must follow it,
            # otherwise any non-CPU device setting fails in the forward pass.
            images, labels = images.to(device), labels.to(device)
            preds = network(images)     # pass batch
            loss = F.cross_entropy(preds, labels)   # calculate loss
            optimizer.zero_grad()       # zero gradients
            loss.backward()             # calculate gradients
            optimizer.step()            # update weights

            m.track_loss(loss)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
m.save("results")

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers,device,train_set,network
0,1,1,0.906163,0.65635,34.096231,38.631147,0.01,1000,True,1,cpu,normal,no_batch_norm
1,1,2,0.478183,0.820067,23.827635,62.603346,0.01,1000,True,1,cpu,normal,no_batch_norm
2,1,3,0.401715,0.852017,22.459697,85.180492,0.01,1000,True,1,cpu,normal,no_batch_norm
3,1,4,0.353065,0.8704,26.485103,111.809893,0.01,1000,True,1,cpu,normal,no_batch_norm
4,1,5,0.324558,0.87945,19.341428,131.230799,0.01,1000,True,1,cpu,normal,no_batch_norm
5,2,1,0.59174,0.785783,16.938822,21.416579,0.01,1000,True,1,cpu,normal,batch_norm
6,2,2,0.358458,0.8664,22.773369,44.286071,0.01,1000,True,1,cpu,normal,batch_norm
7,2,3,0.308045,0.884017,18.307723,62.708985,0.01,1000,True,1,cpu,normal,batch_norm
8,2,4,0.288121,0.89115,18.373741,81.191927,0.01,1000,True,1,cpu,normal,batch_norm
9,2,5,0.270483,0.8983,18.237119,99.538632,0.01,1000,True,1,cpu,normal,batch_norm


In [15]:
pd.DataFrame.from_dict(m.run_data).sort_values('accuracy', ascending=False) # sort the results by accuracy, highest first

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers,device,train_set,network
19,4,5,0.262084,0.902133,16.675704,83.068348,0.01,1000,True,1,cpu,not_normal,batch_norm
9,2,5,0.270483,0.8983,18.237119,99.538632,0.01,1000,True,1,cpu,normal,batch_norm
18,4,4,0.278265,0.895483,16.206564,66.243871,0.01,1000,True,1,cpu,not_normal,batch_norm
8,2,4,0.288121,0.89115,18.373741,81.191927,0.01,1000,True,1,cpu,normal,batch_norm
17,4,3,0.301399,0.887117,17.221055,49.901066,0.01,1000,True,1,cpu,not_normal,batch_norm
7,2,3,0.308045,0.884017,18.307723,62.708985,0.01,1000,True,1,cpu,normal,batch_norm
4,1,5,0.324558,0.87945,19.341428,131.230799,0.01,1000,True,1,cpu,normal,no_batch_norm
16,4,2,0.334363,0.876467,14.095695,32.570455,0.01,1000,True,1,cpu,not_normal,batch_norm
14,3,5,0.34825,0.872983,15.779137,78.658988,0.01,1000,True,1,cpu,not_normal,no_batch_norm
3,1,4,0.353065,0.8704,26.485103,111.809893,0.01,1000,True,1,cpu,normal,no_batch_norm


In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)
torch.set_grad_enabled(True)

from torch.utils.tensorboard import SummaryWriter   # 用来将网络数据发送到tensorboard中

import matplotlib.pyplot as plt 
import math
import time
import pandas as pd
import json
from IPython.display import clear_output

from collections import OrderedDict
from collections import namedtuple
from itertools import product

class RunBuilder():
    """Expands a hyperparameter grid into one named tuple per combination."""

    @staticmethod          # callable as RunBuilder.get_runs(...) without creating an instance
    def get_runs(params):
        """Return a list with one ``Run`` named tuple per hyperparameter combination.

        Args:
            params: mapping of hyperparameter name -> iterable of candidate values.
                The key order defines the field order of the resulting tuples.

        Returns:
            list of ``Run`` named tuples covering the Cartesian product of all
            candidate values, in ``itertools.product`` order.
        """
        Run = namedtuple("Run", params.keys())
        return [Run(*combination) for combination in product(*params.values())]
class RunManager():
    """Collects per-epoch training statistics for a sequence of runs.

    For every run it opens a TensorBoard ``SummaryWriter``, and for every epoch
    it records loss, accuracy, durations and the run's hyperparameters into
    ``run_data`` (a list of ordered dicts, one per finished epoch).
    """

    def __init__(self):
        # Per-epoch accumulators; reset in begin_epoch().
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None

        # Per-run state; populated in begin_run().
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None   # was misspelled "run_strat_time", leaving this attribute undefined

        self.network = None
        self.loader = None
        self.tb = None       # SummaryWriter of the current run

    def begin_run(self, run, network, loader):
        """Start a new run: remember its objects and log a sample batch and the graph.

        Args:
            run: named tuple holding all hyperparameters of this run.
            network: the model being trained.
            loader: the DataLoader used for training.
        """
        self.run_start_time = time.time()

        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}')    # the run's parameters name the log directory

        images, _ = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image('image', grid)

        # add_graph runs a forward pass, so the sample batch has to live on
        # the same device as the network's parameters.
        first_param = next(self.network.parameters(), None)
        if first_param is not None:
            images = images.to(first_param.device)
        self.tb.add_graph(self.network, images)

    def end_run(self):
        """Finish the current run: close the writer and reset the epoch counter."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        """Start a new epoch: start its timer and reset the per-epoch accumulators."""
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        """Finish the epoch: log scalars/histograms, store the results row and display it."""
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        num_samples = len(self.loader.dataset)
        loss = self.epoch_loss / num_samples
        accuracy = self.epoch_num_correct / num_samples

        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # Parameters that received no gradient have grad None, which
            # add_histogram cannot handle.
            if param.grad is not None:
                self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

        # Ordered dict keeps the column order of the results table stable.
        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results["loss"] = loss
        results["accuracy"] = accuracy
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration
        # Append every hyperparameter of this run as an extra column.
        for k, v in self.run_params._asdict().items():
            results[k] = v

        self.run_data.append(results)
        df = pd.DataFrame.from_dict(self.run_data, orient='columns')

        clear_output(wait=True)    # replace the previous table in the notebook output
        display(df)                # display is supplied by the IPython/Jupyter runtime, not imported here

    def track_loss(self, loss, batch_size=None):
        """Add one batch's loss to the epoch total, weighted by the batch size.

        Args:
            loss: scalar loss tensor of the batch.
            batch_size: number of samples in this batch. Defaults to
                ``self.loader.batch_size``; pass the real size for a smaller
                final batch so the epoch average is not inflated.
        """
        if batch_size is None:
            batch_size = self.loader.batch_size
        self.epoch_loss += loss.item() * batch_size

    def track_num_correct(self, preds, labels):
        """Add the number of correct predictions in this batch to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        """Count rows of ``preds`` whose argmax along dim 1 equals the label."""
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self, fileName):
        """Write all recorded results to ``{fileName}.csv`` and ``{fileName}.json``."""
        pd.DataFrame.from_dict(
            self.run_data,
            orient='columns'
        ).to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)
# FashionMNIST training split, downloaded under ./data/FashionMNIST if needed;
# the only transform applied is the conversion of each image to a tensor.
train_set = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor()
    ])
)
class NetworkFactory():
    """Builds the two small CNN variants compared in this notebook."""

    @staticmethod
    def get_network(name):
        """Return a freshly initialised network selected by ``name``.

        Args:
            name: ``"no_batch_norm"`` or ``"batch_norm"``.

        Returns:
            An ``nn.Sequential`` model, or ``None`` when ``name`` is not recognised.
        """
        if name == "no_batch_norm":
            with_batch_norm = False
        elif name == "batch_norm":
            with_batch_norm = True
        else:
            return None

        # Seed right before construction so repeated calls give the same initial weights.
        torch.manual_seed(50)

        layers = [
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        if with_batch_norm:
            layers.append(nn.BatchNorm2d(6))        # 6 = output channels of the previous conv layer

        layers.extend([
            nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(start_dim=1),
            nn.Linear(in_features=12*4*4, out_features=120),
            nn.ReLU(),
        ])
        if with_batch_norm:
            layers.append(nn.BatchNorm1d(120))      # 120 = output features of the previous linear layer

        layers.extend([
            nn.Linear(in_features=120, out_features=60),
            nn.ReLU(),
            nn.Linear(in_features=60, out_features=10),
        ])
        return nn.Sequential(*layers)
# Hyperparameter grid; one run is executed for every combination of values.
params = OrderedDict(
    lr = [0.01],
    batch_size = [1000],
    shuffle=[True],
    num_workers = [1],
    device = ['cpu'],
    network = ['no_batch_norm', 'batch_norm']
)

NUM_EPOCHS = 1      # epochs trained per run

m = RunManager()
for run in RunBuilder.get_runs(params):

    device = torch.device(run.device)
    network = NetworkFactory.get_network(run.network).to(device)
    loader = DataLoader(train_set, batch_size=run.batch_size, shuffle=run.shuffle, num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr = run.lr)

    m.begin_run(run, network, loader)
    for epoch in range(NUM_EPOCHS):
        m.begin_epoch()
        for batch in loader:
            images, labels = batch      # get batch
            # The network was moved to `device`; the batch must follow it,
            # otherwise any non-CPU device setting fails in the forward pass.
            images, labels = images.to(device), labels.to(device)
            preds = network(images)     # pass batch
            loss = F.cross_entropy(preds, labels)   # calculate loss
            optimizer.zero_grad()       # zero gradients
            loss.backward()             # calculate gradients
            optimizer.step()            # update weights

            m.track_loss(loss)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
m.save("results")

# Sort the results by accuracy, highest first.
pd.DataFrame.from_dict(m.run_data).sort_values('accuracy', ascending=False)

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers,device,network
0,1,1,0.986095,0.6258,14.278207,19.520839,0.01,1000,True,1,cpu,no_batch_norm
1,2,1,0.56152,0.7969,14.784825,20.522324,0.01,1000,True,1,cpu,batch_norm


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle,num_workers,device,network
1,2,1,0.56152,0.7969,14.784825,20.522324,0.01,1000,True,1,cpu,batch_norm
0,1,1,0.986095,0.6258,14.278207,19.520839,0.01,1000,True,1,cpu,no_batch_norm
