In [11]:
import torch
from torch import nn
from torch.nn import functional as F

net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10)
)

X = torch.rand(2, 20)

print(net(X))

tensor([[-0.0277, -0.2021,  0.1182,  0.0136, -0.0244, -0.0506,  0.0403,  0.0109,
         -0.2502, -0.0485],
        [ 0.0291, -0.1190,  0.0490, -0.0420,  0.0313,  0.0423,  0.0707,  0.0024,
         -0.1651,  0.0424]], grad_fn=<AddmmBackward0>)


In [12]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, x):
        return self.out(F.relu(self.hidden(x)))

In [14]:
net  = MLP()
print(net(X))

tensor([[-0.1515, -0.0734, -0.0552, -0.1136, -0.1307,  0.1394,  0.2825,  0.0846,
          0.2862, -0.1795],
        [-0.0513, -0.1965, -0.1761, -0.0359, -0.2581,  0.0568,  0.2420, -0.0529,
          0.2803, -0.1158]], grad_fn=<AddmmBackward0>)


In [15]:
class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            self.add_module(str(idx), module)

    def forward(self, x):
        for block in self.children():
            x = block(x)
        return x


In [17]:
net = MySequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10)
)
print(net(X))

tensor([[-0.0053,  0.0406,  0.0556,  0.1047,  0.1005, -0.0892,  0.0493, -0.1996,
          0.0351, -0.1441],
        [-0.0653, -0.0378,  0.0875,  0.1515,  0.1838, -0.1095,  0.0607, -0.1725,
          0.1080, -0.2009]], grad_fn=<AddmmBackward0>)


In [19]:
class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.rand_weight = torch.rand((20, 20),requires_grad=False)
        self.linear = nn.Linear(20, 20)
    def forward(self, x):
        x = self.linear(x)
        x = F.relu(torch.mm(x, self.rand_weight) + 1)
        x = self.linear(x)
        while x.abs().sum() > 1:
            x /= 2

        return x.sum()

In [21]:
net = FixedHiddenMLP()
print(net(X))

tensor(0.2750, grad_fn=<SumBackward0>)


In [23]:
class NestMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU()
        )
        self.linear = nn.Linear(32, 16)

    def forward(self, x):
        return self.linear(self.net(x))

chimera = nn.Sequential(
    NestMLP(),
    nn.Linear(16, 20),
    FixedHiddenMLP()

)

chimera(X)



torch.Size([2, 20])


In [2]:
import torch
from torch import nn
net = nn.Sequential(
    nn.Linear(4,8),
    nn.ReLU(),
    nn.Linear(8, 1)
)

X = torch.rand(2, 4)
print(net(X))

tensor([[0.1306],
        [0.1476]], grad_fn=<AddmmBackward0>)


In [4]:
print(net[2].state_dict())

OrderedDict([('weight', tensor([[ 0.0623,  0.0907, -0.0481,  0.3524,  0.0970, -0.0724, -0.0956,  0.2872]])), ('bias', tensor([0.0126]))])


In [6]:
print(type(net[2].bias))
print(net[2].bias)
print(net[2].bias.data)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.0126], requires_grad=True)
tensor([0.0126])


In [8]:
print(net[2].weight.grad)

None


In [11]:
def block1():
    return nn.Sequential(
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8,4),
        nn.ReLU()
    )
def block2():
    net = nn.Sequential()
    for i in range(4):
        net.add_module(f'block_{i}', block1())

    return net

In [13]:
rgnet = nn.Sequential(
    block2(),
    nn.Linear(4, 1)
)

print(rgnet(X))

tensor([[0.4738],
        [0.4735]], grad_fn=<AddmmBackward0>)


In [15]:
print(rgnet)

Sequential(
  (0): Sequential(
    (block_0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block_1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block_2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block_3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [17]:
def init_normal(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.zeros_(m.bias)
net.apply(init_normal)
net[0].weight.data[0], net[0].bias.data

(tensor([-0.0024, -0.0018,  0.0259,  0.0108]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

In [19]:
def init_constant(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 1)
        nn.init.zeros_(m.bias)
net.apply(init_constant)
net[0].weight.data[0], net[0].bias.data

(tensor([1., 1., 1., 1.]), tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

# 延迟初始化

- 框架能够自动推断参数形状
- 可以通过模型传递数据，使框架最终初始化参数

In [3]:
import torch
import torch.nn.functional as F
from torch import nn

class CenteredLayer(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, X):

        return X - X.mean()


In [4]:
layer = CenteredLayer()
print(layer(torch.FloatTensor([1, 2, 3, 4, 5])))

tensor([-2., -1.,  0.,  1.,  2.])


In [5]:
net = nn.Sequential(
    nn.Linear(8, 128),
    CenteredLayer(),
)

In [6]:
Y = net(torch.rand(4,8))
Y.mean()

tensor(-3.7253e-09, grad_fn=<MeanBackward0>)

In [7]:
class MyLinear(nn.Module):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_features, out_features))
        self.bias = nn.Parameter(torch.zeros(out_features))

    def forward(self, X):
        linear = torch.matmul(X, self.weight.data) + self.bias.data
        return F.relu(linear)

In [8]:
linear = MyLinear(5,3)
linear.weight

Parameter containing:
tensor([[ 0.1619, -0.4214,  0.4307],
        [ 2.1459, -0.0796,  1.3273],
        [ 0.8525, -1.0634, -0.1208],
        [ 0.3868, -0.3044, -1.2747],
        [ 2.1954,  0.7122,  0.7979]], requires_grad=True)

In [9]:
linear(torch.rand(2,5))

tensor([[1.7338, 0.0000, 0.6875],
        [1.8897, 0.0000, 0.0000]])

In [10]:
x = torch.arange(4)

In [11]:
x

tensor([0, 1, 2, 3])

In [12]:
torch.save(x, 'x-file')

In [13]:
x2 = torch.load('x-file')
x2

tensor([0, 1, 2, 3])

In [14]:
y = torch.zeros(4)
torch.save((x, y), 'xy-file')

In [15]:
x2, y2 = torch.load('xy-file')

In [16]:
x2,y2

(tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.]))

In [17]:
mydict = {'x': x, 'y': y}
torch.save(mydict, 'mydict-file')
mydict2 = torch.load('mydict-file')
mydict2

{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}

In [19]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, x):
        return self.out(F.relu(self.hidden(x)))

net = MLP()
X = torch.rand(2, 20)
Y = net(X)
torch.save(net.state_dict(), 'mlp.params')

In [20]:
clone = MLP()
clone.load_state_dict(torch.load('mlp.params'))
clone.eval()

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)

In [21]:
Y_clone = clone(X)
print(Y_clone == Y)

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])


In [4]:
import torch
from torch import nn

torch.device('cpu'),
torch.device('cuda'), torch.device('cuda:0'), torch.device('cuda:1')

(device(type='cuda'),
 device(type='cuda', index=0),
 device(type='cuda', index=1))

In [6]:
torch.cuda.device_count()

1

In [8]:
def try_gpu(i=0):
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

def try_all_gpus():
    return [try_gpu(i) for i in range(torch.cuda.device_count())] or [torch.device('cpu')]

In [10]:
try_gpu(0)


device(type='cuda', index=0)

In [12]:
try_gpu(10)

device(type='cpu')

In [14]:
try_all_gpus()

[device(type='cuda', index=0)]

In [16]:
x = torch.tensor([1, 2, 3])
x.device

device(type='cpu')

In [22]:
X = torch.ones(2, 3, device=try_gpu())
X.device

device(type='cuda', index=0)