In [1]:
import torch

## code from d2l package

In [2]:
!nvidia-smi

Wed Jun 01 22:16:43 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 456.71       Driver Version: 456.71       CUDA Version: 11.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce RTX 3090   WDDM  | 00000000:04:00.0 Off |                  N/A |
| 48%   51C    P2   109W / 350W |    643MiB / 24576MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce RTX 3090   WDDM  | 00000000:65:00.0  On |                  N/A |
| 30%   36C    P8    12W / 350W |    255MiB / 24576MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-------

In [3]:
class MLP(torch.nn.Module):
    def __init__(self):
        super().__init__()# 父类构造函数
        self.hidden = torch.nn.Linear(20, 256)  # 隐藏层
        self.out = torch.nn.Linear(256, 10)  # 输出层

    # 定义模型的前向传播，即如何根据输入X返回所需的模型输出
    def forward(self, X):
        # 注意，这里我们使用ReLU的函数版本，其在torch.nn.functional模块中定义。
        return self.out(torch.nn.functional.relu(self.hidden(X)))


def try_gpu(i=0):
    if torch.cuda.device_count() >= i + 1:
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')

def try_all_gpus():
    devices = [torch.device(f'cuda:{i}')
             for i in range(torch.cuda.device_count())]
    return devices if devices else [torch.device('cpu')]

## homework code for ch5 sec1

In [4]:
#T1
class Concat(torch.nn.Module):
    def __init__(self, block=None, count=2, *blocks, **kwargs):
        '''
        利用block构建平行网络，可以是count个同种block，或者手动指定已经初始化的自定义blocks
        '''
        super().__init__()
        if block:
            self.blocks = torch.nn.ModuleList([block(**kwargs) for _ in range(count)])
        else:
            self.blocks = torch.nn.ModuleList(*blocks)
    
    def forward(self, x):
        return torch.cat(tuple(block(x) for block in self.blocks), dim=1)


t = Concat(block=torch.nn.Linear, count=2, in_features=10, out_features=5)
t(torch.rand((5, 10))).shape

torch.Size([5, 10])

In [5]:
print(t)
print(t._modules)

Concat(
  (blocks): ModuleList(
    (0): Linear(in_features=10, out_features=5, bias=True)
    (1): Linear(in_features=10, out_features=5, bias=True)
  )
)
OrderedDict([('blocks', ModuleList(
  (0): Linear(in_features=10, out_features=5, bias=True)
  (1): Linear(in_features=10, out_features=5, bias=True)
))])


In [6]:
#T2 工厂函数的意思就是，返回一类对象实例的函数。
def torch_factory(block, count, **kwargs):
    return Concat(block=block, count=count, **kwargs)


print(torch_factory(torch.nn.Linear, 5, in_features=10, out_features=5))

Concat(
  (blocks): ModuleList(
    (0): Linear(in_features=10, out_features=5, bias=True)
    (1): Linear(in_features=10, out_features=5, bias=True)
    (2): Linear(in_features=10, out_features=5, bias=True)
    (3): Linear(in_features=10, out_features=5, bias=True)
    (4): Linear(in_features=10, out_features=5, bias=True)
  )
)


## homework code for ch5 sec2

In [7]:
model = MLP()
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[ 0.0876,  0.2014, -0.1997,  ..., -0.0964, -0.1175, -0.0261],
        [ 0.0434,  0.1954,  0.0686,  ..., -0.2101, -0.1825,  0.1632],
        [-0.0588, -0.1238, -0.0128,  ..., -0.1722, -0.2157, -0.1051],
        ...,
        [ 0.1973, -0.0645, -0.1410,  ..., -0.1386,  0.1426, -0.0180],
        [ 0.2084, -0.1913,  0.1983,  ..., -0.0404, -0.1468, -0.2148],
        [ 0.0262, -0.0401, -0.0232,  ...,  0.1988,  0.0845,  0.2073]],
       requires_grad=True)
Parameter containing:
tensor([-2.1719e-01, -1.6592e-01,  1.5490e-01, -1.3255e-01, -1.5105e-03,
         2.2077e-01,  7.6267e-02, -3.4467e-02, -1.8140e-01,  9.1054e-02,
         2.0278e-01,  2.3464e-02,  1.7050e-01,  4.5233e-02,  7.1811e-02,
        -1.9127e-01, -1.3227e-01, -1.0372e-01, -1.8215e-01, -7.5810e-02,
        -1.9380e-01, -1.8864e-01,  7.5412e-02,  2.4623e-02, -3.5399e-02,
         1.3543e-01,  9.9011e-02, -1.2826e-01,  2.0938e-01,  5.5167e-02,
        -1.6369e-01,  1.2675e-01, -1.4339e-01,  7.0082e-0

In [8]:
for named_param in model.named_parameters():
    print(named_param)

('hidden.weight', Parameter containing:
tensor([[ 0.0876,  0.2014, -0.1997,  ..., -0.0964, -0.1175, -0.0261],
        [ 0.0434,  0.1954,  0.0686,  ..., -0.2101, -0.1825,  0.1632],
        [-0.0588, -0.1238, -0.0128,  ..., -0.1722, -0.2157, -0.1051],
        ...,
        [ 0.1973, -0.0645, -0.1410,  ..., -0.1386,  0.1426, -0.0180],
        [ 0.2084, -0.1913,  0.1983,  ..., -0.0404, -0.1468, -0.2148],
        [ 0.0262, -0.0401, -0.0232,  ...,  0.1988,  0.0845,  0.2073]],
       requires_grad=True))
('hidden.bias', Parameter containing:
tensor([-2.1719e-01, -1.6592e-01,  1.5490e-01, -1.3255e-01, -1.5105e-03,
         2.2077e-01,  7.6267e-02, -3.4467e-02, -1.8140e-01,  9.1054e-02,
         2.0278e-01,  2.3464e-02,  1.7050e-01,  4.5233e-02,  7.1811e-02,
        -1.9127e-01, -1.3227e-01, -1.0372e-01, -1.8215e-01, -7.5810e-02,
        -1.9380e-01, -1.8864e-01,  7.5412e-02,  2.4623e-02, -3.5399e-02,
         1.3543e-01,  9.9011e-02, -1.2826e-01,  2.0938e-01,  5.5167e-02,
        -1.6369e-01,  

In [9]:
print(model.out.weight.grad)

None


## homework code for ch5 sec3

In [10]:
# T1
class DimensionReduction(torch.nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.weight = torch.rand((output_dim, input_dim, input_dim))

    def forward(self, x):
        return torch.cat(tuple((
                                torch.sum(torch.cat(tuple(self.weight[k][i][j]*x[:, i:i+1]*x[:, j:j+1] 
                                                    for i in range(x.shape[1]) 
                                                    for j in range(x.shape[1])), dim=1), 
                                                    dim=1, keepdim=True)
                                ) for k in range(self.weight.shape[0])), dim=1)


In [11]:
# T2
class FrotierLayer(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        x_f = torch.fft.fft(x)
        return x_f[:, :round(x.shape[0]/2)]

## homework code for ch5 sec4

None

## homework code for ch5 sec5

In [12]:
# T1
t1 = torch.rand((10000,10000))
t2 = torch.rand((10000,10000))

In [13]:
%%time
for _ in range(50):
    t1 @ t2

Wall time: 1min 13s


In [14]:
t1 = t1.to(try_gpu())
t2 = t2.to(try_gpu())

In [15]:
%%time
for _ in range(50):
    t1@t2

Wall time: 976 ms


In [16]:
# 释放gpu高内存
del t1, t2

In [17]:
t1 = torch.rand((10,10))
t2 = torch.rand((10,10))

In [18]:
%%time
for _ in range(500):
    t1 @ t2


Wall time: 1.99 ms


In [19]:
t1 = t1.to(try_gpu())
t2 = t2.to(try_gpu())

In [20]:
%%time
for _ in range(500):
    t1 @ t2

Wall time: 19 ms


In [21]:
#T4
t1 = torch.rand((10000,10000)).to(try_gpu(0))
t2 = torch.rand((10000,10000)).to(try_gpu(0))
t3, t4 = t1.to(try_gpu(1)), t2.to(try_gpu(1))


In [22]:
%%time
for _ in range(100):
    t1@t2


Wall time: 2.99 ms


In [23]:
%%time
for _ in range(50):
    t1@t2
    t3@t4

Wall time: 1.1 s
