# 每天一个PyTorch用法

In [1]:
import torch
from torch import nn

# 基本运算

In [5]:
# Random 3x3 tensor reused as the example input for the masking cells below.
attn = torch.randn(3, 3)
attn

tensor([[-0.6821, -0.9529,  0.9229],
        [-1.0992,  0.8397,  0.5995],
        [ 0.3650,  1.6853, -2.2795]])

In [16]:
# Decoder-style mask: a lower-triangular matrix of ones shaped like attn.
mask_decoder = torch.tril(torch.ones_like(attn))
mask_decoder

tensor([[1., 0., 0.],
        [1., 1., 0.],
        [1., 1., 1.]])

In [17]:
# Integer mask with ones on the diagonal and zeros everywhere else.
mask = torch.eye(3, dtype=torch.long)
mask

tensor([[1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]])

In [30]:
# Replace the entries where mask == 1 with 0. masked_fill (no trailing
# underscore) returns a new tensor rather than modifying attn in place.
attn.masked_fill(mask == 1, 0)

tensor([[ 0.0000, -0.9529,  0.9229],
        [-1.0992,  0.0000,  0.5995],
        [ 0.3650,  1.6853,  0.0000]])

In [20]:
attn

tensor([[-0.6821, -0.9529,  0.9229],
        [-1.0992,  0.8397,  0.5995],
        [ 0.3650,  1.6853, -2.2795]])

In [29]:
# `mask_decoder == 0` yields a boolean mask; the entries where it is True
# (the positions above the diagonal) are replaced with -inf.
attn.masked_fill(mask_decoder == 0, float("-inf"))

tensor([[-0.6821,    -inf,    -inf],
        [-1.0992,  0.8397,    -inf],
        [ 0.3650,  1.6853, -2.2795]])

# 数据

In [43]:
class A:
    """Minimal holder object with a single fixed ``name`` attribute."""

    def __init__(self):
        self.name = "zhangwanyu"


class B:
    """Object that stores an age and owns an ``A`` instance.

    Args:
        age: Value stored unchanged on ``self.age``.
    """

    def __init__(self, age):
        self.age = age
        # The attribute name is kept as-is (sic) because other cells read
        # ``b.alis_name``. ``A`` is looked up when ``B(...)`` is called, so
        # rebinding the global name ``A`` changes what gets stored here.
        self.alis_name = A()

    def print_A(self):
        """Print the ``name`` of the owned ``A`` instance."""
        print(self.alis_name.name)

    # Backward-compatible alias for the original, misspelled method name.
    ptint_A = print_A


b = B(24)

In [46]:
b.age

24

In [44]:
c = B(25)

In [47]:
c.age

25

In [45]:
d = B(26)

In [48]:
d.age

26

In [39]:
b

<__main__.B at 0x7fc695e4f430>

In [40]:
d

<__main__.B at 0x7fc695e672e0>

In [41]:
c

<__main__.B at 0x7fc695c7ff10>

In [26]:
def A():
    """Return the fixed name string.

    NOTE(review): defining this function rebinds the module-level name ``A``;
    a class previously bound to ``A`` is no longer reachable under that name.
    """
    return "张婉玉"

In [27]:
A()

'张婉玉'

In [28]:
b.age

24

In [30]:
b.ptint_A()

AttributeError: 'B' object has no attribute 'name'

In [14]:
b.alis_name.name

'zhangwanyu'

In [17]:
a = A()

In [16]:
a.name

'zhangwanyu'

In [19]:
alis_name = A()

In [21]:
alis_name.name

'zhangwanyu'

# 模型

## nn.Parameter的使用
[PyTorch中的torch.nn.Parameter() 详解](https://blog.csdn.net/weixin_44966641/article/details/118730730)

In [5]:
torch.nn.Parameter(torch.randn(1, 2))

Parameter containing:
tensor([[-0.9437,  1.3329]], requires_grad=True)

## nn.Embedding的使用
[Pytorch中的nn.Embedding()](https://blog.csdn.net/qq_38883844/article/details/104331382)

### 初始化Embedding

In [36]:
# Create an Embedding layer: a lookup table of 10 vectors, each of dimension 3.
embeddings = nn.Embedding(num_embeddings=10, embedding_dim=3)
# Integer indices of shape (2, 4); each index selects one row of the table.
x = torch.LongTensor([[1, 2, 5, 4], [2, 3, 4, 9]])
# The lookup returns one 3-dimensional vector per index, i.e. shape (2, 4, 3).
out = embeddings(x)
out

tensor([[[-0.6455,  0.7581,  0.3701],
         [-0.2305, -1.3838,  0.5917],
         [-0.1369, -0.2685, -1.9981],
         [-3.5282, -0.1207,  1.8074]],

        [[-0.2305, -1.3838,  0.5917],
         [-1.2788,  0.7246, -0.5846],
         [-3.5282, -0.1207,  1.8074],
         [ 1.2394,  1.1400,  0.3093]]], grad_fn=<EmbeddingBackward>)

In [37]:
# Using padding_idx: the weight row at that index (row 1 here) starts as zeros.
embeddings = nn.Embedding(num_embeddings=10, embedding_dim=3, padding_idx=1)
embeddings.weight

Parameter containing:
tensor([[ 0.2267,  0.3589,  0.7066],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.3672, -0.5855,  1.2222],
        [ 0.1638,  0.9372, -0.7770],
        [ 0.0570,  0.7663, -1.9659],
        [ 0.4338,  0.1755, -0.3985],
        [-0.4119, -0.1239,  0.8188],
        [ 1.3993,  1.3706, -1.7001],
        [-1.4491, -1.0667,  0.2143],
        [-0.0447,  0.5594,  0.7713]], requires_grad=True)

### 加载预训练向量

#### 方法一(建议)

In [38]:
pre_train_embed = torch.rand((4, 5))
embeds = nn.Embedding.from_pretrained(pre_train_embed)

In [39]:
pre_train_embed

tensor([[0.0236, 0.6508, 0.0331, 0.0976, 0.7699],
        [0.3862, 0.3805, 0.3369, 0.1542, 0.6186],
        [0.6175, 0.8434, 0.6832, 0.2313, 0.9607],
        [0.1572, 0.3083, 0.9905, 0.5265, 0.9703]])

In [40]:
embeds.weight

Parameter containing:
tensor([[0.0236, 0.6508, 0.0331, 0.0976, 0.7699],
        [0.3862, 0.3805, 0.3369, 0.1542, 0.6186],
        [0.6175, 0.8434, 0.6832, 0.2313, 0.9607],
        [0.1572, 0.3083, 0.9905, 0.5265, 0.9703]])

In [41]:
# Frozen by default, i.e. the embedding parameters are not updated.
embeds.weight.requires_grad

False

#### 方法二

In [42]:
embeds = nn.Embedding(num_embeddings=4, embedding_dim=5)
embeds.weight

Parameter containing:
tensor([[-0.7435,  2.1544,  0.9816,  1.4552,  1.0855],
        [ 1.4446, -0.6006,  2.1655, -1.5185,  0.2170],
        [-0.9108, -0.4313, -0.5445, -1.6522,  0.7080],
        [-1.6187,  0.1293,  0.8513,  2.2911, -1.2237]], requires_grad=True)

In [43]:
embeds.weight = nn.Parameter(torch.rand(4, 5))

In [44]:
embeds.weight

Parameter containing:
tensor([[0.4243, 0.2984, 0.1440, 0.3879, 0.1798],
        [0.8419, 0.7575, 0.7393, 0.9052, 0.1645],
        [0.1252, 0.2824, 0.4781, 0.1542, 0.6495],
        [0.0816, 0.2640, 0.5618, 0.4743, 0.1947]], requires_grad=True)

In [45]:
# With this approach the weight is trainable by default (requires_grad is True).
embeds.weight.requires_grad

True

In [46]:
# Turn off gradient updates for the embedding weight.
embeds.weight.requires_grad = False

In [47]:
embeds.weight.requires_grad

False

#### 方法三

In [48]:
embeds = nn.Embedding(num_embeddings=4, embedding_dim=5)
embeds.weight

Parameter containing:
tensor([[ 0.2325,  0.1292,  0.3445, -0.4335, -3.3033],
        [ 0.3277,  1.0817,  0.4431,  0.3838,  0.1849],
        [ 0.4300, -0.2442,  0.5124, -0.2554,  0.9184],
        [ 0.8845, -1.3623,  0.1273,  1.0796,  1.8181]], requires_grad=True)

In [49]:
embeds.weight.data = torch.rand(4, 5)

In [50]:
embeds.weight

Parameter containing:
tensor([[0.9770, 0.7766, 0.8749, 0.1747, 0.3754],
        [0.1453, 0.9039, 0.3793, 0.8667, 0.4844],
        [0.4188, 0.0663, 0.4424, 0.3559, 0.3196],
        [0.3518, 0.6406, 0.4359, 0.0081, 0.9686]], requires_grad=True)

In [51]:
embeds.weight.requires_grad = False

In [52]:
embeds.weight.requires_grad

False

## nn.Conv1d

## nn.DataParallel

## nn.LayerNorm与nn.BatchNorm

In [1]:
import torch.nn as nn


class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    The convolutional stack is sized for batched inputs of shape
    (N, 3, 224, 224), which it reduces to (N, 128, 6, 6). The classifier head
    maps the flattened 128 * 6 * 6 features to ``num_classes`` scores.

    Args:
        num_classes: Size of the final Linear layer's output.
        init_weights: If True, re-initialise every Conv2d and Linear layer
            via ``_initialize_weights``; otherwise the layers keep the
            initialisation they were constructed with.
    """

    def __init__(self, num_classes=1000, init_weights=False):
        super().__init__()
        self.features = nn.Sequential(
            # input[3, 224, 224]  output[48, 55, 55]
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            # output[48, 27, 27]
            nn.MaxPool2d(kernel_size=3, stride=2),
            # output[128, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            # output[128, 13, 13]
            nn.MaxPool2d(kernel_size=3, stride=2),
            # output[192, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # output[192, 13, 13]
            nn.Conv2d(192, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # output[128, 13, 13]
            nn.Conv2d(192, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # output[128, 6, 6]
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Batched input of shape (N, 3, 224, 224).

        Returns:
            Tensor of shape (N, num_classes).
        """
        x = self.features(x)
        # The first Linear layer expects 128 * 6 * 6 features per sample, so
        # the (C, H, W) axes must be collapsed into one before the classifier.
        x = x.flatten(start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Re-initialise Conv2d and Linear layers in place.

        Conv2d weights use Kaiming-normal (fan_out, relu) initialisation;
        Linear weights are drawn from N(0, 0.01). Biases are set to zero.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


# Demo: build the model and list its sub-modules. The commented-out loops are
# alternative traversals that can be enabled instead for comparison.
if __name__ == '__main__':

    model = AlexNet()

#     print('model children: ')
#     for module in model.children():
#         print(module)

#     print('model modules: ')
#     for module in model.modules():
#         print(module)

#     print('model named children: ')
#     for name, module in model.named_children():
#         print('name: {}, module: {}'.format(name, module))

    # named_modules() yields (name, module) pairs; the first entry printed has
    # an empty name and is the model itself.
    print('model named modules: ')
    for name, module in model.named_modules():
        print('name: {}, module: {}'.format(name, module))

#     print('model named parameters: ')
#     for name, parameter in model.named_parameters():
#          print('name: {}, parameter: {}'.format(name, parameter))

#     print('parameters: ')
#     for parameter in model.parameters():
#         print('parameter: {}'.format(parameter))

model named modules: 
name: , module: AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 48, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(48, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(192, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=4608, out_features=2048, bias=True)
    (2): ReLU(

# 激活函数

## GELU

# 损失

## BCELoss与CrossEntropyLoss对比

In [3]:
import numpy as np
import torch

In [4]:
array_data = np.array([[1, 2, 3], [4, 5, 6]])
array_data

array([[1, 2, 3],
       [4, 5, 6]])

In [6]:
type(array_data)

numpy.ndarray

In [5]:
tensor_data = torch.from_numpy(array_data)
tensor_data

tensor([[1, 2, 3],
        [4, 5, 6]])

In [7]:
type(tensor_data)

torch.Tensor