## 测试使用transformer

In [26]:
import os
import copy
import torch
from torch import nn
from torch import optim
import numpy as np
import utils.calculate_param as cp
import dataset.data_read as data
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

In [5]:
# Run on the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

### 加载Transformer模型

In [6]:
# Stock PyTorch encoder-decoder Transformer; batch_first=True means tensors are
# laid out as (batch, seq, feature).
model = nn.Transformer(
    d_model=128,
    num_encoder_layers=3,
    num_decoder_layers=3,
    batch_first=True,
)

In [7]:
# Print each parameter's name and shape via the project-local helper (see the output below).
cp.get_info(model)
# Report parameter counts — presumably total / trainable; confirm in utils.calculate_param.
cp.get_parameter_number(model)

模型的参数信息如下：
encoder.layers.0.self_attn.in_proj_weight : torch.Size([384, 128])
encoder.layers.0.self_attn.in_proj_bias : torch.Size([384])
encoder.layers.0.self_attn.out_proj.weight : torch.Size([128, 128])
encoder.layers.0.self_attn.out_proj.bias : torch.Size([128])
encoder.layers.0.linear1.weight : torch.Size([2048, 128])
encoder.layers.0.linear1.bias : torch.Size([2048])
encoder.layers.0.linear2.weight : torch.Size([128, 2048])
encoder.layers.0.linear2.bias : torch.Size([128])
encoder.layers.0.norm1.weight : torch.Size([128])
encoder.layers.0.norm1.bias : torch.Size([128])
encoder.layers.0.norm2.weight : torch.Size([128])
encoder.layers.0.norm2.bias : torch.Size([128])
encoder.layers.1.self_attn.in_proj_weight : torch.Size([384, 128])
encoder.layers.1.self_attn.in_proj_bias : torch.Size([384])
encoder.layers.1.self_attn.out_proj.weight : torch.Size([128, 128])
encoder.layers.1.self_attn.out_proj.bias : torch.Size([128])
encoder.layers.1.linear1.weight : torch.Size([2048, 128])
encode

In [8]:
# Move the model's parameters to the selected device; as the cell's last expression,
# the returned module is displayed as the repr below.
model.to(device)

Transformer(
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, in

### 加载数据并embedding转为需要的tensor

In [31]:
# Load the train and test splits through the project-local reader.
# NOTE(review): X_test / y_test are not used anywhere in the visible part of this notebook.
X_train, y_train = data.load_data_prefix('train')
X_test, y_test = data.load_data_prefix('test')

# Displayed below: 2000 samples with 50 features each.
X_train.shape

(2000, 50)

### 调整batch_size（原先默认为1，现将数据划分为200个长度为10的序列）

In [32]:
# Group consecutive samples into fixed-length sequences: (num_sequences, seq_len, feature).
# -1 lets NumPy infer the number of sequences instead of hard-coding 200, so the cell
# keeps working if the dataset size changes (the sample count must stay divisible by 10).
# NOTE: this cell rebinds X_train / y_train from NumPy arrays to tensors, so the
# data-loading cell has to be re-run before this one can be executed again.
X_train = torch.from_numpy(X_train.reshape(-1, 10, 50)).float()  # 2000 x 50 -> 200 x 10 x 50
y_train = torch.from_numpy(y_train.reshape(-1, 10, 2)).float()  # presumably 2000 x 2 -> 200 x 10 x 2

X_train.shape

torch.Size([200, 10, 50])

### 定义嵌入的规则 使用线性层代替词向量的嵌入

In [33]:
# Linear layers stand in for token embeddings: they lift the 50-d encoder features
# and the 2-d decoder targets to the model width of 128.
embedding_enc = nn.Linear(in_features=50, out_features=128)
embedding_dec = nn.Linear(in_features=2, out_features=128)

In [34]:
# Embed features and targets to the model width, keeping the layout batch * len * d_model,
# then move the results to the selected device.
# NOTE(review): the embedding layers are applied once, outside the model, and are not
# handed to the optimizer defined later, so they would stay at their random initial
# weights — confirm this is intended.
# NOTE(review): this cell overwrites X_train / y_train with their embedded versions, so
# it cannot be re-run without re-running the reshape cell first.
X_train = embedding_enc(X_train)
X_train = X_train.to(device)
y_train = embedding_dec(y_train)
y_train = y_train.to(device)

X_train.shape

torch.Size([200, 10, 128])

### 调用Transformer模型处理

In [13]:
# Forward pass: X_train is the encoder input (src) and y_train the decoder input (tgt).
# NOTE(review): no tgt_mask is passed, so the decoder can attend to every target
# position, including future ones — confirm this is intended.
out = model(X_train, y_train)

In [14]:
# Inspect the shape of the Transformer output.
# NOTE(review): the recorded output below, (1, 2000, 128), looks stale — it appears to
# predate the reshape to (200, 10, 128); re-run the notebook top to bottom to confirm.
out.shape

torch.Size([1, 2000, 128])

### 输出过线性层变为坐标

In [15]:
# Output head: maps each 128-d Transformer output vector to a 2-d coordinate.
projection = nn.Linear(in_features=128, out_features=2)

In [16]:
# Map every d_model-sized output vector to a 2-d coordinate.
# squeeze(0) drops only a singleton batch axis; a bare squeeze() would also collapse any
# other size-1 axis (e.g. a length-1 sequence). The layer is moved to the output's
# device first because it was created on the CPU while the model may live on the GPU.
# NOTE: this cell overwrites `out`; re-run the forward-pass cell before re-running it.
projection = projection.to(out.device)
out = projection(out.squeeze(0))

In [17]:
# Display the projected 2-d outputs.
out

tensor([[-0.2386, -0.2895],
        [-0.2758, -0.0868],
        [-0.6989, -0.5534],
        ...,
        [ 0.3809, -0.2974],
        [-0.0609, -0.1500],
        [-0.2479, -0.1022]], grad_fn=<AddmmBackward0>)

### 定义Transformer的时序模型

In [2]:
class RFID_TRANS(nn.Module):
    """Encoder-decoder Transformer followed by a linear head that emits 2 values per step.

    Inputs must already be embedded to the model width (128) and laid out as
    (batch, seq, feature), because the wrapped ``nn.Transformer`` is built with
    ``d_model=128`` and ``batch_first=True``.
    """

    def __init__(self):
        super().__init__()
        self.Transformer_layer = nn.Transformer(
            d_model=128,
            num_encoder_layers=3,
            num_decoder_layers=3,
            batch_first=True,
        )
        # Projects each 128-d decoder output vector to 2 values.
        self.FC = nn.Linear(128, 2)

    def forward(self, x, tgt=None):
        """Run the Transformer and project its output to 2 values per target step.

        Args:
            x: Encoder input of shape (batch, src_len, 128).
            tgt: Optional decoder input of shape (batch, tgt_len, 128). When omitted,
                ``x`` is used as the decoder input as well.

        Returns:
            Tensor of shape (batch, tgt_len, 2).
        """
        # nn.Transformer.forward requires both src and tgt; calling it with only the
        # source (as the previous version did) raises a TypeError.
        if tgt is None:
            tgt = x
        out = self.Transformer_layer(x, tgt)
        return self.FC(out)


### 开始训练

In [18]:
# Mean-squared-error loss for the coordinate regression.
criterion = nn.MSELoss()
# SGD with heavy momentum; only the Transformer's own parameters are optimized here.
optimizer = optim.SGD(params=model.parameters(), lr=1e-3, momentum=0.99)

In [19]:
# Trackers for the lowest epoch loss seen so far and the epoch at which it occurred.
best_loss, best_epoch = 100000, 0

In [38]:
# The tensors do not change between epochs and shuffling is off, so the dataset and
# loader are built once instead of being re-created on each of the 50 epochs.
dataset = TensorDataset(X_train, y_train)
data_loader = DataLoader(dataset, batch_size=200, shuffle=False)

for epoch in range(50):
    epoch_loss = 0
    cnt = 0  # number of batches seen in this epoch
    for X, y in data_loader:  # X: [batch, len, d_model], 200 x 10 x 128 per the printed shapes
        print(X.shape)
        cnt += 1
        # TODO: the optimisation step below is still disabled; it references names that
        # are not defined in the visible notebook (y_pre, y_true) and calls the model
        # with a single input, so it needs rework before being re-enabled.
        # enc_inputs=X.unsqueeze(0)   #(1*64*5)
        # # enc_inputs=enc_inputs.squeeze(2)
        # # dec_inputs : [batch_size, ]
        # # dec_outputs: [batch_size, 1]
        # outputs = model(enc_inputs)
        # # print(outputs.shape)
        # outputs = outputs.squeeze(1)
        # outputs = outputs.unsqueeze(0)
        # y = y.unsqueeze(0)
        # # outputs: [batch_size * tgt_len, tgt_vocab_size]
        # loss = criterion(outputs, y.view(1, -1))
        # loss_num = loss.item()
        # epoch_loss += loss_num
        # optimizer.zero_grad()
        # loss.backward()
        # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        # optimizer.step()
        # y_pre.append(outputs.detach().numpy())
        # y_true.append(y.detach().numpy())

#     if epoch_loss < best_loss:
#         best_loss = epoch_loss
#         best_epoch = epoch
#         best_model_wts = copy.deepcopy(model.state_dict())
#         torch.save(best_model_wts, './result/weight.pth')
#
#     print('Epoch:', '%04d' % (epoch + 1), 'loss =', '{:.6f}'.format(epoch_loss))

    print(cnt)
# # Print the best result
# print('best_loss::|',best_loss,'---best_epoch::|',best_epoch)

print('ok!')


torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
torch.Size([200, 10, 128])
t