In [2]:
# google drive
from google.colab import drive
drive.mount('/content/drive')
dirpath = "/content/drive/MyDrive/5053/"

Mounted at /content/drive


In [3]:
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
import time
import math
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
torch.manual_seed(0)
np.random.seed(0)

calculate_loss_over_all_values = False
input_window = 45
output_window = 5
batch_size = 20 # batch size
device = torch.device("cuda" if torch.cuda.is_available() else "mps")
print(device)

cuda


In [10]:
def get_batch(d,i,num):
    """用于获得每个批次合理大小的源数据和目标数据.
       参数source是通过batchify得到的train_data/val_data/test_data.
       i是具体的批次次数.
    """

    # 首先我们确定句子长度, 它将是在bptt和len(source) - 1 - i中最小值
    # 实质上, 前面的批次中都会是bptt的值, 只不过最后一个批次中, 句子长度
    # 可能不够bptt的35个, 因此会变为len(source) - 1 - i的值.
    seq_len = num

    # 语言模型训练的源数据的第i批数据将是batchify的结果的切片[i:i+seq_len]

    train0 = d[i:i+seq_len]


    # 根据语言模型训练的语料规定, 它的目标数据是源数据向后移动一位
    # 因为最后目标数据的切片会越界, 因此使用view(-1)来保证形状正常.

    test0 = d[i+1:i+1+seq_len]
    return train0,test0
class PositionalEncoding(nn.Module):
 
    def __init__(self, d_model, dropout=0, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
 
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)
 
    def forward(self, x):
#         self.pe = self.pe.expand(-1, batch_size, -1)
        x = x.expand(-1, -1, 512) + self.pe[:x.size(0), :]
        return self.dropout(x)
class TransAm(nn.Module):
    def __init__(self, feature_size=512, num_layers=1, dropout=0):  # feature_size 表示特征维度（必须是head的整数倍）, num_layers 表示 Encoder_layer 的层数， dropout 用于防止过你和
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(feature_size,0)  #位置编码前要做归一化，否则捕获不到位置信息
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=8,dim_feedforward = 64, dropout=dropout)  # 这里用了八个头
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.decoder = nn.Linear(feature_size, 1)  # 这里用全连接层代替了decoder， 其实也可以加一下Transformer的decoder试一下效果
        self.init_weights()
    def init_weights(self):  
        initrange = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src):
#         if self.src_key_padding_mask is None:
#             mask_key = src_padding.bool()
#             self.src_key_padding_mask = mask_key
        src = self.pos_encoder(src)
        src = src.transpose(0,1)
        output = self.transformer_encoder(src, src_key_padding_mask=self.src_mask)  
        output = self.decoder(output).view(10,-1)
        return output

class TransformerForAUD:
  def read_data(self, path):
    data = pd.read_csv(dirpath + 'AUD_v2.csv')
    self.raw_value = data['tri'].to_numpy()

  def preprocess(self):
    tri = self.raw_value
    # diff
    tri = tri[1:]
    tri = np.diff(tri)
    self.tri = tri

    # 
    self.device = device
    self.model = TransAm()
    self.model = self.model.to(device)
    self.criterion = nn.MSELoss()
    self.lr = 0.00000001  # 学习率最好设置的小一些，太大的话loss会出现nan的情况
    # optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr)
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, 3, gamma=0.96)

  
  def train(self):
    self.model.train()  # Turn on the train mode

    for epoch in range(10):
      total_loss = 0.
      start_time = time.time()

      for batch, i in enumerate(range(0, 3000, 1)):
          train_0,test_0 = get_batch(self.tri,i,10)
          train_0 = torch.from_numpy(train_0)
          train_0 = train_0.float()
          train_0 = torch.unsqueeze(train_0,1)
          train_0 = train_0.unsqueeze(2)
          test_0 = torch.from_numpy(test_0)
          test_0 = test_0.float()
          self.optimizer.zero_grad()

          train_0 = train_0.to(device)
          test_0 = test_0.to(device=device)

          output = self.model(train_0)
          loss = self.criterion(output.view(-1), test_0)
          loss.backward()
          torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
          self.optimizer.step()

          total_loss += loss.item()
      print(f"epoch {epoch}, total_loss = {total_loss}")

  # def test(self):

In [11]:
transformer = TransformerForAUD()

In [16]:
transformer.read_data(dirpath + 'AUD_v2.csv')
transformer.preprocess()
print(transformer.device)
print(next(transformer.model.parameters()).device)

cuda
cuda:0


In [17]:
transformer.train()

epoch 0, total_loss = 1858.6368730720133
epoch 1, total_loss = 1694.780916839838
epoch 2, total_loss = 1627.7173247411847
epoch 3, total_loss = 1584.2653926350176
epoch 4, total_loss = 1553.7855370752513
epoch 5, total_loss = 1531.9631012063473
epoch 6, total_loss = 1516.0573090314865
epoch 7, total_loss = 1504.2159503456205
epoch 8, total_loss = 1495.223971599713
epoch 9, total_loss = 1488.2567310575396
