In [14]:
%matplotlib notebook

import torch
import torch.nn as nn
import torch.nn.functional as F
import math as ms
import numpy as np
from numpy import random
from d2l import torch as d2l
import matplotlib.pyplot as plt
import pandas as pd
import time
from sklearn.model_selection import train_test_split
import os
from matplotlib.animation import FuncAnimation
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# torch.manual_seed(0)
# np.random.seed(0)

# This concept is also called teacher forcing.
# The flag decides whether the loss is calculated over all values
# or just over the predicted values.
# NOTE(review): the flag is not read anywhere in the visible code.
calculate_loss_over_all_values = False
# Number of leading columns of each data row used as model input (see
# get_batch); also used as the positional-encoding width and as the input
# channel count of the first Conv1d in TransAm.
cypl = 200
batch_size = 80 # number of data rows per mini-batch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # use the GPU when one is available

"""1.同时产生多D数据污染
2.对D需要进行编码
"""

def get_data(path='E:/论文/数据/fx3/wz50_pos_noise_35.0000.csv', train_size=0.8):
    """Load the CSV data set and split it into training and test tensors.

    Args:
        path: Location of the header-less, GBK-encoded CSV file. Defaults to
            the file this script was originally hard-wired to.
        train_size: Fraction of the rows assigned to the training split.

    Returns:
        A ``(train_data, test_data)`` pair of float tensors holding one CSV
        row per tensor row.
    """
    # Read the file in chunks of 100 rows and stitch the chunks back together.
    chunks = pd.read_csv(path, encoding='GBK', chunksize=100, header=None)
    frame = pd.concat(chunks, ignore_index=True)
    # The split seed is drawn from NumPy's global RNG, so the partition
    # changes between runs unless np.random is seeded beforehand.
    train_rows, test_rows = train_test_split(
        frame, train_size=train_size, random_state=np.random.randint(1, 100))
    train_data = torch.tensor(np.array(train_rows)).float()
    test_data = torch.tensor(np.array(test_rows)).float()
    return train_data, test_data

def input_trans(data,batch_size):
    """Lay a batch out as a (cypl, batch_size, 1) tensor on `device`.

    The input must contain exactly cypl * batch_size elements.
    """
    grid = data.reshape(cypl, batch_size).cpu()
    # Append a trailing singleton axis, then move to the compute device.
    return grid.reshape(cypl, batch_size, 1).to(device)

def noramlization(data):
    """Standardise `data` using the mean and std taken over all its elements.

    The standard deviation is torch's default (unbiased) estimate; no guard
    is applied for a zero standard deviation.
    """
    centred = data - data.mean()
    return centred / data.std()

def get_batch(source,i, batch_size):
    """Slice one mini-batch of `batch_size` rows out of `source`, starting at row `i`.

    Returns:
        ``(None, None)`` when fewer than `batch_size` rows remain. Otherwise
        ``(features, labels)`` on `device`: `features` is the transpose of
        the first `cypl` columns of the selected rows, and `labels` is the
        last column reshaped to (1, batch_size).
    """
    # Not enough rows left for a full batch: report it with a None pair.
    if i + batch_size > len(source):
        return None, None

    rows = source[i:i + batch_size]
    labels = rows[:, -1]
    features = rows[:, 0:cypl].t().to(device)
    labels = labels.reshape(1, batch_size).to(device)
    return features, labels
    
def time_embedding_pos(data,pos):
    """Build a lagged-history tensor from `data` and add `pos` to its lag channels.

    NOTE(review): this function is not called anywhere in the visible code;
    the shape notes below are read off the indexing and should be confirmed
    against its real callers.

    Args:
        data: Tensor whose first two dimensions hold all of its elements
            (it is reshaped to 2-D on entry), e.g. (d0, d1) or (d0, d1, 1).
        pos: Tensor that, after its first two dims are swapped, is sliced as
            pos[:, :, 1:] and added to the (d1, d0, cypl - 1) lag channels,
            so it must broadcast against that shape.

    Returns:
        Tensor of shape (d0, d1, cypl) on `device`.
    """
    data = data.reshape(data.shape[0],data.shape[1])  # drop anything beyond the first two dims
    data =data.t()  # now (d1, d0)
    pos = pos.transpose(0,1)
    sfr = data  # alias only, no copy is made
    # Allocated on torch's default device; moved to `device` after the loop.
    box = torch.zeros(data.shape[0],data.shape[1],cypl)
    box[:,:,0] = sfr  # channel 0 carries the values themselves
    for j in range(len(box[0,:,0])):
        # Channels 1..j at step j receive the values of steps j-1, ..., 0
        # (most recent first); the remaining channels stay zero.
        # NOTE(review): the slice width is clipped at cypl - 1, so this
        # appears to require d0 <= cypl -- confirm.
        box[:,j,1:j+1] = torch.flip(box[:,0:j,0], dims=[1])
    box = box.to(device)
    posi = pos[:,:,1:]  # skip channel 0 of `pos`
    
    box[:,:,1:] += posi  # only the lag channels are shifted; channel 0 is left as is
    box = box.transpose(0,1).to(device)  # back to (d0, d1, cypl)
    return box

def dis(src):
    """Scale channels 1.. of every sample by that sample's last-row, channel-0 value.

    `src` is indexed as src[sample, row, channel]. It is modified in place
    and also returned.
    """
    for idx in range(src.size(0)):
        sample = src[idx]  # view into src, so the write below lands in src
        factor = sample[-1, 0]
        sample[:, 1:] = sample[:, 1:] * factor
    return src

class PositionalEncoding(nn.Module):
    """Adds a fixed sinusoidal table to its input.

    The table is stored as the non-trainable buffer ``pe`` with shape
    (max_len, 1, d_model): even feature indices hold sines, odd ones cosines.
    """

    def __init__(self, d_model, max_len=cypl):
        super().__init__()
        positions = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        inv_freq = torch.exp(torch.arange(0, d_model, 2).float() * (-ms.log(10000.0) / d_model))
        angles = positions * inv_freq

        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(angles)
        table[:, 1::2] = torch.cos(angles)
        # A buffer follows the module across devices but is not a parameter.
        self.register_buffer('pe', table.unsqueeze(0).transpose(0, 1))

    def forward(self, x):
        """Return `x` plus the first ``x.size(0)`` table rows (broadcast add)."""
        return x + self.pe[:x.size(0), :]

class AttentionLayer(nn.Module):
    """Multi-head wrapper: project Q/K/V, run the inner attention, project back.

    Args:
        attention: Callable taking ``(queries, keys, values, attn_mask)`` with
            head-split 4-D tensors and returning ``(output, attention_map)``.
        d_model: Width of the incoming and outgoing features.
        n_heads: Number of heads.
        d_keys: Per-head query/key width; ``d_model // n_heads`` when falsy.
        d_values: Per-head value width; ``d_model // n_heads`` when falsy.
    """

    def __init__(self, attention, d_model, n_heads, d_keys=None,
                 d_values=None):
        super().__init__()

        per_head = d_model // n_heads
        if not d_keys:
            d_keys = per_head
        if not d_values:
            d_values = per_head
        qk_width = d_keys * n_heads
        v_width = d_values * n_heads

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, qk_width)
        self.key_projection = nn.Linear(d_model, qk_width)
        self.value_projection = nn.Linear(d_model, v_width)
        self.out_projection = nn.Linear(v_width, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask):
        """Apply the attention; inputs are 3-D (batch, length, d_model) tensors.

        Returns:
            ``(output, attn)`` where `output` is (batch, query_length, d_model)
            and `attn` is whatever the inner attention returned as its map.
        """
        batch, q_len, _ = queries.shape
        _, kv_len, _ = keys.shape
        heads = self.n_heads

        # Split the projected features into heads: (batch, length, heads, width).
        q_heads = self.query_projection(queries).view(batch, q_len, heads, -1)
        k_heads = self.key_projection(keys).view(batch, kv_len, heads, -1)
        v_heads = self.value_projection(values).view(batch, kv_len, heads, -1)

        attended, attn = self.inner_attention(q_heads, k_heads, v_heads, attn_mask)
        merged = attended.view(batch, q_len, -1)

        return self.out_projection(merged), attn

class ProbAttention(nn.Module):
    """ProbSparse attention: full softmax attention for a few "active" queries only.

    For every (batch, head) the ``u = factor * ceil(ln(L_Q))`` queries with the
    largest sparsity score receive a real attention result; every other query
    keeps the initial context (mean of V when unmasked, running sum of V when
    masked).

    Args:
        mask_flag: Use the causal variant (requires L_Q == L_V and ``ProbMask``).
        factor: Multiplier ``c`` in the ``c * ln(L)`` sample/selection sizes.
        scale: Score scale; ``1 / sqrt(D)`` when falsy.
        attention_dropout: Rate of the dropout module created in ``__init__``.
        output_attention: Also return a dense attention map from ``forward``.
    """

    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(ProbAttention, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        # NOTE(review): this dropout module is created but never applied in
        # this class.
        self.dropout = nn.Dropout(attention_dropout)

    def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
        """Score queries on sampled keys and return Q.K^T for the top `n_top` ones.

        Args:
            Q: Queries, [B, H, L_Q, E].
            K: Keys, [B, H, L_K, E].
            sample_k: Number of keys sampled (with replacement) per query.
            n_top: Number of queries kept per (batch, head).

        Returns:
            ``(Q_K, M_top)``: scores [B, H, n_top, L_K] of the kept queries
            against all keys, and the kept query indices [B, H, n_top].
        """
        B, H, L_K, E = K.shape
        _, _, L_Q, _ = Q.shape

        # Score each query against a random subset of keys only.
        K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
        index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q
        K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
        # Drop only the singleton axis added by unsqueeze(-2). A bare
        # squeeze() would also remove B, H, L_Q or sample_k whenever one of
        # them is 1 (e.g. n_heads == 1) and corrupt the index broadcast below.
        Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)

        # Sparsity measurement: max sampled score minus (sum of sampled scores) / L_K.
        M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
        M_top = M.topk(n_top, sorted=False)[1]

        # Full scores for the selected queries only.
        Q_reduce = Q[torch.arange(B)[:, None, None],
                     torch.arange(H)[None, :, None],
                     M_top, :] # factor*ln
        Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k

        return Q_K, M_top

    def _get_initial_context(self, V, L_Q):
        """Return the default output used for queries that are not selected.

        Unmasked: the mean of V over the key axis, repeated for every query.
        Masked: the cumulative sum of V along the key axis (self-attention only).
        """
        B, H, L_V, D = V.shape
        if not self.mask_flag:
            V_mean = V.mean(dim=-2)
            contex = V_mean.unsqueeze(-2).expand(B, H, L_Q, V_mean.shape[-1]).clone()
        else: # use mask
            assert(L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only
            contex = V.cumsum(dim=-2)
        return contex

    def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
        """Overwrite the rows of `context_in` at `index` with real attention output.

        Returns:
            ``(context_in, attns)``; `attns` is a dense [B, H, L_V, L_V] map
            (uniform rows for unselected queries) when ``output_attention`` is
            set, otherwise ``None``.
        """
        B, H, L_V, D = V.shape

        if self.mask_flag:
            # NOTE(review): ProbMask is not defined in the visible code; the
            # masked path needs it to be provided elsewhere. The incoming
            # attn_mask argument is replaced here.
            attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)

        context_in[torch.arange(B)[:, None, None],
                   torch.arange(H)[None, :, None],
                   index, :] = torch.matmul(attn, V).type_as(context_in)
        if self.output_attention:
            attns = (torch.ones([B, H, L_V, L_V])/L_V).type_as(attn).to(attn.device)
            attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
            return (context_in, attns)
        return (context_in, None)

    def forward(self, queries, keys, values, attn_mask):
        """Compute ProbSparse attention.

        Args:
            queries: [B, L_Q, H, D].
            keys: [B, L_K, H, D].
            values: [B, L_K, H, D_v].
            attn_mask: Accepted for interface compatibility; the masked path
                builds its own mask.

        Returns:
            ``(context, attn)`` with `context` laid out as [B, H, L_Q, D_v].
        """
        B, L_Q, H, D = queries.shape
        _, L_K, _, _ = keys.shape

        # Switch to the head-major layout [B, H, L, D].
        queries = queries.transpose(2,1)
        keys = keys.transpose(2,1)
        values = values.transpose(2,1)

        U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k)
        u = self.factor * np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q)

        # Clamp to [1, L]: ln(1) == 0 would otherwise request zero samples
        # and make the max() over sampled scores fail on an empty axis.
        U_part = min(max(U_part, 1), L_K)
        u = min(max(u, 1), L_Q)

        scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)

        # add scale factor
        scale = self.scale or 1./np.sqrt(D)
        scores_top = scores_top * scale
        # get the context
        context = self._get_initial_context(values, L_Q)
        # update the context with selected top_k queries
        context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask)

        return context.contiguous(), attn
    
class EncoderLayer(nn.Module):
    """Attention sub-layer followed by a position-wise feed-forward sub-layer.

    Both sub-layers use a residual connection and a LayerNorm; the
    feed-forward part is implemented with two kernel-size-1 Conv1d layers.

    Args:
        attention: Module/callable invoked as ``attention(x, x, x, attn_mask=...)``
            and returning ``(new_x, attn)``.
        d_model: Feature width.
        d_ff: Hidden width of the feed-forward part; ``4 * d_model`` when falsy.
        dropout: Dropout rate shared by all dropout applications.
        activation: ``"relu"`` selects ReLU, anything else GELU.
    """

    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super().__init__()
        if not d_ff:
            d_ff = 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.gelu if activation != "relu" else F.relu

    def forward(self, x, attn_mask=None):
        """Transform `x` of shape [B, L, D]; returns ``(output, attn)``."""
        attended, attn = self.attention(x, x, x, attn_mask=attn_mask)
        residual = self.norm1(x + self.dropout(attended))

        # Conv1d wants channels on axis 1, hence the transposes around it.
        hidden = self.conv1(residual.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        hidden = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm2(residual + hidden), attn
    
class Encoder(nn.Module):
    """Stack of attention layers, optionally interleaved with conv layers.

    Args:
        attn_layers: Modules called as ``layer(x, attn_mask=...)`` and
            returning ``(x, attn)``.
        conv_layers: Optional modules applied after each paired attention
            layer; when given, the last attention layer runs once more at the
            end without a mask.
        norm_layer: Optional callable applied to the final output.
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super().__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = None if conv_layers is None else nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        """Run `x` ([B, L, D]) through the stack; returns ``(x, attns)``."""
        attns = []
        if self.conv_layers is None:
            for layer in self.attn_layers:
                x, attn = layer(x, attn_mask=attn_mask)
                attns.append(attn)
        else:
            for layer, conv in zip(self.attn_layers, self.conv_layers):
                x, attn = layer(x, attn_mask=attn_mask)
                x = conv(x)
                attns.append(attn)
            # Closing pass through the last attention layer, called without a mask.
            x, attn = self.attn_layers[-1](x)
            attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns
    
class TransAm(nn.Module):
    """Positional encoding -> Conv1d stack -> ProbSparse encoder -> MLP head.

    The head ends in ``Linear(10, 2)``, so the model emits two values per
    sample (used as class logits by the training code in this file).

    Args:
        factor: ProbSparse sampling factor passed to ``ProbAttention``.
        dropout: Dropout rate for the attention and encoder layers.
        attn: Attention type; only ``'prob'`` is supported.
        d_model: Encoder feature width. NOTE(review): the conv stack and
            ``decoder`` hard-code 12 channels, so other values look unsupported.
        n_heads: Number of attention heads.
        output_attention: Forwarded to ``ProbAttention``.
        e_layers: Number of encoder layers.
        d_ff: Feed-forward width inside each encoder layer.
        activation: Activation name forwarded to ``EncoderLayer``.
        device: Unused; kept for interface compatibility.

    Raises:
        ValueError: If `attn` is not ``'prob'``.
    """

    def __init__(self,factor=1, dropout=0.1,attn='prob', d_model=12 ,n_heads = 1,output_attention=False,
                 e_layers = 1,d_ff=12,activation='gelu',device=torch.device('cuda:0')):
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'
        self.pos_encoder = PositionalEncoding(cypl)
        # NOTE(review): lru and BN are registered but not used in forward().
        self.lru = nn.LeakyReLU(negative_slope=0.01, inplace=False)
        self.attn = attn
        self.Relu=nn.ReLU()
        self.maxpooling = torch.nn.MaxPool1d(6, stride=1, padding=1)
        self.BN = nn.BatchNorm1d(300)
        self.cov1 = nn.Conv1d(in_channels=cypl,out_channels=200,kernel_size=10,padding = 8)
        self.cov2 = nn.Conv1d(in_channels=200,out_channels=100,kernel_size=3,padding = 2)
        self.cov3 = nn.Conv1d(in_channels=100,out_channels=12,kernel_size=1,padding = 0)
        # Three conv -> ReLU -> max-pool stages sharing one ReLU and one pool module.
        self.cov_block = nn.Sequential(self.cov1,self.Relu,self.maxpooling,
                                       self.cov2,self.Relu,self.maxpooling,
                                       self.cov3,self.Relu,self.maxpooling)
        if attn=='prob':
            Attn = ProbAttention
        else:
            # Previously an unknown name left Attn unbound and failed later
            # with an unrelated name error; fail with a clear message instead.
            raise ValueError("unsupported attention type {!r}; only 'prob' is available".format(attn))
        self.encoder = Encoder(
                [
                    EncoderLayer(
                        AttentionLayer(Attn(False, factor, attention_dropout = dropout, output_attention =output_attention),
                                    d_model, n_heads),
                        d_model,
                        d_ff,
                        dropout=dropout,
                        activation=activation
                    ) for _ in range(e_layers)
                ],
            )

        self.decoder = nn.Sequential(nn.Linear(12, 5),nn.Linear(5, 1))
        self.decoder2 = nn.Sequential(nn.Linear(cypl,120),nn.Tanh(),
                                      nn.Linear(120,100),nn.Tanh(),
                                      nn.Linear(100,10),nn.Tanh(),
                                      nn.Linear(10,2),nn.Tanh()
                                     )

        self.init_weights()


    def init_weights(self):
        """Zero the biases and uniformly re-initialise the weights of the head.

        Covers the first Linear of ``decoder`` and every Linear of
        ``decoder2``; the second Linear of ``decoder`` keeps PyTorch's default
        initialisation.
        """
        initrange = 0.4
        self.decoder[0].bias.data.zero_()
        self.decoder[0].weight.data.uniform_(-initrange, initrange)
        # Even indices of decoder2 are the Linear layers, odd ones the Tanh.
        for i in range(0,len(self.decoder2),2):
            self.decoder2[i].bias.data.zero_()
            self.decoder2[i].weight.data.uniform_(-initrange, initrange)

    def forward(self, src):
        """Map an input batch to per-sample outputs.

        Args:
            src: Input tensor; presumably (cypl, batch, 1) as produced by
                ``input_trans`` -- the shape comments below assume that.

        Returns:
            Tensor with a trailing dimension of 2, standardised over all its
            elements.
        """
        # Broadcast add with the (cypl, 1, cypl) table -> (cypl, batch, cypl).
        output = self.pos_encoder(src)
        # -> (batch, cypl, cypl): Conv1d expects (N, channels, length).
        output = self.cov_block(output.transpose(0,2).transpose(0,1))
        # (batch, 12, L') -> (L', batch, 12); the encoder treats dim 0 as batch.
        output,attns = self.encoder(output.transpose(0,2).transpose(1,2))
        # Collapse the 12 features to one value per position -> (batch, L').
        output = self.decoder(output.transpose(0,1)).squeeze(2)
        output = self.decoder2[0](output.float())
        # Every remaining head module (Tanh and Linear alike) is followed by
        # a whole-tensor standardisation.
        for i in range(1,len(self.decoder2)):
            output = self.decoder2[i](output)
            output = noramlization(output)
        return output

def train(train_data,lost,acr,model,lossfix,lr,optimizer,scheduler,epochs,epoch):
    """Run one training epoch over `train_data`, updating `model` in place.

    Args:
        train_data: 2-D tensor of samples; consumed in chunks of `batch_size` rows.
        lost: List that receives the loss of every batch.
        acr: List that receives the accuracy of every batch.
        model: Network to train.
        lossfix: Loss callable applied as ``lossfix(output, targets)``.
        lr: Unused; kept for interface compatibility.
        optimizer: Optimizer updating the parameters of `model`.
        scheduler: LR scheduler; only read here to report the current rate.
        epochs: Unused; kept for interface compatibility.
        epoch: Current epoch number, used in the progress output.
    """
    model.train()
    # Report roughly four times per epoch; never let the interval reach 0,
    # which would make the modulo below raise ZeroDivisionError.
    log_interval = max(1, int(len(train_data) / batch_size / 4))

    for batch, i in enumerate(range(0, len(train_data) - 1, batch_size)):

        data, targets = get_batch(train_data, i, batch_size)
        if data is None:
            # The trailing rows do not fill a whole batch; get_batch reports
            # that with (None, None) and nothing further can be trained on.
            break
        optimizer.zero_grad()
        data = input_trans(data,batch_size)
        data = noramlization(data)
        output = model(data).float()
        targets = targets.reshape([batch_size]).long()
        loss = lossfix(output,targets).sum()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        batch_acc = accuracy(output,targets)
        lost.append(float(loss))
        acr.append(batch_acc)
        if batch % log_interval == 0 and batch > 0:
            # get_last_lr() reports the rate actually in use; get_lr() is
            # only meant to be called from inside scheduler.step().
            print('| epoch {:3d} |  '
                  'lr {:02.6f} |  '

                  'loss {:5.5f}| acc {:2f}'.format(
                epoch,   scheduler.get_last_lr()[0],

                loss.item(),batch_acc))
            
def accuracy(y_hat, y):
    """Return the fraction of rows of `y_hat` whose arg-max class equals `y`.

    `y_hat` holds one row of class scores per sample; the result is rounded
    to six decimals.
    """
    scores = torch.softmax(y_hat, dim=1)
    if scores.dim() > 1 and scores.shape[1] > 1:
        scores = scores.argmax(axis=1)
    hits = (scores.type(y.dtype) == y).type(y.dtype).sum()
    return round(float(hits) / len(y.t()), 6)

def train_informer():
    """Train a TransAm classifier on the CSV data and checkpoint good models.

    Each epoch shuffles the training rows, runs one training pass, advances
    the learning-rate schedule and then estimates test accuracy on 20
    randomly positioned test batches. The whole model is saved to
    ``IN_99.pt`` / ``IN_95.pt`` / ``IN_85.pt`` depending on which accuracy
    band the epoch reached.
    """
    lossfix = nn.CrossEntropyLoss().to(device)
    model = TransAm().to(device)
    lr = 1*batch_size/256
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 3, gamma=0.75)

    epochs = 100 # The number of epochs
    lost = []
    acr = []
    ts_lost = []
    train_data, test_data = get_data()
    for epoch in range(1, epochs + 1):

        # Reshuffle the training rows every epoch.
        train_data=train_data[torch.randperm(train_data.size(0))]

        print('-' * 89)

        print('-' * 89)

        epoch_start_time = time.time()
        train(train_data,lost,acr,model,lossfix,lr,optimizer,scheduler,epochs,epoch)
        # The schedule only takes effect when it is stepped; previously the
        # StepLR object was created but never advanced.
        scheduler.step()

        # Evaluate with dropout disabled and without building autograd
        # graphs; train() switches back to training mode at its start.
        model.eval()
        test_acc = 0
        with torch.no_grad():
            for _ in range(20):
                start = np.random.randint(1,len(test_data) - batch_size - 1)
                tst,tar = get_batch(test_data,start,batch_size)

                tst = input_trans(tst,batch_size)
                tst = noramlization(tst)
                tst_output = model(tst).float()
                test_acc += accuracy(tst_output,tar)
        mean_acc = test_acc/20
        ts_lost.append(mean_acc)
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | test_acc: {:5.6f} |  '.format(
            epoch, (time.time() - epoch_start_time),mean_acc
           ))
        print('-' * 89)

        # One checkpoint file per accuracy band; later epochs overwrite it.
        if mean_acc >= 0.99:
            torch.save(model, 'IN_99.pt')
        elif mean_acc >= 0.95:
            torch.save(model, 'IN_95.pt')
        elif mean_acc >= 0.85:
            torch.save(model, 'IN_85.pt')
    
# Start training only when this file is run directly, not when it is imported.
if __name__ == '__main__':
    train_informer()



-----------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------




| epoch   1 |  lr 0.312500 |  loss 0.15707| acc 1.000000
| epoch   1 |  lr 0.312500 |  loss 0.12920| acc 1.000000
| epoch   1 |  lr 0.312500 |  loss 0.18402| acc 0.987500
| epoch   1 |  lr 0.312500 |  loss 0.12890| acc 1.000000
-----------------------------------------------------------------------------------------
| end of epoch   1 | time:  8.11s | test_acc: 0.996250 |  
-----------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------


KeyboardInterrupt: 