In [11]:
import numpy as np

import argparse
from numpy import arange, random
from torch import save, load, no_grad, LongTensor
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split

In [12]:
def indata_loader(num_file):
    data_array = []
    for i in range(num_file):
        file_name = '/home/mist/mix/mix_np_out/m_' + str(i+1) + '_a_xa.temt' # 
        array = np.loadtxt(file_name, skiprows=1, usecols = 1) + 3 # +3免去<bos>,<eos>,<pad>的影响
        data_array.append(array)
    return data_array

In [13]:
def outdata_loader(num_file):
    data_array = []
    for i in range(num_file):
        file_name = '/home/mist/pc/wig_fin/pc_fin_' + str(i+1) + '.npy' #
        array = np.load(file_name) + 3 # +3免去<bos>,<eos>,<pad>的影响
        for i in range(num_file):
            data_array.append(array)
    return data_array

In [14]:
# abundanceloader
import numpy as np
from torch import LongTensor
from torch.utils.data import Dataset

# sequence_length = 1999-1 # ighm
class AbundanceLoader(Dataset):
    def __init__(self, x, y):
        if len(x) != len(y):
            raise ValueError("len(x) != len(y)")
        self.x = x
        self.y = y
    
    def __getitem__(self, index):
        return LongTensor(self.x[index]), LongTensor([0] + self.y[index])
    
    def __len__(self):
        return len(self.x)

In [20]:
# model
import torch
from torch import nn
import math

# positional encoding
class PositionalEncoding(nn.Module):
    
    def __init__(self, d_model, dropout=0.1, max_len=1998):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1) #
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

# transformer model
class TransformerModel(nn.Module):
    
    def __init__(self, intoken, outtoken, hidden, nlayers=3, dropout=0.1):
        super(TransformerModel, self).__init__()
        nhead = hidden // 64

        self.encoder = nn.Embedding(intoken, hidden) # nn.Embedding:词典大小，词向量维度
        self.pos_encoder = PositionalEncoding(hidden, dropout)

        self.decoder = nn.Embedding(outtoken, hidden)
        self.pos_decoder = PositionalEncoding(hidden, dropout)

        self.inscale = math.sqrt(intoken)
        self.outscale = math.sqrt(outtoken)

        self.transformer = nn.Transformer(d_model=hidden, nhead=nhead, num_encoder_layers=nlayers,
                                          num_decoder_layers=nlayers, dim_feedforward=hidden, dropout=dropout)
        self.fc_out = nn.Linear(hidden, outtoken)

        self.src_mask = None
        self.trg_mask = None
        self.memory_mask = None

    def generate_square_subsequent_mask(self, sz):
        mask = torch.triu(torch.ones(sz, sz), 1)
        mask = mask.masked_fill(mask == 1, float('-inf'))
        return mask

    def make_len_mask(self, inp):
        return (inp == 0).transpose(0, 1)

    def forward(self, src, trg):
        if self.trg_mask is None or self.trg_mask.size(0) != len(trg):
            self.trg_mask = self.generate_square_subsequent_mask(len(trg)).to(trg.device)

        src_pad_mask = self.make_len_mask(src)
        trg_pad_mask = self.make_len_mask(trg)

        src = self.encoder(src)
        src = self.pos_encoder(src)

        trg = self.decoder(trg)
        trg = self.pos_decoder(trg)
        output = self.transformer(src, trg, tgt_mask=self.trg_mask)
        # output = self.transformer(src, trg, src_mask=self.src_mask, tgt_mask=self.trg_mask,
        #                           memory_mask=self.memory_mask,
        #                           src_key_padding_mask=src_pad_mask, tgt_key_padding_mask=trg_pad_mask,
        #                           memory_key_padding_mask=src_pad_mask)
        output = self.fc_out(output)

        return output

In [32]:
test_set[0][0]

tensor([3, 3, 3,  ..., 3, 3, 3])

In [40]:
def predict(input_seq, model, max_length=1998):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Do not calculate gradients
        # Convert input to tensor
        input_tensor = torch.tensor(input_seq).unsqueeze(1).to(device)  # Reshape and move to device
        # Create initial decoder input (start with the start-of-sequence token)
        decoder_input = torch.tensor([1]).unsqueeze(1).to(device)  # Here we assume 1 is the <sos> token
        output = []
        for _ in range(max_length):
            output_tensor = model(input_tensor, decoder_input)
            # Choose the word with the highest probability
            prediction = output_tensor.argmax(dim=2)[-1, :].item()
            if prediction == 2:  # Stop if end-of-sequence token is predicted, we assume 2 is the <eos> token
                break
            output.append(prediction)
            # Add prediction to decoder input
            decoder_input = torch.cat((decoder_input, torch.tensor([[prediction]]).to(device)), dim=0)
    return output  # This is a list of predicted indices



In [41]:
a=predict(test_set[0][0],model)

  input_tensor = torch.tensor(input_seq).unsqueeze(1).to(device)  # Reshape and move to device


In [43]:
# 训练过程
def train(model, criterion, optimizer, loader):
    model.train()
    epoch_loss = 0
    for i, batch in enumerate(loader):
        src, tgt = batch
        src, tgt = src.transpose(1, 0).cuda(), tgt.transpose(1, 0).cuda()
        optimizer.zero_grad()
        output = model(src, tgt[:-1, :])
        n = output.shape[-1]
        loss = criterion(output.reshape(-1, n), tgt[1:, :].reshape(-1))
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(loader)


# 验证
def validation(model, criterion, loader):
    model.eval()
    epoch_loss = 0
    with no_grad():
        for i, batch in enumerate(loader):
            src, tgt = batch
            src, tgt = src.transpose(1, 0).cuda(), tgt.transpose(1, 0).cuda()
            output = model(src, tgt[:-1, :])
            n = output.shape[-1]
            loss = criterion(output.reshape(-1, n), tgt[1:, :].reshape(-1))
            epoch_loss += loss.item()
    return epoch_loss / len(loader)


# # 测试
def test(model, test_set, max_len=1998):
    model = model.cuda()
    model.eval()
    with no_grad():
        for i in range(len(test_set)):
            cpu_src = test_set[i][0].numpy()
            src = LongTensor(cpu_src).unsqueeze(1).cuda()
            tgt = test_set[i][1].numpy()
            np.savetxt('/home/mist/test/test_src_'+str(i)+'.txt',cpu_src, delimiter='\n') #
            np.savetxt('/home/mist/test/test_tgt_'+str(i)+'.txt',tgt, delimiter='\n') #
            pred = [0]
            for j in range(max_len):
                inp = LongTensor(pred).unsqueeze(1).cuda()
                output = model(src,inp)
                # predict = model.predictor(out[:, -1])
                out_num = output.argmax(2)[-1].item()
                pred.append(out_num)
            np.savetxt('/home/mist/test/pred_'+str(i)+'.txt',pred,delimiter='\n') #

# def test(model, loader, max_len =1998):
#     model.eval()
#     with no_grad():
#         for i, batch in enumerate(loader):
#             src, tgt = batch
#             print("input: ", src)
#             print("target: ", tgt)
#             np.savetxt('/home/mist/test/test_src_'+str(i)+'.txt',src.numpy(), newline='\n') #
#             np.savetxt('/home/mist/test/test_tgt_'+str(i)+'.txt',tgt.numpy(), newline='\n') #
#             src, tgt = src.transpose(1, 0).cuda(), tgt.transpose(1, 0).cuda()
#             pred = [0]
#             for j in range(max_len):
#                 inp = LongTensor(pred).unsqueeze(1).cuda()
#                 output = model(src, inp)
#                 out_num = output.argmax(2)[-1].item()
#                 pred.append(out_num)
#             # print("predict: ", pred)
#             np.savetxt('/home/mist/test/pred_'+str(i)+'.txt',pred) #

In [22]:
# main_function

# num_arrays,样本数？
# hidden = d_model, 词向量维度
# nlayers：encoder，decoder层数
voc_size = 10000
# num_arrays = 512
# array_length = 50
hidden = 64
nlayers = 2
model = TransformerModel(voc_size, voc_size, hidden=hidden, nlayers=nlayers)
model = model.cuda()

num_in = 100 #
num_out = 10 #
inp = indata_loader(num_in)
tgt = outdata_loader(num_out)

batch_size = 16 # 32
epochs = 10 # 50
dataset = AbundanceLoader(inp, tgt)
#train_len = int(len(dataset) * 0.9)
#val_len = len(dataset) - train_len
#train_set, val_set = random_split(dataset, [train_len, val_len])
# test_set
test_len = int(len(dataset) * 0.1)
model_len = len(dataset) - test_len
model_set, test_set = random_split(dataset, [model_len, test_len])
train_len = int(model_len * 0.9)
val_len = model_len - train_len
train_set, val_set = random_split(model_set, [train_len, val_len])

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=1)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True, num_workers=1)
# test_loader = DataLoader(test_set, batch_size=1, shuffle=True, num_workers=1)

# optimizer = optim.SGD(model.parameters(), lr=0.5)
optimizer = optim.Adam(model.parameters())
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
criterion = nn.CrossEntropyLoss()
best_loss = 100

epoch_loss_all = []
epoch_loss_val_all = []
for i in range(epochs):
    epoch_loss = train(model, criterion, optimizer, train_loader)
    epoch_loss_val = validation(model, criterion, val_loader)
    # scheduler.step()
    epoch_loss_all.append(epoch_loss) # 保存
    epoch_loss_val_all.append(epoch_loss_val) # 保存
    print("epoch: {} train loss: {}".format(i, epoch_loss))
    print("epoch: {} val loss: {}".format(i, epoch_loss_val))
#     if epoch_loss_val < best_loss:
#         best_loss = epoch_loss_val
#         model_name = "/home/mist/test/model_out/model_{0:.5f}.pt".format(epoch_loss_val) #
#         save(model.state_dict(), model_name)
# # return model_name

# model.load_state_dict(load(model_name))
# # test(model, test_times=10)
# test(model, test_set)

epoch: 0 train loss: 8.884762128194174
epoch: 0 val loss: 8.430649757385254
epoch: 1 train loss: 8.3525284131368
epoch: 1 val loss: 8.08516788482666
epoch: 2 train loss: 8.023944775263468
epoch: 2 val loss: 7.802675247192383
epoch: 3 train loss: 7.7468611399332685
epoch: 3 val loss: 7.55354118347168
epoch: 4 train loss: 7.513567368189494
epoch: 4 val loss: 7.368093967437744
epoch: 5 train loss: 7.322382688522339
epoch: 5 val loss: 7.217617988586426
epoch: 6 train loss: 7.169830878575643
epoch: 6 val loss: 7.065672874450684
epoch: 7 train loss: 7.018868128458659
epoch: 7 val loss: 6.933077335357666
epoch: 8 train loss: 6.902270078659058
epoch: 8 val loss: 6.776003837585449
epoch: 9 train loss: 6.758063077926636
epoch: 9 val loss: 6.628385066986084


In [29]:
list(test_set[1][1])

[tensor(423),
 tensor(423),
 tensor(423),
 tensor(423),
 tensor(423),
 tensor(423),
 tensor(423),
 tensor(423),
 tensor(564),
 tensor(564),
 tensor(705),
 tensor(705),
 tensor(705),
 tensor(705),
 tensor(846),
 tensor(846),
 tensor(846),
 tensor(846),
 tensor(846),
 tensor(846),
 tensor(846),
 tensor(846),
 tensor(846),
 tensor(846),
 tensor(846),
 tensor(987),
 tensor(1128),
 tensor(1128),
 tensor(1128),
 tensor(1128),
 tensor(1270),
 tensor(1270),
 tensor(1270),
 tensor(1130),
 tensor(1272),
 tensor(1132),
 tensor(1132),
 tensor(1132),
 tensor(1132),
 tensor(1132),
 tensor(1133),
 tensor(1133),
 tensor(1133),
 tensor(1276),
 tensor(1419),
 tensor(1419),
 tensor(1419),
 tensor(1419),
 tensor(1419),
 tensor(1419),
 tensor(1562),
 tensor(1562),
 tensor(1705),
 tensor(1848),
 tensor(1848),
 tensor(1848),
 tensor(1848),
 tensor(1848),
 tensor(1708),
 tensor(1708),
 tensor(1567),
 tensor(1710),
 tensor(1710),
 tensor(1710),
 tensor(1569),
 tensor(1569),
 tensor(1569),
 tensor(1569),
 tenso

In [23]:
test(model, test_set)

KeyboardInterrupt: 

In [9]:
model = TransformerModel(10000, 10000, hidden=hidden, nlayers=nlayers)
model.load_state_dict(load(model_name))
test(model, val_set)

In [17]:
test_len

10

In [10]:
print(model.load_state_dict)

<bound method Module.load_state_dict of TransformerModel(
  (encoder): Embedding(10000, 64)
  (pos_encoder): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (decoder): Embedding(10000, 64)
  (pos_decoder): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
          )
          (linear1): Linear(in_features=64, out_features=64, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=64, out_features=64, bias=True)
          (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dro