In [1]:
import numpy as np

import argparse
from numpy import arange, random
from torch import save, load, no_grad, LongTensor
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split

In [2]:
def indata_loader(num_file):
    data_array = []
    for i in range(num_file):
        file_name = '/home/mist/pc/wig_fin/pc_fin_' + str(i+1) + '.npy'
        array = np.load(file_name)
        for i in range(num_file):
            data_array.append(array)
    return data_array

In [3]:
def outdata_loader(num_file):
    data_array = []
    for i in range(num_file):
        file_name = '/home/mist/mix/mix_np_out/m_' + str(i+1) + '_a_xa.temt'
        array = np.loadtxt(file_name, skiprows=1, usecols = 1)
        data_array.append(array)
    return data_array

In [4]:
# abundanceloader
import numpy as np
from torch import LongTensor
from torch.utils.data import Dataset

# sequence_length = 1999-1 # ighm
class AbundanceLoader(Dataset):
    def __init__(self, x, y):
        if len(x) != len(y):
            raise ValueError("len(x) != len(y)")
        self.x = x
        self.y = y
    
    def __getitem__(self, index):
        return LongTensor(self.x[index]), LongTensor([0] + self.y[index])
    
    def __len__(self):
        return len(self.x)

In [5]:
# model
import torch
from torch import nn
import math

# positional encoding
class PositionalEncoding(nn.Module):
    
    def __init__(self, d_model, dropout=0.1, max_len=1998):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1) #
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

# transformer model
class TransformerModel(nn.Module):
    
    def __init__(self, intoken, outtoken, hidden, nlayers=3, dropout=0.1):
        super(TransformerModel, self).__init__()
        nhead = hidden // 64

        self.encoder = nn.Embedding(intoken, hidden) # nn.Embedding:词典大小，词向量维度
        self.pos_encoder = PositionalEncoding(hidden, dropout)

        self.decoder = nn.Embedding(outtoken, hidden)
        self.pos_decoder = PositionalEncoding(hidden, dropout)

        self.inscale = math.sqrt(intoken)
        self.outscale = math.sqrt(outtoken)

        self.transformer = nn.Transformer(d_model=hidden, nhead=nhead, num_encoder_layers=nlayers,
                                          num_decoder_layers=nlayers, dim_feedforward=hidden, dropout=dropout)
        self.fc_out = nn.Linear(hidden, outtoken)

        self.src_mask = None
        self.trg_mask = None
        self.memory_mask = None

    def generate_square_subsequent_mask(self, sz):
        mask = torch.triu(torch.ones(sz, sz), 1)
        mask = mask.masked_fill(mask == 1, float('-inf'))
        return mask

    def make_len_mask(self, inp):
        return (inp == 0).transpose(0, 1)

    def forward(self, src, trg):
        if self.trg_mask is None or self.trg_mask.size(0) != len(trg):
            self.trg_mask = self.generate_square_subsequent_mask(len(trg)).to(trg.device)

        src_pad_mask = self.make_len_mask(src)
        trg_pad_mask = self.make_len_mask(trg)

        src = self.encoder(src)
        src = self.pos_encoder(src)

        trg = self.decoder(trg)
        trg = self.pos_decoder(trg)
        output = self.transformer(src, trg, tgt_mask=self.trg_mask)
        # output = self.transformer(src, trg, src_mask=self.src_mask, tgt_mask=self.trg_mask,
        #                           memory_mask=self.memory_mask,
        #                           src_key_padding_mask=src_pad_mask, tgt_key_padding_mask=trg_pad_mask,
        #                           memory_key_padding_mask=src_pad_mask)
        output = self.fc_out(output)

        return output

In [6]:
# 训练过程
def train(model, criterion, optimizer, loader):
    model.train()
    epoch_loss = 0
    for i, batch in enumerate(loader):
        src, tgt = batch
        src, tgt = src.transpose(1, 0).cuda(), tgt.transpose(1, 0).cuda()
        optimizer.zero_grad()
        output = model(src, tgt[:-1, :])
        n = output.shape[-1]
        loss = criterion(output.reshape(-1, n), tgt[1:, :].reshape(-1))
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(loader)


# 验证
def validation(model, criterion, loader):
    model.eval()
    epoch_loss = 0
    with no_grad():
        for i, batch in enumerate(loader):
            src, tgt = batch
            src, tgt = src.transpose(1, 0).cuda(), tgt.transpose(1, 0).cuda()
            output = model(src, tgt[:-1, :])
            n = output.shape[-1]
            loss = criterion(output.reshape(-1, n), tgt[1:, :].reshape(-1))
            epoch_loss += loss.item()
    return epoch_loss / len(loader)


# 测试
def test(model, max_len=3, test_times=1):
    model = model.cuda()
    model.eval()
    with no_grad():
        for i in range(test_times):
            s = random.randint(1, 4998)
            cpu_src = [(s + j) * 2 for j in range(max_len)]
            src = LongTensor(cpu_src).unsqueeze(1).cuda()
            tgt = [0] + [(s + j) * 2 + 1 for j in range(max_len)]
            pred = [0]
            for j in range(max_len):
                inp = LongTensor(pred).unsqueeze(1).cuda()
                output = model(src, inp)
                out_num = output.argmax(2)[-1].item()
                pred.append(out_num)
            print("input: ", cpu_src)
            print("target: ", tgt)
            print("predict: ", pred)

In [9]:
# main_function

# num_arrays,样本数？
# hidden = d_model, 词向量维度
# nlayers：encoder，decoder层数
voc_size = 10000
# num_arrays = 512
# array_length = 50
hidden = 64
nlayers = 2
model = TransformerModel(voc_size, voc_size, hidden=hidden, nlayers=nlayers)
model = model.cuda()

num_in = 50
num_out = 2500
inp = indata_loader(num_in)
tgt = outdata_loader(num_out)

batch_size = 32
epochs = 50
dataset = AbundanceLoader(inp, tgt)
train_len = int(len(dataset) * 0.9)
val_len = len(dataset) - train_len
train_set, val_set = random_split(dataset, [train_len, val_len])
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=1)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True, num_workers=1)

# optimizer = optim.SGD(model.parameters(), lr=0.5)
optimizer = optim.Adam(model.parameters())
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
criterion = nn.CrossEntropyLoss()
best_loss = 100

for i in range(epochs):
    epoch_loss = train(model, criterion, optimizer, train_loader)
    epoch_loss_val = validation(model, criterion, val_loader)
    # scheduler.step()
    print("epoch: {} train loss: {}".format(i, epoch_loss))
    print("epoch: {} val loss: {}".format(i, epoch_loss_val))
    if epoch_loss_val < best_loss:
        best_loss = epoch_loss_val
        model_name = "/home/mist/mix/model_out/model_{0:.5f}.pt".format(epoch_loss_val) #
        save(model.state_dict(), model_name)
# return model_name

epoch: 0 train loss: 5.850770480196241
epoch: 0 val loss: 4.179392635822296
epoch: 1 train loss: 3.520250696531484
epoch: 1 val loss: 2.7418691515922546
epoch: 2 train loss: 2.470017470104594
epoch: 2 val loss: 2.0271848887205124
epoch: 3 train loss: 1.9716892981193435
epoch: 3 val loss: 1.726217582821846
epoch: 4 train loss: 1.7476668811180223
epoch: 4 val loss: 1.5853588283061981
epoch: 5 train loss: 1.6326723770356515
epoch: 5 val loss: 1.5048100799322128
epoch: 6 train loss: 1.563377662443779
epoch: 6 val loss: 1.4525576084852219
epoch: 7 train loss: 1.5152420796139139
epoch: 7 val loss: 1.410763919353485
epoch: 8 train loss: 1.477576606710192
epoch: 8 val loss: 1.3761582970619202
epoch: 9 train loss: 1.4467167484928185
epoch: 9 val loss: 1.3474269956350327
epoch: 10 train loss: 1.4212190215016756
epoch: 10 val loss: 1.3210552632808685
epoch: 11 train loss: 1.397960577212589
epoch: 11 val loss: 1.2989573031663895
epoch: 12 train loss: 1.3776290718938264
epoch: 12 val loss: 1.278974

In [44]:
torch.cuda.empty_cache()

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 2            |        cudaMalloc retries: 2         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |   23135 MB |   23135 MB |   32204 MB |    9068 MB |
|       from large pool |   23114 MB |   23114 MB |   32175 MB |    9061 MB |
|       from small pool |      21 MB |      21 MB |      28 MB |       7 MB |
|---------------------------------------------------------------------------|
| Active memory         |   23135 MB |   23135 MB |   32204 MB |    9068 MB |
|       from large pool |   23114 MB |   23114 MB |   32175 MB |    9061 MB |
|       from small pool |      21 MB |      21 MB |      28 MB |       7 MB |
|---------------------------------------------------------------

In [None]:
# data_generate
import random

def generate_random_arrays(num_arrays, array_length):
    random_arrays = []
    for _ in range(num_arrays):
        array = [random.randint(0, 100) for _ in range(array_length)]  # Change the range (0, 100) to your desired range
        random_arrays.append(array)
    return random_arrays