In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
class AutoEncoder(nn.Module):
    def __init__(self, input_dim=768, hidden=[360, 180, 90, 40], output_dim=768):

        super(AutoEncoder, self).__init__()
        self.input_dim = input_dim
        self.hidden = hidden
        self.output_dim = output_dim
        self.encoder = nn.Sequential(
            nn.Linear(self.input_dim, self.hidden[0]),
            nn.ReLU(True),
            nn.Linear(self.hidden[0], self.hidden[1]),
            nn.ReLU(True),
            nn.Linear(self.hidden[1], self.hidden[2]),
            nn.ReLU(True),
            nn.Linear(self.hidden[2], self.hidden[3]),
            nn.ReLU(True),
        )
        self.decoder = nn.Sequential(
            nn.Linear(self.hidden[3], self.hidden[2]),
            nn.ReLU(True),
            nn.Linear(self.hidden[2], self.hidden[1]),
            nn.ReLU(True),
            nn.Linear(self.hidden[1], self.hidden[0]),
            nn.ReLU(True),
            nn.Linear(self.hidden[0], self.output_dim),
            nn.Tanh()
        )

    def forward(self,x):
        latent = self.encoder(x)
        x = self.decoder(latent)
        return x

In [36]:
# 定义训练函数
def train(model, optimizer, criterion, dataloader, num_epochs):
    train_loss = []
    for epoch in range(num_epochs):
        for batch in dataloader:
            optimizer.zero_grad()
            input_data = batch[0]
            output_data = model(input_data)
            loss = criterion(output_data, input_data)
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
        print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {np.mean(train_loss):.4f}")


In [37]:
from torch import optim

# 定义超参数

num_epochs = 30
batch_size = 64
lr = 0.001

# 准备数据
api_embed_path = r"D:\workspace\centific\apimodel\data\api_embed_mean.np"
data = np.load(api_embed_path)
data = torch.from_numpy(data).float()

# 定义模型、优化器和损失函数
model = AutoEncoder()
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.MSELoss(reduction='sum')

#转移到GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
data = data.to(device)

dataset = torch.utils.data.TensorDataset(data)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)

In [38]:
train(model, optimizer, criterion, dataloader, num_epochs)

Epoch 1/30, Train Loss: 2391.3587
Epoch 2/30, Train Loss: 2209.7448
Epoch 3/30, Train Loss: 2113.7043
Epoch 4/30, Train Loss: 2042.1916
Epoch 5/30, Train Loss: 1986.8204
Epoch 6/30, Train Loss: 1942.5816
Epoch 7/30, Train Loss: 1906.3531
Epoch 8/30, Train Loss: 1875.9655
Epoch 9/30, Train Loss: 1849.7637
Epoch 10/30, Train Loss: 1827.1358
Epoch 11/30, Train Loss: 1807.1679
Epoch 12/30, Train Loss: 1789.4174
Epoch 13/30, Train Loss: 1773.6019
Epoch 14/30, Train Loss: 1759.4593
Epoch 15/30, Train Loss: 1746.6381
Epoch 16/30, Train Loss: 1734.9137
Epoch 17/30, Train Loss: 1724.0926
Epoch 18/30, Train Loss: 1714.0988
Epoch 19/30, Train Loss: 1704.7705
Epoch 20/30, Train Loss: 1696.1140
Epoch 21/30, Train Loss: 1688.1072
Epoch 22/30, Train Loss: 1680.5587
Epoch 23/30, Train Loss: 1673.4765
Epoch 24/30, Train Loss: 1666.7830
Epoch 25/30, Train Loss: 1660.4484
Epoch 26/30, Train Loss: 1654.4103
Epoch 27/30, Train Loss: 1648.6677
Epoch 28/30, Train Loss: 1643.2527
Epoch 29/30, Train Loss: 1638

In [40]:
encoder = model.encoder
X_encoded = encoder(data).cpu().detach().numpy()

In [46]:
np.save(r"D:\workspace\centific\apimodel\data\api_embed_ae.npy", X_encoded)