In [15]:
import numpy as np
import torch
import torch.nn as nn
from torch.nn.init import xavier_uniform_, xavier_normal_
import matplotlib.pyplot as plt

from process_data import data, train_data, test_data, item_set
from process_data import TrainDataset, MyDataset, get_dataloader


import warnings
warnings.filterwarnings("ignore")

# 一、GRUwATT搭建

In [3]:
class GRUwATT(nn.Module):
    """GRU session encoder with an attention-style gate and tied output embeddings.

    Pipeline: item embedding -> dropout -> GRU -> gate computed from every
    hidden state together with the last non-padded hidden state -> linear
    projection back to embedding space -> dot product with the item embedding
    table to obtain one score per item.

    Item id 0 is treated as padding (``padding_idx=0`` and ``item_seq.gt(0)``).
    """

    def __init__(self, input_size, output_size, embedding_size, hidden_size, n_layers=1):
        """
        Params:
            input_size: number of item ids (including the padding id 0).
            output_size: stored on the instance only; the scores returned by
                ``forward`` always have ``input_size`` columns because they are
                computed against the item embedding table.
            embedding_size: dimension of the item embeddings.
            hidden_size: dimension of the GRU hidden state.
            n_layers: number of stacked GRU layers.
        """
        super(GRUwATT, self).__init__()

        # === parameters ===
        self.n_items = input_size
        self.output_size = output_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        # === layers ===
        # 1. embedding layer
        self.item_embedding = nn.Embedding(self.n_items, self.embedding_size, padding_idx=0)
        self.emb_dropout = nn.Dropout(0.25)
        # 2. GRU layer
        self.gru = nn.GRU(input_size=self.embedding_size,
                          hidden_size=self.hidden_size,
                          num_layers=self.n_layers,
                          bias=False,
                          batch_first=True)
        # 3. attention
        self.A1 = nn.Linear(self.hidden_size, self.hidden_size, bias=False)
        self.A2 = nn.Linear(self.hidden_size, self.hidden_size, bias=False)
        self.v = nn.Linear(self.hidden_size, 1, bias=False)
        # 4. feedforward layer
        self.feedforward = nn.Linear(2*self.hidden_size, self.embedding_size)

        # === parameter initialisation ===
        # NOTE: this also overwrites the padding row (index 0) of the embedding
        # table; padding_idx only keeps that row from receiving gradients.
        xavier_normal_(self.item_embedding.weight)
        # Initialise every stacked GRU layer, not only layer 0, so that
        # n_layers > 1 is handled consistently.
        for layer in range(self.n_layers):
            xavier_uniform_(getattr(self.gru, f"weight_hh_l{layer}"))
            xavier_uniform_(getattr(self.gru, f"weight_ih_l{layer}"))
        xavier_normal_(self.feedforward.weight)

    def forward(self, item_seq, item_seq_len):
        """
        Params:
            item_seq: item ids, shape = (batch_size, seq_len); 0 marks padding.
            item_seq_len: accepted for interface compatibility; not used.
        Return:
            scores, shape = (batch_size, seq_len, n_items)
        """

        # embedding layer
        seq_embedding = self.item_embedding(item_seq)  # [bs, seq_len, emb_size]
        gru_input = self.emb_dropout(seq_embedding)

        # GRU layer
        gru_output, _ = self.gru(gru_input)  # [bs, seq_len, hidden_size]

        mask = item_seq.gt(0)
        # === last hidden state ht ===
        batchsize = mask.shape[0]
        # Build the row index on the same device as the input so the advanced
        # indexing below never mixes devices.
        dim0_idx = torch.arange(batchsize, device=item_seq.device)  # [0, 1, ..., bs-1]
        # Index of the last item per session, computed as (#non-zero ids - 1).
        # This equals the last real position only when padding follows the items.
        dim1_idx = torch.sum(mask, 1) - 1  # [bs]
        ht = gru_output[dim0_idx, dim1_idx]  # [bs, hidden_size]

        # === attention ===
        q1 = self.A1(gru_output)  # [bs, seq_len, hidden_size]
        q2 = self.A2(ht)  # [bs, hidden_size]

        q2 = q2.unsqueeze(1).expand_as(q1)
        mask = mask.unsqueeze(2).expand_as(gru_output)  # [bs, seq_len, hidden_size]
        # v(...) yields one scalar per position; it is broadcast over the hidden dimension.
        alpha = self.v(mask * torch.sigmoid(q1 + q2)).expand_as(gru_output)  # [bs, seq_len, hidden_size]
        c = alpha * gru_output  # [bs, seq_len, hidden_size]
        c_t = torch.cat([gru_output, c], 2)  # [bs, seq_len, 2*hidden_size]

        # feed forward layer
        output = self.feedforward(c_t)  # [bs, seq_len, emb_size]

        # project back to item space using the (tied) embedding table
        output = output @ self.item_embedding.weight.T  # [bs, seq_len, n_items]

        return output

    def predict(self, item_seq):
        """
        Score all items for the next step of each sequence, without gradients.

        The module is switched to eval mode for the forward pass and then put
        back into whatever mode it was in before the call.

        Params:
            item_seq, shape = (batch_size, seq_len)
        Return:
            final_scores, shape = (batch_size, item_size)
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                scores = self.forward(item_seq, item_seq.shape[1])  # [bs, seq_len, item_size]
        finally:
            # Restore the caller's mode instead of unconditionally enabling training.
            self.train(was_training)
        # Take the last timestep.
        # NOTE(review): if batches are padded after the items, this position is padding — confirm against the data loader.
        final_score = scores[:, -1, :]  # [bs, item_size]
        return final_score

# 二、模型训练

## 评价指标

In [4]:
def test_HR_MRR(model, test_dataset, k=20):
    count = 0
    score = 0
    for X, y in test_dataset:
        X = torch.tensor(X, dtype=torch.long).to(device)
        X = X.unsqueeze(0)  # [1, seq_len]

        # 预测
        output = model.predict(X).cpu().detach()  # [1, item_size]
        output = output.squeeze(0)
        top_k = output.topk(k).indices.numpy()

        if y in top_k:
            # HR
            count += 1
            # MRR
            rank_y = np.where(top_k == y)[0][0] + 1
            score += 1 / rank_y

    HR = count / len(test_dataset)
    MRR = score / len(test_dataset)

    return HR, MRR

## 1. 模型训练设置

定义了GRUwATT的`ModelTrain`类，打包训练过程

In [5]:
class ModelTrain:
    """Bundles the training loop, periodic evaluation and plotting for a model.

    The model is expected to be callable as ``model(X, seq_len)`` for training
    and to expose ``predict(X)`` for evaluation.
    """

    def __init__(self, model, dataset, train_loader, test_loader, optimizer, loss_func, n_epoch,
                 eval_every=20):
        """
        Params:
            model: the network to train.
            dataset: pair ``(train_dataset, test_dataset)``; the second entry is
                passed to ``test_HR_MRR``.
            train_loader: iterable of batches; ``batch[0]`` is the input and
                ``batch[1]`` the targets.
            test_loader: iterable of batches used for the accuracy figure.
            optimizer: optimizer already bound to the model parameters.
            loss_func: callable ``loss_func(scores, targets)``.
            n_epoch: number of epochs to run in ``train``.
            eval_every: report the training loss (and evaluate, when requested)
                every this many epochs, starting at epoch 0.
        """
        if eval_every < 1:
            raise ValueError("eval_every must be a positive integer")
        self.model = model
        self.train_dataset = dataset[0]
        self.test_dataset = dataset[1]
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.optimizer = optimizer
        self.loss_func = loss_func
        self.n_epoch = n_epoch
        self.eval_every = eval_every

        # per-epoch training loss (sum of the batch losses)
        self.train_losses = []
        # accuracy on the test loader, one entry per evaluation
        self.test_acc = []

    def _device(self):
        """Device of the model parameters (CPU when the model has none)."""
        first_param = next(self.model.parameters(), None)
        return first_param.device if first_param is not None else torch.device("cpu")

    def _eval_epochs(self):
        """Epoch numbers at which the entries of ``test_acc`` were recorded."""
        return [i * self.eval_every for i in range(len(self.test_acc))]

    def _train_one_epoch(self, epoch):
        """Run one pass over ``train_loader`` and record the summed batch loss."""
        device = self._device()
        train_loss = 0.0
        for batch in self.train_loader:
            # fetch the batch
            X = batch[0].to(device)
            y = batch[1].to(device)

            # forward pass
            output = self.model(X, X.shape[1])  # [bs, seq_len, n_item]
            output = output.reshape(-1, output.shape[-1])  # [bs*seq_len, n_item]

            # backward pass and parameter update
            loss = self.loss_func(output, y.flatten())
            train_loss += loss.item()
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        if epoch % self.eval_every == 0:
            print(f"\nepoch {epoch} ......")
            print(f"train loss = {train_loss:.2f}")
        self.train_losses.append(train_loss)

    def _accuracy(self):
        """Top-1 accuracy over ``test_loader``, computed as correct / total samples.

        Counting samples (instead of averaging per-batch accuracies) keeps a
        smaller final batch from being over-weighted. Returns 0.0 for an empty
        loader.
        """
        device = self._device()
        correct = 0
        total = 0
        for batch in self.test_loader:
            X = batch[0].to(device)
            y = batch[1]
            pred = self.model.predict(X).argmax(1).cpu()
            correct += int(np.sum(pred.numpy() == y.cpu().numpy()))
            total += pred.shape[0]
        return correct / total if total else 0.0

    def _test_one_epoch(self):
        """Evaluate on the test data: print/record accuracy, then print MRR and HR."""
        # test_loader yields data in a different format from train_loader, so
        # accuracy (not loss) is used to show performance on the test set
        acc = self._accuracy()
        print(f"test accuracy = {acc:.2f}")
        self.test_acc.append(acc)

        # ranking metrics
        hit_ratio, MRR = test_HR_MRR(self.model, self.test_dataset)
        print(f"MRR = {MRR:.4f} | HR = {hit_ratio:.4%}")

    def train(self, test=False):
        """Train for ``n_epoch`` epochs; with ``test=True`` also evaluate every ``eval_every`` epochs."""
        # reset the records
        self.train_losses = []
        self.test_acc = []
        # training
        self.model.train()
        for e in range(self.n_epoch):
            self._train_one_epoch(e)

            # evaluate on the test data (accuracy and ranking metrics)
            if e % self.eval_every == 0 and test:
                self.model.eval()
                with torch.no_grad():
                    self._test_one_epoch()
                self.model.train()

    def get_info(self):
        """Return the recorded training losses and test accuracies as a dict."""
        losses = {'train_losses': self.train_losses,
                  'test_acc': self.test_acc}
        return losses

    def plot_train_loss(self):
        """Plot the per-epoch training loss."""
        plt.figure()
        plt.plot(self.train_losses)
        plt.ylabel('train loss')
        plt.xlabel('epoch')
        plt.show()

    def plot_test_acc(self):
        """Plot test accuracy against the epoch at which it was measured."""
        plt.figure()
        plt.plot(self._eval_epochs(), self.test_acc)
        plt.ylabel('test accuracy')
        plt.xlabel('epoch')
        plt.show()

    def plot_performance(self):
        """Plot training loss and test accuracy side by side."""
        fig = plt.figure(figsize=(14, 5))

        ax = fig.add_subplot(121)
        ax.plot(self.train_losses)
        ax.set_ylabel('train_loss')
        ax.set_xlabel('epoch')
        ax.set_title("train loss change with epoch")

        ax = fig.add_subplot(122)
        # Evaluations happen at epochs 0, eval_every, 2*eval_every, ...
        ax.plot(self._eval_epochs(), self.test_acc)
        ax.set_ylabel('test_accuracy')
        ax.set_xlabel('epoch')
        ax.set_title(f"test accuracy change per {self.eval_every} epoches")

        plt.show()

## 2. 超参数设置

In [16]:
# Hyperparameters shared by the experiment cells below.
# NOTE(review): no random seed is set in the visible cells, so reruns may not reproduce the logged numbers — confirm whether process_data seeds anything.
num_epoch = 201  # range(201) runs epochs 0..200, so the every-20-epochs report also fires at epoch 200
batch_size = 256
lr = 0.001  # NOTE(review): not referenced by the visible experiment cells, which pass literal lr values to Adam
embedding_size = 128
hidden_size = 100
item_size = len(item_set) + 1  # +1 because zero padding effectively adds an extra item '0'
output_size = item_size
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [17]:
# 数据
print(len(train_data), len(test_data))

train_dataset = TrainDataset(train_data)
test_dataset = MyDataset(test_data)
print(len(train_dataset), len(test_dataset))

train_loader = get_dataloader(batch_size, dataset=train_dataset, train=True)
test_loader = get_dataloader(batch_size, dataset=test_dataset, train=False)

18055 4514
18055 4514


## 3. 训练和测试

#### lr = 0.01, batch_size = 256

In [8]:
# Experiment 1: Adam with lr=0.01 on the batch_size=256 loaders.
model1 = GRUwATT(
    input_size=item_size,
    output_size=output_size,
    embedding_size=embedding_size,
    hidden_size=hidden_size,
).to(device)

train1 = ModelTrain(
    model1,
    (train_dataset, test_dataset),
    train_loader,
    test_loader,
    torch.optim.Adam(model1.parameters(), lr=0.01),
    nn.CrossEntropyLoss(),
    num_epoch,
)

# test=True additionally evaluates on the test data every 20 epochs.
train1.train(test=True)


epoch 0 ......
train loss = 592.09
test accuracy = 0.01
MRR = 0.0200 | HR = 6.7125%

epoch 20 ......
train loss = 70.52
test accuracy = 0.29
MRR = 0.3508 | HR = 46.7435%

epoch 40 ......
train loss = 60.59
test accuracy = 0.31
MRR = 0.3714 | HR = 47.4967%

epoch 60 ......
train loss = 59.48
test accuracy = 0.31
MRR = 0.3751 | HR = 47.7404%

epoch 80 ......
train loss = 56.35
test accuracy = 0.32
MRR = 0.3772 | HR = 47.3416%

epoch 100 ......
train loss = 54.86
test accuracy = 0.32
MRR = 0.3806 | HR = 47.1865%

epoch 120 ......
train loss = 56.64
test accuracy = 0.33
MRR = 0.3835 | HR = 47.4302%

epoch 140 ......
train loss = 56.75
test accuracy = 0.32
MRR = 0.3818 | HR = 47.6296%

epoch 160 ......
train loss = 56.88
test accuracy = 0.32
MRR = 0.3777 | HR = 47.5188%

epoch 180 ......
train loss = 60.62
test accuracy = 0.32
MRR = 0.3759 | HR = 47.5853%

epoch 200 ......
train loss = 57.61
test accuracy = 0.32
MRR = 0.3785 | HR = 47.4967%


#### lr = 0.005, batch_size = 256

In [9]:
# Experiment 2: Adam with lr=0.005 on the batch_size=256 loaders.
model2 = GRUwATT(
    input_size=item_size,
    output_size=output_size,
    embedding_size=embedding_size,
    hidden_size=hidden_size,
).to(device)

train2 = ModelTrain(
    model2,
    (train_dataset, test_dataset),
    train_loader,
    test_loader,
    torch.optim.Adam(model2.parameters(), lr=0.005),
    nn.CrossEntropyLoss(),
    num_epoch,
)

# test=True additionally evaluates on the test data every 20 epochs.
train2.train(test=True)


epoch 0 ......
train loss = 605.42
test accuracy = 0.00
MRR = 0.0100 | HR = 4.0541%

epoch 20 ......
train loss = 58.86
test accuracy = 0.30
MRR = 0.3638 | HR = 46.9650%

epoch 40 ......
train loss = 38.62
test accuracy = 0.34
MRR = 0.3904 | HR = 47.4745%

epoch 60 ......
train loss = 35.62
test accuracy = 0.34
MRR = 0.3931 | HR = 46.5662%

epoch 80 ......
train loss = 31.58
test accuracy = 0.34
MRR = 0.3942 | HR = 46.9428%

epoch 100 ......
train loss = 29.21
test accuracy = 0.35
MRR = 0.3994 | HR = 47.2308%

epoch 120 ......
train loss = 27.92
test accuracy = 0.35
MRR = 0.3972 | HR = 46.8985%

epoch 140 ......
train loss = 27.33
test accuracy = 0.35
MRR = 0.3998 | HR = 47.2087%

epoch 160 ......
train loss = 26.38
test accuracy = 0.36
MRR = 0.4015 | HR = 46.8764%

epoch 180 ......
train loss = 25.50
test accuracy = 0.36
MRR = 0.4007 | HR = 46.5884%

epoch 200 ......
train loss = 25.53
test accuracy = 0.35
MRR = 0.3986 | HR = 46.4112%


#### lr = 0.005, batch_size = 128

In [10]:
# Experiment 3: Adam with lr=0.005 and batch_size=128.
# Dedicated loaders are built because the batch size differs from the default ones.
train_loader2 = get_dataloader(batch_size=128, dataset=train_dataset, train=True)
test_loader2 = get_dataloader(batch_size=128, dataset=test_dataset, train=False)

model3 = GRUwATT(
    input_size=item_size,
    output_size=output_size,
    embedding_size=embedding_size,
    hidden_size=hidden_size,
).to(device)

train3 = ModelTrain(
    model3,
    (train_dataset, test_dataset),
    train_loader2,
    test_loader2,
    torch.optim.Adam(model3.parameters(), lr=0.005),
    nn.CrossEntropyLoss(),
    num_epoch,
)

# test=True additionally evaluates on the test data every 20 epochs.
train3.train(test=True)


epoch 0 ......
train loss = 1175.10
test accuracy = 0.01
MRR = 0.0222 | HR = 8.2189%

epoch 20 ......
train loss = 121.44
test accuracy = 0.30
MRR = 0.3638 | HR = 47.7625%

epoch 40 ......
train loss = 96.66
test accuracy = 0.33
MRR = 0.3828 | HR = 46.8985%

epoch 60 ......
train loss = 81.95
test accuracy = 0.34
MRR = 0.3931 | HR = 47.0979%

epoch 80 ......
train loss = 76.45
test accuracy = 0.34
MRR = 0.3945 | HR = 46.9428%

epoch 100 ......
train loss = 75.62
test accuracy = 0.34
MRR = 0.3958 | HR = 47.2308%

epoch 120 ......
train loss = 69.76
test accuracy = 0.35
MRR = 0.3972 | HR = 46.8099%

epoch 140 ......
train loss = 71.71
test accuracy = 0.35
MRR = 0.3968 | HR = 46.8321%

epoch 160 ......
train loss = 73.88
test accuracy = 0.34
MRR = 0.3930 | HR = 47.0758%

epoch 180 ......
train loss = 70.15
test accuracy = 0.34
MRR = 0.3950 | HR = 47.0315%

epoch 200 ......
train loss = 80.25
test accuracy = 0.34
MRR = 0.3937 | HR = 46.6105%


#### lr = 0.01, batch_size = 128

In [11]:
# Experiment 4: Adam with lr=0.01 and batch_size=128.
# The 128-sized loaders are rebuilt here so the cell does not depend on the previous experiment cell.
train_loader2 = get_dataloader(batch_size=128, dataset=train_dataset, train=True)
test_loader2 = get_dataloader(batch_size=128, dataset=test_dataset, train=False)

model4 = GRUwATT(
    input_size=item_size,
    output_size=output_size,
    embedding_size=embedding_size,
    hidden_size=hidden_size,
).to(device)

train4 = ModelTrain(
    model4,
    (train_dataset, test_dataset),
    train_loader2,
    test_loader2,
    torch.optim.Adam(model4.parameters(), lr=0.01),
    nn.CrossEntropyLoss(),
    num_epoch,
)

# test=True additionally evaluates on the test data every 20 epochs.
train4.train(test=True)


epoch 0 ......
train loss = 1147.16
test accuracy = 0.02
MRR = 0.0433 | HR = 12.9375%

epoch 20 ......
train loss = 197.57
test accuracy = 0.22
MRR = 0.2957 | HR = 44.9712%

epoch 40 ......
train loss = 173.78
test accuracy = 0.26
MRR = 0.3299 | HR = 45.8573%

epoch 60 ......
train loss = 183.83
test accuracy = 0.26
MRR = 0.3335 | HR = 46.8099%

epoch 80 ......
train loss = 162.61
test accuracy = 0.27
MRR = 0.3438 | HR = 46.6770%

epoch 100 ......
train loss = 175.81
test accuracy = 0.27
MRR = 0.3398 | HR = 46.7878%

epoch 120 ......
train loss = 187.89
test accuracy = 0.26
MRR = 0.3326 | HR = 46.7878%

epoch 140 ......
train loss = 184.08
test accuracy = 0.26
MRR = 0.3353 | HR = 46.3890%

epoch 160 ......
train loss = 184.80
test accuracy = 0.27
MRR = 0.3388 | HR = 46.5662%

epoch 180 ......
train loss = 191.87
test accuracy = 0.26
MRR = 0.3358 | HR = 46.7435%

epoch 200 ......
train loss = 181.99
test accuracy = 0.28
MRR = 0.3450 | HR = 46.4998%


# 三、训练并保存模型
用所有数据进行训练，将模型保存到`GRUwATT.pt`。注意：此时测试集的 session 很可能已包含在训练数据中，因此下面输出的 test accuracy / MRR / HR 只能说明模型对已见数据的拟合程度，不能代表泛化能力。

In [18]:
# Training dataset and loader built from the full `data` object imported from process_data.
data_all = TrainDataset(data)
loader = get_dataloader(256, dataset=data_all, train=True)

In [19]:
# Final model: Adam with lr=0.005, 100 epochs, trained on the full-data loader.
model = GRUwATT(
    input_size=item_size,
    output_size=output_size,
    embedding_size=embedding_size,
    hidden_size=hidden_size,
).to(device)

train0 = ModelTrain(
    model,
    (data_all, test_dataset),
    loader,
    test_loader,
    torch.optim.Adam(model.parameters(), lr=0.005),
    nn.CrossEntropyLoss(),
    100,
)

# NOTE(review): evaluation still uses test_dataset/test_loader; if `data` contains the
# test sessions, the metrics printed here are measured on data seen during training — confirm.
train0.train(test=True)


epoch 0 ......
train loss = 750.22
test accuracy = 0.01
MRR = 0.0197 | HR = 7.2220%

epoch 20 ......
train loss = 75.06
test accuracy = 0.86
MRR = 0.9407 | HR = 99.6455%

epoch 40 ......
train loss = 53.47
test accuracy = 0.90
MRR = 0.9715 | HR = 100.0000%

epoch 60 ......
train loss = 47.85
test accuracy = 0.91
MRR = 0.9755 | HR = 100.0000%

epoch 80 ......
train loss = 43.69
test accuracy = 0.90
MRR = 0.9718 | HR = 99.9557%


In [20]:
# Persist only the learned weights (state_dict). The file name is 'GRUwATT.pt',
# matching the model name; the previous literal 'GRUwARTT.pt' was a typo.
# NOTE(review): update any loader code that still reads the old misspelled file name.
torch.save(train0.model.state_dict(), 'GRUwATT.pt')