In [1]:
# %matplotlib inline
%matplotlib notebook
import os
from time import ctime, time, sleep
import pandas as pd
import numpy as np
import math
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torch.backends.cudnn as cudnn
from torch import nn, optim
from torch import functional as F
from torch.utils import data as Data

from d2l import torch as d2l

# torch.set_default_tensor_type('torch.cuda.FloatTensor')

In [2]:
# Run configuration: GPU visibility, cuDNN benchmark mode, dataset root and compute device.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # expose only GPU 0 to this process
cudnn.benchmark = True
basic_path = './denny/'
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Start a fresh results file holding only the CSV header
# (mode 'w' truncates whatever a previous run left behind).
with open('note.csv', 'w') as f:
    print('dataset,hr,ndcg,mrr', file=f)

device

device(type='cuda', index=0)

In [3]:
class Self_attention(nn.Module):
    """Single-head scaled dot-product self-attention.

    Queries, keys and values are bias-free linear projections of the same
    input; dropout (p=0.5) is applied to the attention weights before they
    are used to mix the values.
    """

    def __init__(self, dim_q, dim_k, dim_v):
        super().__init__()
        self.dim_q, self.dim_k, self.dim_v = dim_q, dim_k, dim_v

        # All three projections consume the dim_q-sized input features.
        self.linear_q = nn.Linear(dim_q, dim_k, bias=False)
        self.linear_k = nn.Linear(dim_q, dim_k, bias=False)
        self.linear_v = nn.Linear(dim_q, dim_v, bias=False)
        # 1 / sqrt(dim_k) scaling applied to the raw attention logits.
        self._norm_fact = 1 / np.sqrt(dim_k)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Attend over the second-to-last axis of `x`.

        Args:
            x: tensor whose last dimension is `dim_q`.

        Returns:
            Tensor with the same leading dimensions as `x` and last dimension `dim_v`.
        """
        queries = self.linear_q(x)
        keys = self.linear_k(x)
        values = self.linear_v(x)
        logits = (queries @ keys.transpose(-1, -2)) * self._norm_fact
        weights = self.dropout(logits.softmax(dim=-1))
        return weights @ values
    
ctime()

'Mon Aug  5 17:59:15 2024'

In [4]:
class Add_norm(nn.Module):
    """Residual connection followed by layer normalization."""

    def __init__(self, normalized_shape, dropout, **kwargs):
        super().__init__(**kwargs)
        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(normalized_shape)

    def forward(self, X, Y):
        """Return LayerNorm(X + Dropout(Y)); X is the skip input, Y the sub-layer output."""
        residual = X + self.dropout(Y)
        return self.layer_norm(residual)
    
ctime()

'Mon Aug  5 17:59:15 2024'

In [5]:
def read_file_to_four(path1, path2):
    """Load user sequences and neighbour sequences from two comma-separated files.

    Args:
        path1: file with one user per line; the first field is skipped and the
            remaining fields are kept as that user's item ids.
        path2: file with one neighbour sequence per line; field 0 is the key the
            sequences are grouped by, field 1 is skipped, fields 2+ are item ids.

    Returns:
        (usr_item, itemids, neigh_item) where
        - usr_item is a list of item-id lists (strings), one per line of path1,
        - itemids lists every distinct item id in first-seen order
          (all of path1 first, then path2),
        - neigh_item is a list (one entry per distinct key, in first-seen order)
          of lists of item-id lists.

    Side effects:
        Prints the number of distinct item ids.
    """
    usr_item = []
    neigh_dict = {}
    itemids = []
    seen = set()  # O(1) membership test; `itemids` keeps the first-seen order

    def _register(ids):
        # Record every id not encountered before, preserving encounter order.
        for item_id in ids:
            if item_id not in seen:
                seen.add(item_id)
                itemids.append(item_id)

    # Context managers guarantee both files are closed, even on error.
    with open(path1, 'r') as usr_file:
        for line in usr_file:
            items = line.rstrip('\n').split(',')[1:]
            usr_item.append(items)
            _register(items)

    with open(path2, 'r') as neigh_file:
        for line in neigh_file:
            fields = line.rstrip('\n').split(',')
            neigh_dict.setdefault(fields[0], []).append(fields[2:])
            _register(fields[2:])

    # dicts preserve insertion order, so this matches the order keys first appeared.
    neigh_item = list(neigh_dict.values())

    print(len(itemids))
    return usr_item, itemids, neigh_item

ctime()

'Mon Aug  5 17:59:15 2024'

In [6]:
def get_itemid(usr_item, itemids, neigh_item):
    """Replace raw item ids with contiguous integer indices, in place.

    The index of an id is its position in `itemids`, which lets callers use
    ids that are not contiguous integers. Both `usr_item` and `neigh_item`
    are modified in place and also returned, together with the item count.
    """
    # raw item id -> position in `itemids`
    item_dict = {raw_id: position for position, raw_id in enumerate(itemids)}

    for sequence in usr_item:
        for pos in range(len(sequence)):
            sequence[pos] = item_dict[sequence[pos]]

    for neigh_group in neigh_item:
        for sequence in neigh_group:
            for pos in range(len(sequence)):
                sequence[pos] = item_dict[sequence[pos]]

    return usr_item, neigh_item, len(itemids)

ctime()

'Mon Aug  5 17:59:15 2024'

In [7]:
def get_train_test_dataset(usr_item, neigh_item, item_num):
    """Split users into train / valid / test partitions by position (80% / 10% / 10%).

    For every user the last item becomes the target and the preceding items the
    input sequence. Every neighbour sequence has its last element dropped; this
    trimming is written back into `neigh_item` (the argument is mutated).

    Returns:
        A 10-tuple: (data, target, neigh) for train, then valid, then test,
        followed by `item_num` unchanged.
    """
    total = len(usr_item)
    train_end = total * 0.8
    valid_end = total * 0.9  # users at or beyond this position go to the test split

    # Drop the final element of every neighbour sequence, in place.
    for neigh_group in neigh_item:
        for pos in range(len(neigh_group)):
            neigh_group[pos] = neigh_group[pos][:-1]

    # One (data, target, neigh) triple of lists per split: train, valid, test.
    splits = [([], [], []) for _ in range(3)]
    for position, (usr, neigh_group) in enumerate(zip(usr_item, neigh_item)):
        if position < train_end:
            which = 0
        elif position < valid_end:
            which = 1
        else:
            which = 2
        data, target, neigh = splits[which]
        data.append(usr[:-1])
        target.append(usr[-1])
        neigh.append(neigh_group)

    return (*splits[0], *splits[1], *splits[2], item_num)

ctime()

'Mon Aug  5 17:59:15 2024'

In [8]:
class GRUModel(nn.Module):
    """Next-item scorer built from a shared GRU, attention pooling and routing loops.

    `forward` embeds a user's item sequence and the item sequences of that
    user's neighbours, encodes both with the same GRU, pools the GRU outputs
    with `usr_add_attention`, mixes user and neighbour vectors with
    self-attention + Add_norm, and then derives one vector
    (`single_dynamic_routing`) and `k` vectors (`multi_dynamic_routing`) per
    user. Both are scored against every item embedding and the two score sets
    are blended with the learnable scalar `yita`.

    Relies on the module-level `device` defined elsewhere in the notebook.
    """
    def __init__(self, input_dim, hid_dim, layer_num, item_num, seq_len, k=4):
        """Build all sub-modules.

        Args:
            input_dim: item embedding size (also the GRU input size).
            hid_dim: GRU hidden size.
            layer_num: number of stacked GRU layers.
            item_num: number of distinct items (embedding rows / score columns).
            seq_len: length used for the routing logits and for `q1` / `add_norm2`.
            k: number of vectors produced by `multi_dynamic_routing`.
        """
        super(GRUModel, self).__init__()
        self.item_num = item_num
        self.embedding = nn.Embedding(self.item_num, input_dim)
        self.input_dim = input_dim
        self.hid_dim = hid_dim
        self.layer_num = layer_num
        self.gru = nn.GRU(input_size=self.input_dim, hidden_size=self.hid_dim, num_layers=self.layer_num, batch_first=True)
        self.k = k
        self.sigmoid = nn.Sigmoid()
        # NOTE(review): `dropout`, `relu`, `attention` and `mu` are registered here but
        # not referenced by any method visible in this class.
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()
        self.self_att1 = Self_attention(self.input_dim, self.input_dim, self.input_dim)
        self.attention = d2l.DotProductAttention(0.5)

        self.seq_len = seq_len
        # Projections used by `tar_att1`: q1 acts on the routing logits (size seq_len),
        # k1 / v1 act on the feature dimension.
        self.q1 = nn.Linear(self.seq_len, self.seq_len, bias=False)
        self.k1 = nn.Linear(input_dim, input_dim, bias=False)
        self.v1 = nn.Linear(input_dim, input_dim, bias=False)

        # add_norm1 normalizes over the feature dimension, add_norm2 over seq_len.
        self.add_norm1 = Add_norm(input_dim, 0.5)
        self.add_norm2 = Add_norm(self.seq_len, 0.5)

        # Learnable blend weight between the two score sets in `forward` (initialised to 0.5).
        self.yita = nn.Parameter(torch.tensor(0.5))

        # Two-stage scorer used by `usr_add_attention`: hid_dim -> hid_dim // 2 -> 1.
        attention1_module = []
        attention1_module.append(nn.Linear(self.hid_dim, self.hid_dim // 2))
        attention1_module.append(nn.Dropout(0.5))
        self.attention1 = nn.Sequential(*attention1_module)

        attention2_module = []
        attention2_module.append(nn.Linear(self.hid_dim // 2, 1))
        attention2_module.append(nn.Dropout(0.5))
        self.attention2 = nn.Sequential(*attention2_module)

        self.mu = nn.Sequential(nn.Linear(self.input_dim, self.input_dim * self.k), nn.ReLU(),
                                nn.Linear(self.input_dim * self.k, self.input_dim))

    def tar_att1(self, x, y):
        """Attention-style re-weighting of the routing logits `x` against features `y`.

        Computes softmax(q1(x) @ k1(y) / sqrt(input_dim)) @ v1(y)^T.
        Assumes x ends in seq_len and y is (seq_len, input_dim), so the result has
        the same trailing size as x — TODO confirm against callers.
        """
        Q = self.q1(x)
        K = self.k1(y)
        V = self.v1(y)
        A = torch.matmul(Q, K) / np.sqrt(self.input_dim)
        A = torch.softmax(A, dim=-1)
        O = torch.matmul(A, V.transpose(-1, -2))
        return O

    def squashing(self, x, dim=-1):
        """Rescale `x` along `dim` to length ||x||^2 / (1 + ||x||^2), keeping its direction.

        The 1e-8 term guards against division by zero for all-zero vectors.
        """
        squared_norm = (x ** 2).sum(dim=dim, keepdim=True)
        scale = squared_norm / (1 + squared_norm)
        return scale * x / (squared_norm.sqrt() + 1e-8)

    def single_dynamic_routing(self, neigh):
        """Run three routing iterations over `neigh` and return a single vector.

        The logits `b` start at zero and are updated after every iteration; the
        squashed vector from the last iteration is returned.
        Shape comments below are the original author's, for seq_len=6 and 128 features.
        """
        b = torch.zeros(self.seq_len).to(device)  # [6]
        neigh = neigh.to(device)  # [6, 128]
        # a_pis = []
        for i in range(3):
            att_w = self.tar_att1(b, neigh)  # [6]
            c = torch.softmax(b, dim=-1) * self.add_norm2(b, att_w)  # [6]
            c = torch.softmax(c, dim=-1)  # [6]
            s = c @ neigh  # [1,6] @ [6, 128] = [128]
            a = self.squashing(s)  # [128]
            b = b + a @ neigh.transpose(-1, -2)  # [6]
            # a_pis.append(a)
            # if i == 0:
            #     a_pi = a
        # return a
        return a  # , a_pi  # [128], [3, 128]

    def single_dynamic_routing_after(self, usr, a_original, a_pi):
        """Three routing iterations combining `a_original`, `usr` and `a_pi` into one vector.

        NOTE(review): within the visible code this method is only referenced from
        commented-out lines in `forward`.
        """
        # b_1, b_2, b_3 = torch.zeros(1).to(device), torch.zeros(self.seq_len).to(device), torch.zeros(1).to(device)  # [1], [1]
        # b_1, b_2, b_3 = torch.zeros(1).to(device), torch.zeros(self.seq_len - 1).to(device), torch.zeros(1).to(device)
        b_1, b_2, b_3 = torch.zeros(1).to(device), torch.zeros(1).to(device), torch.zeros(1).to(device)
        a_original, usr, a_pi = a_original.to(device), usr.to(device), a_pi.to(device)   # [128]
        for _ in range(3):
            # a_original = a_original.to(device)  # [128]
            # a_former = a
            # Each logit is a single element, so each softmax below evaluates to 1.
            c_1, c_2, c_3 = torch.softmax(b_1, dim=-1), torch.softmax(b_2, dim=-1), torch.softmax(b_3, dim=-1)  # [1], [1]
            s = c_1 @ a_original.unsqueeze(0) + c_2 @ usr.unsqueeze(0) + c_3 @ a_pi.unsqueeze(0)  # [128]
            a = self.squashing(s)  # [128]
            b_1, b_2, b_3 = b_1 + a @ a_original.unsqueeze(1), b_2 + a @ usr.unsqueeze(1), b_3 + a @ a_pi.unsqueeze(1)  # [1], [1]
        # a = nn.Linear(256, 128)(torch.concat((a, a_orginal), dim=0)).to(device)
        # print('a:', a.shape)
        return a  # [128]


    def multi_dynamic_routing(self, neigh):
        """Routing variant that yields `k` vectors instead of one.

        The logits are drawn from a standard normal (one row per output vector), so
        the result depends on the RNG state. The logits are only updated after the
        first two of the three iterations.
        Shape comments below are the original author's, for k=4, seq_len=6, 128 features.
        """
        b = torch.randn(self.k, self.seq_len).to(device)  # [4, 6]
        neigh = neigh.to(device)  # [6, 128]
        for i in range(3):
            att_w = self.tar_att1(b, neigh)  # [4, 6]
            c = torch.softmax(b, dim=-1) * self.add_norm2(b, att_w)  # [4, 6]
            c = torch.softmax(c, dim=-1)  # [4, 6]
            s = c @ neigh  # [4, 128]
            a = self.squashing(s)  # [4, 128]
            if i < 2:
                b = b + a @ neigh.transpose(-1, -2)  # [4, 6]
        return a  # [4, 128]

    def usr_add_attention(self, outs):
        """Pool per-step outputs into one vector per sequence with additive attention.

        Every step i is scored against the last step n as
        attention2(sigmoid(attention1(h_n) + attention1(h_i))), and the steps are
        summed with those scores as weights. No softmax is applied to the scores.

        Args:
            outs: per-step outputs indexed as outs[:, i, :]
                (presumably (batch, steps, hid_dim) — TODO confirm).

        Returns:
            The weighted sum, located on the CPU because every term is moved with `.cpu()`.
        """
        ret = 0
        usr_hn = outs[:, -1, :]
        usr_hn = usr_hn.to(device)  # last-step output of every sequence
        usr_hn = self.attention1(usr_hn)  # projected to hid_dim // 2
        for i in range(outs.shape[1]):
            usr_hi = outs[:, i, :]
            usr_hi = usr_hi.to(device)
            usr_hi = self.attention1(usr_hi)
            sig_hi_hn = self.sigmoid(usr_hn +usr_hi)
            att = self.attention2(sig_hi_hn).cpu()
            ret += att * outs[:, i, :].cpu()
        return ret

    def forward(self, train_data, train_neigh_data, target=None):
        """Score every item for every user in the batch.

        Args:
            train_data: item indices of the users' own sequences.
            train_neigh_data: item indices of the neighbours' sequences, one group
                per user (the driver cell annotates it as [users, 5, 7] — TODO confirm).
            target: accepted but not used in this method.

        Returns:
            CPU tensor of scores with one row per user and one column per item.
        """
        # embedding for train_data
        train_data = train_data.to(device)
        train_data  = self.embedding(train_data)
        # embedding for neighbor
        train_neigh_data = train_neigh_data.to(device)
        train_neigh_data = self.embedding(train_neigh_data)  # one extra trailing embedding dimension

        batch_size = train_data.shape[0]
        usr_hidden = torch.zeros(self.layer_num, batch_size, self.hid_dim).contiguous().to(device)
        outs, usr_hidden = self.gru(train_data, usr_hidden)  # per-step outputs, final hidden state

        # Attention-pool the GRU outputs into one vector per user (returned on CPU).
        outs = self.usr_add_attention(outs)
        gru_out = torch.zeros(outs.shape[0], outs.shape[1]).to(device)
        gru_out2 = torch.zeros(outs.shape[0], self.k, outs.shape[1]).to(device)
        cnt = 0

        # Users are processed one at a time; `cnt` is the row being filled.
        for usr, neigh in zip(outs, train_neigh_data):
            # The same GRU that encoded the user encodes that user's neighbour sequences.
            hidden = torch.zeros(self.layer_num, neigh.shape[0], self.hid_dim).contiguous().to(device)
            nei_out, hidden = self.gru(neigh, hidden)
            usr = usr.to(device)
            # Append the user's vector as an extra step to every neighbour sequence.
            # This only lines up when there are seq_len - 1 neighbours — TODO confirm.
            nei_out = torch.concat((nei_out, usr.repeat(self.seq_len - 1, 1, 1)), dim=1)
            nei_out = self.usr_add_attention(nei_out)  # one pooled vector per neighbour
            # Stack the user vector (row 0) on top of the neighbour vectors.
            nei = torch.zeros(nei_out.shape[0] + 1, nei_out.shape[1])
            nei[0] = usr
            for idx, i in enumerate(nei_out):
                nei[idx + 1] = i
            nei = nei.to(device)
            after = self.self_att1(nei)
            nei = self.add_norm1(nei, after)  # [6, 128]
            # a, a_pi = self.single_dynamic_routing(nei)
            # a_pis = [a_pi.to(device) for a_pi in a_pis]
            # NOTE(review): `usr` (reassigned here) and `nei_pi` below are not read by
            # any active statement afterwards.
            usr = torch.Tensor(usr).to(device)
            # a_after = self.single_dynamic_routing_after(usr, a, a_pi)
            # linear_layer = nn.Linear(256, 128).to(device)
            # gru_out[cnt] = linear_layer(torch.concat((a_after, a), dim=0))
            # gru_out[cnt] = gru_out[cnt].to(device)
            nei_pi = nei[1:]
            # gru_out[cnt] = self.single_dynamic_routing_after(usr, a, a_pi)
            gru_out[cnt] = self.single_dynamic_routing(nei)
            gru_out2[cnt] = self.multi_dynamic_routing(nei)
            cnt += 1

        # Look up the embedding of every item to score against.
        item_embedding = [i for i in range(self.item_num)]
        item_embedding = torch.LongTensor(item_embedding).to(device)
        item_embedding = self.embedding(item_embedding)

        score1 = gru_out @ item_embedding.T
        score2 = gru_out2 @ item_embedding.T
        # For each item keep the best score among the k vectors.
        score2 = torch.max(score2, 1).values
        score = self.yita * score1 + (1 - self.yita) * score2
        return score.cpu()
    
ctime()

'Mon Aug  5 17:59:15 2024'

In [9]:
def Metrics(model, test_loader, test_usr_neigh_data, top_k, batch_size):
    """Compute HR@k, NDCG@k and MRR@k of `model` over `test_loader`.

    Args:
        model: callable `model(x, neigh)` returning a (users, items) score tensor.
        test_loader: iterable of `(x, y, neigh)` batches; `y` holds the index of
            each user's true next item.
        test_usr_neigh_data: unused; kept so existing callers keep working.
        top_k: number of highest-scoring items considered per user.
        batch_size: unused; kept so existing callers keep working.

    Returns:
        (hr, ndcg, mrr), each averaged over all evaluated users. When the loader
        yields no users the result is (0.0, 0.0, 0.0) instead of a
        ZeroDivisionError.

    Relies on the module-level `device`.
    """
    hr = 0
    ndcg = 0
    mrr = 0
    usr_num = 0
    for x, y, neigh in test_loader:
        x, neigh = x.to(device), neigh.to(device)
        with torch.no_grad():
            scores = model(x, neigh)
        usr_num += scores.shape[0]

        # Only the ranked item positions are needed; the top-k score values are discarded.
        _, top_k_item_pos = torch.topk(scores.to(device), top_k)

        # Plain Python ints make the membership test and rank lookup cheap and unambiguous.
        ranked_per_user = top_k_item_pos.cpu().tolist()
        for next_item_pos, ranked_items in zip(y.tolist(), ranked_per_user):
            if next_item_pos in ranked_items:
                idx = ranked_items.index(next_item_pos)  # 0-based rank of the hit
                hr += 1
                mrr += 1 / (idx + 1)
                ndcg += np.reciprocal(np.log2(idx + 2))

    if usr_num == 0:
        # Nothing was evaluated (empty loader): report zeros rather than dividing by zero.
        return 0.0, 0.0, 0.0

    hr = hr / usr_num
    ndcg = ndcg / usr_num
    mrr = mrr / usr_num
    return hr, ndcg, mrr

ctime()

'Mon Aug  5 17:59:15 2024'

In [10]:
class SpreadLoss(nn.Module):
    """Squared-hinge margin loss between the target score and every other score.

    For each sample the loss sums max(0, margin - (a_t - a_i))^2 over classes,
    where a_t is the target class score, and the result is averaged over the batch.
    The margin can be grown stepwise with `update_margin`.
    """

    def __init__(self, margin=0.2, step_size=0.1, max_margin=0.9):
        super(SpreadLoss, self).__init__()
        self.margin = margin
        self.step_size = step_size
        self.max_margin = max_margin

    def forward(self, output, target):
        """Return the scalar loss.

        Args:
            output: (batch, num_classes) score tensor.
            target: (batch,) integer class indices.
        """
        batch_size = output.size(0)

        # Build the one-hot mask directly on output's device and dtype. This avoids
        # both the dependency on a global `device` and materialising a
        # num_classes x num_classes identity matrix on every call.
        target_index = torch.as_tensor(target, dtype=torch.long, device=output.device)
        target_one_hot = torch.zeros_like(output).scatter_(1, target_index.reshape(-1, 1), 1.0)

        a_t = (output * target_one_hot).sum(dim=1)
        # NOTE(review): the target column of a_i is 0 rather than excluded, so the
        # target class itself contributes max(0, margin - a_t)^2. Kept as in the
        # original; confirm this is intended.
        a_i = output * (1 - target_one_hot)

        loss = torch.clamp(self.margin - (a_t.view(batch_size, 1) - a_i), min=0) ** 2
        loss = loss.sum(dim=1).mean()
        return loss

    def update_margin(self):
        """Increase the margin by `step_size`, capped at `max_margin`."""
        self.margin = min(self.max_margin, self.margin + self.step_size)

In [11]:
def train(train_data, train_target_data, train_neigh_data,
          valid_data, valid_target_data, valid_neigh_data,
          gru_input, gru_output, gru_layer_num,
          batch_size, item_num, seq_len, dataset_name, top_k, model_type, model, k):
    """Train `model` with cross-entropy loss and early stopping on validation metrics.

    After every epoch the model is evaluated with `Metrics` on the validation
    split. The weights are saved to '<model_name>.pth' whenever validation NDCG
    improves, one line of validation metrics is appended to './process.txt', and
    training stops once `threshold` consecutive epochs improve none of
    HR / NDCG / MRR (or after 100 epochs).

    Args:
        train_data, train_target_data, train_neigh_data: training tensors
            (inputs, targets, neighbour inputs).
        valid_data, valid_target_data, valid_neigh_data: validation tensors.
        gru_input, gru_output, gru_layer_num, item_num, seq_len: unused here;
            kept so existing callers keep working.
        batch_size: mini-batch size for both loaders.
        dataset_name, model_type, top_k, k: used for logging and to build the
            checkpoint name.
        model: the module to train; it is moved to the module-level `device`.

    Returns:
        The checkpoint base name (without the '.pth' extension).
    """
    train_dataset = Data.TensorDataset(train_data, train_target_data, train_neigh_data)
    train_loader = Data.DataLoader(train_dataset, batch_size, shuffle=True, num_workers=4)

    valid_dataset = Data.TensorDataset(valid_data, valid_target_data, valid_neigh_data)
    valid_loader = Data.DataLoader(valid_dataset, batch_size, shuffle=False, num_workers=4)

    print(f'dataset: {dataset_name}, model_type: {model_type}, topN: {top_k}, k: {k}')

    # Move the model once, before building the optimizer, instead of on every batch.
    model = model.to(device)
    loss_fun = nn.CrossEntropyLoss().to(device)
    # Alternative loss: SpreadLoss() (then call loss_fun.update_margin() after each epoch).
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    print('training ... ', ctime())

    best_ndcg = 0
    best_hr = 0
    best_mrr = 0
    epoch = 100

    model_name = dataset_name + '_' + str(model_type) + '_' + str(top_k) + '_k' + str(k)
    threshold = 2  # early-stopping patience, in epochs
    cnt = 0        # consecutive epochs without any metric improving
    for i in range(epoch):
        model.train()
        loss_total = 0
        train_pbar = tqdm(train_loader, position=0, ncols=100)
        for data, target, neigh in train_pbar:
            data, target, neigh = data.to(device), target.to(device), neigh.to(device)
            score = model(data, neigh, target)
            # The model returns CPU scores; bring them back next to `target` for the loss.
            score = score.to(device)
            loss = loss_fun(score, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_total += loss.item()
            train_pbar.set_description(f'Epoch [{i + 1}/{epoch}]')
            train_pbar.set_postfix({'loss': loss.detach().item()})
        print(i + 1, loss_total / len(train_loader), ctime())

        model.eval()
        hr, ndcg, mrr = Metrics(model, valid_loader, valid_neigh_data, top_k, batch_size)
        print('valid_HR:', hr)
        print('valid_NDCG:', ndcg)
        print('valid_MRR:', mrr)

        flag = False  # becomes True when at least one metric improved this epoch
        if hr > best_hr:
            best_hr = hr
            flag = True
        if ndcg > best_ndcg:
            # NOTE(review): the printed name uses '_' + k while the checkpoint file
            # uses '_k' + k (see model_name); kept as in the original.
            print(dataset_name + '_' + str(model_type) + '_' + str(top_k) + '_' + str(k))
            torch.save(model.state_dict(), model_name + '.pth')
            best_ndcg = ndcg
            flag = True
        if mrr > best_mrr:
            best_mrr = mrr
            flag = True
        print('best_HR:', best_hr)
        print('best_NDCG:', best_ndcg)
        print('best_MRR:', best_mrr)

        # The context manager closes the log even if a write fails.
        s1 = 'valid: ' + 'HR: ' + str(hr) + ' NDCG: ' + str(ndcg) + ' MRR: ' + str(mrr)
        with open('./process.txt', 'a') as f:
            f.write(model_name + '  ')
            f.write(s1 + '\n')

        if flag:
            cnt = 0
        else:
            cnt += 1
        if cnt >= threshold:
            break
    return model_name

ctime()

'Mon Aug  5 17:59:16 2024'

In [12]:
def test(test_data, test_target_data, test_neigh_data,
        gru_input_feature, gru_output_feature, gru_layer_num,
        batch_size, item_num, seq_len, dataset_name, top_k, model_type, model_name, k):
    """Evaluate the checkpoint '<model_name>.pth' on the test split.

    Builds a fresh `GRUModel`, loads the saved weights, computes HR / NDCG / MRR
    with `Metrics`, prints them and appends one CSV row
    ('<model_name>,<hr>,<ndcg>,<mrr>') to 'note.csv'.

    Args:
        test_data, test_target_data, test_neigh_data: test tensors
            (inputs, targets, neighbour inputs).
        gru_input_feature, gru_output_feature, gru_layer_num, item_num, seq_len, k:
            forwarded to the `GRUModel` constructor; they must match the values
            used when the checkpoint was trained.
        batch_size: mini-batch size of the test loader.
        dataset_name, model_type, top_k: used for logging; `top_k` is also the
            cut-off passed to `Metrics`.
        model_name: checkpoint base name as returned by `train`.
    """
    print(f'{dataset_name} testing ...', ctime())
    test_dataset = Data.TensorDataset(test_data, test_target_data, test_neigh_data)
    test_loader = Data.DataLoader(test_dataset, batch_size, shuffle=False, num_workers=4)
    print(f'dataset: {dataset_name} model_type: {model_type} topN: {top_k}, k: {k}')

    model = GRUModel(gru_input_feature, gru_output_feature, gru_layer_num, item_num, seq_len, k).to(device)
    print(model_name + '.pth')
    # map_location lets a checkpoint saved on a GPU be loaded on whatever `device` is now
    # (for example a CPU-only machine).
    model.load_state_dict(torch.load(model_name + '.pth', map_location=device))
    model.eval()
    test_hr, test_ndcg, test_mrr = Metrics(model, test_loader, test_neigh_data, top_k, batch_size)
    print('test_HR:', test_hr)
    print('test_NDCG:', test_ndcg)
    print('test_MRR:', test_mrr)

    s2 = str(test_hr) + ',' + str(test_ndcg) + ',' + str(test_mrr)
    # The context manager closes the results file even if a write fails.
    with open('note.csv', 'a') as f:
        f.write(model_name + ',')
        f.write(s2 + '\n')

ctime()

'Mon Aug  5 17:59:16 2024'

In [13]:
# Experiment driver: for every dataset, load and index the data, split it,
# train a fresh GRUModel and evaluate the checkpoint that training saved.
torch.manual_seed(4719)
print(f'start: {ctime()}')

# Hyper-parameters shared by all datasets.
batch_size, gru_layer_num = 64, 2
gru_input_feature = gru_output_feature = 128
seq_len = 6
datasets = ['lfm', 'yoochoose']
topn, model_type, k = 5, 'SCAD', 4

for dataset in datasets:
    dataset_dir = f'{basic_path}dataset/{dataset}/'
    path1 = dataset_dir + 'user_item_8.csv'
    path2 = dataset_dir + 'user_item_neigh.csv'

    usr_item, itemids, neigh_item = read_file_to_four(path1, path2)
    usr_item, neigh_item, item_num = get_itemid(usr_item, itemids, neigh_item)
    (train_data, train_target_data, train_neigh_data,
     valid_data, valid_target_data, valid_neigh_data,
     test_data, test_target_data, test_neigh_data,
     item_num) = get_train_test_dataset(usr_item, neigh_item, item_num)

    # Convert each split to LongTensors. The shape notes are the original author's,
    # from one particular dataset.
    train_data, train_target_data, train_neigh_data = (
        torch.LongTensor(train_data),         # [9190, 7]
        torch.LongTensor(train_target_data),  # [9190]
        torch.LongTensor(train_neigh_data),   # [9190, 5, 7]
    )
    print('train:', train_data.shape, train_target_data.shape, train_neigh_data.shape)

    valid_data, valid_target_data, valid_neigh_data = (
        torch.LongTensor(valid_data),         # [1149, 7]
        torch.LongTensor(valid_target_data),  # [1149]
        torch.LongTensor(valid_neigh_data),   # [1149, 5, 7]
    )
    print('valid:', valid_data.shape, valid_target_data.shape, valid_neigh_data.shape)

    test_data, test_target_data, test_neigh_data = (
        torch.LongTensor(test_data),          # [1148, 7]
        torch.LongTensor(test_target_data),   # [1148]
        torch.LongTensor(test_neigh_data),    # [1148, 5, 7]
    )
    print('test:', test_data.shape, test_target_data.shape, test_neigh_data.shape)

    model = GRUModel(gru_input_feature, gru_output_feature, gru_layer_num, item_num, seq_len, k)

    model_name = train(
        train_data, train_target_data, train_neigh_data,
        valid_data, valid_target_data, valid_neigh_data,
        gru_input_feature, gru_output_feature, gru_layer_num,
        batch_size, item_num, seq_len, dataset, topn, model_type, model, k,
    )
    print(f'{dataset} train finished')

    test(
        test_data, test_target_data, test_neigh_data,
        gru_input_feature, gru_output_feature, gru_layer_num,
        batch_size, item_num, seq_len, dataset, topn, model_type, model_name, k,
    )
    print('test finished')

start: Mon Aug  5 17:59:16 2024


FileNotFoundError: [Errno 2] No such file or directory: './dataset/lfm/user_item_8.csv'

In [None]:
# topn, model_type, k = 5, 'SCAD', 4

# model = GRUModel(gru_input_feature, gru_output_feature,gru_layer_num, item_num, seq_len, k)

# model_name = train(train_data, train_target_data, train_neigh_data,
#                    valid_data, valid_target_data, valid_neigh_data,
#                    gru_input_feature, gru_output_feature, gru_layer_num,
#                    batch_size, item_num, seq_len, dataset, topn, model_type, model, k)
# print('train finished')

In [None]:
# test(test_data, test_target_data, test_neigh_data,
#      gru_input_feature, gru_output_feature, gru_layer_num,
#      batch_size, item_num, seq_len, dataset, topn, model_type, model_name, k)
# print('test finished')