In [21]:
import warnings
warnings.filterwarnings('ignore')
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import time
import os
import numpy as np
import pandas as pd
from torch.utils.data import TensorDataset,DataLoader

In [22]:
#check gpu device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#device = torch.device('cpu')

### Load Data

In [23]:
# load data
dir = 'E:\\Sebnewrepo/Rec_sys_lab/paper1_experiment/'
checkin_file = 'ny_ordered.csv'
df = pd.read_csv(dir + checkin_file)
df.head()

Unnamed: 0,user_id,poi_id,poi_category_id,poi_category_name,latitude,longitude,time_offset,UTC_time,datetime
0,1,4abc1f51f964a520798620e3,4bf58dd8d48988d1ce941735,Seafood Restaurant,40.781558,-73.975792,-240,Wed Apr 04 23:31:31 +0000 2012,2012-04-04 23:31:31
1,1,4d4ac10da0ef54814b6ffff6,4bf58dd8d48988d157941735,American Restaurant,40.784018,-73.974524,-240,Sat Apr 07 17:42:24 +0000 2012,2012-04-07 17:42:24
2,1,4db44994cda1c57c82583709,4bf58dd8d48988d1f1931735,General Entertainment,40.739398,-73.99321,-240,Sun Apr 08 18:20:29 +0000 2012,2012-04-08 18:20:29
3,1,4a541923f964a52008b31fe3,4bf58dd8d48988d14e941735,American Restaurant,40.785677,-73.976498,-240,Sun Apr 08 20:02:10 +0000 2012,2012-04-08 20:02:10
4,1,40f1d480f964a5205b0a1fe3,4bf58dd8d48988d143941735,Breakfast Spot,40.719929,-74.008532,-240,Mon Apr 09 16:20:52 +0000 2012,2012-04-09 16:20:52


In [25]:
# POIs encode, and generate encode mapping
poi_cat = pd.Categorical(df['poi_id'])
poi_encode = poi_cat.codes
#generate poi mapping table
poi_mapping = pd.DataFrame({
    'poi_encode': poi_encode,
    'poi_id': df['poi_id']
    })
#drop duplicate
poi_mapping_output = poi_mapping.drop_duplicates()
df['poi_encode'] = poi_encode
df.drop(['poi_id'], axis = 1, inplace = True)
df.head(5)

Unnamed: 0,user_id,poi_category_id,poi_category_name,latitude,longitude,time_offset,UTC_time,datetime,poi_encode
0,1,4bf58dd8d48988d1ce941735,Seafood Restaurant,40.781558,-73.975792,-240,Wed Apr 04 23:31:31 +0000 2012,2012-04-04 23:31:31,2752
1,1,4bf58dd8d48988d157941735,American Restaurant,40.784018,-73.974524,-240,Sat Apr 07 17:42:24 +0000 2012,2012-04-07 17:42:24,7093
2,1,4bf58dd8d48988d1f1931735,General Entertainment,40.739398,-73.99321,-240,Sun Apr 08 18:20:29 +0000 2012,2012-04-08 18:20:29,7455
3,1,4bf58dd8d48988d14e941735,American Restaurant,40.785677,-73.976498,-240,Sun Apr 08 20:02:10 +0000 2012,2012-04-08 20:02:10,1958
4,1,4bf58dd8d48988d143941735,Breakfast Spot,40.719929,-74.008532,-240,Mon Apr 09 16:20:52 +0000 2012,2012-04-09 16:20:52,499


In [26]:
# only keep user id and sequential POIs

df_input = pd.DataFrame({
    'user_id': df['user_id'] - 1,  # user_id offset by 1
    'poi_id': df['poi_encode'],
    #'implicit': np.ones(179468)
})

In [27]:
df_input = df_input.reset_index(drop = True)
df_input

Unnamed: 0,user_id,poi_id
0,0,2752
1,0,7093
2,0,7455
3,0,1958
4,0,499
...,...,...
179463,1082,1541
179464,1082,466
179465,1082,8703
179466,1082,9987


### Train Test split

In [28]:
def neg_sample_item(num_item, neg_num,item_list):
    neg_list = []
    while len(neg_list)<neg_num:
        neg_item = np.random.choice(num_item, 1)[0]
        while neg_item in item_list:
            neg_item = np.random.choice(num_item, 1)[0]
        neg_list.append(neg_item)
    return neg_list

def generate_train_test_data(data, neg_num):
    # user rating item
    num_item = len(data['poi_id'].unique())
    
    train = []
    test = []
    # split data
    for uid in data['user_id'].unique():
        item_list = data[data['user_id']==uid]['poi_id'].tolist()
        for i in range(len(item_list)-8):
            item_seq = item_list[i:i+8]
            if i == len(item_list)-9:
                neg_list = neg_sample_item(num_item, neg_num,item_list)
                result_slice = [uid] + item_seq + neg_list
                test.append(result_slice)
            else:
                neg_list = neg_sample_item(num_item, neg_num,item_list)
                result_slice = [uid] + item_seq + neg_list
                train.append(result_slice)
    return train, test

In [29]:
%%time
train_data, test_data = generate_train_test_data(df_input, 3)

train_data = torch.from_numpy(np.array(train_data))
test_data = torch.from_numpy(np.array(test_data))
train_x = train_data[:,:6]
train_y = train_data[:,6:]

# construct dataset for train test
train_dataset = TensorDataset(train_x, train_y)
dataloader = DataLoader(dataset=train_dataset, batch_size=512, shuffle=True)

Wall time: 20.4 s


### self att Model

In [30]:
class model(nn.Module):
    def __init__(self, num_user, num_item, L, w, embedding_dim, device):
        
        super(model, self).__init__()
        self.embedding_dim = embedding_dim
        self.num_user = num_user
        self.num_item = num_item
        self.L = L  # sequence length
        self.w = w  # learnable para
        
        # define embedding
        
        self.user_embed = nn.Embedding(num_user, embedding_dim).to(device)
        self.item_embed = nn.Embedding(num_item, embedding_dim).to(device)
        self.linear1 = nn.Linear(embedding_dim, embedding_dim).to(device)
        print(self.linear1)
        self.item_position_embed = nn.Embedding.from_pretrained(self.position_embed(L),freeze=True)
        
        # initialize
        self.user_embed.weight.data.normal_(0, 1.0/self.user_embed.embedding_dim)
        self.item_embed.weight.data.normal_(0, 1.0/self.item_embed.embedding_dim)
        self.linear1.weight.data.normal_(mean=0, std=np.sqrt(2.0 / embedding_dim))
    
    def position_embed(self, L):
        position_embedding = np.array([[pos/np.power(1000, 2.*i)/ self.embedding_dim for i in range(self.embedding_dim)]
                                      for pos in range(L)])
        position_embedding[:,0::2] = np.sin(position_embedding[:,0::2])
        position_embedding[:,1::2] = np.cos(position_embedding[:,1::2])
        t = torch.from_numpy(position_embedding).to(device)
        return t
    
    def forward(self, user_id, seq_item, target = None, for_pred = False):
        
        '''
        user_id
        seq_item = L item id that user interact before
        target: item target
        '''
        
        # sequential item embedding
        seq_item = seq_item.to(device)
        item_embedding = self.item_embed(seq_item)
        # item position embedding
        position_idx = torch.range(0, self.L - 1).unsqueeze(0).expand(seq_item.size(0), -1).long().to(device)
        position_embedding = self.item_position_embed(position_idx)
        item_embedding_cat = item_embedding.float() + position_embedding.float()
        
        # self-attention network
        Q = F.relu(self.linear1(item_embedding_cat))
        K = F.relu(self.linear1(item_embedding_cat))
        d = torch.FloatTensor([100]).to(device)
        affinity = torch.matmul(Q, torch.transpose(K, 1, 2))/torch.sqrt(d)
        
        # mask the diagonal value
        mask = torch.eye(item_embedding_cat.size(1), item_embedding_cat.size(1)).byte().to(device)
        affinity = affinity.masked_fill(mask, 0)
        S = F.softmax(affinity)
        attention = torch.mean(torch.matmul(S, item_embedding_cat), dim=1)
        
        # user embedding
        user_id = user_id.to(device)
        user_embedding = self.user_embed(user_id).squeeze()
    
        # target embedding short and long note: those two embedding is different 

        if target is None:
            target = torch.range(0,self.num_item-1).long().unsqueeze(0).to(device)
            target_embedding = self.item_embed(target).squeeze()
        else:
            target = target.to(device)
            target_embedding = self.item_embed(target).squeeze()
            
        # pred
        if for_pred == False:
            user_embedding = user_embedding.unsqueeze(1).expand(-1,target.size(1),-1)
            attention = attention.unsqueeze(1).expand(-1,target.size(1),-1)
            y_pred = self.w* torch.sqrt(torch.sum((user_embedding - target_embedding)**2, dim=2)) + (1-self.w)*torch.sqrt(torch.sum((attention-target_embedding)**2, dim=2))
            return y_pred
        else:
            user_embedding = user_embedding.unsqueeze(0).expand(target.size(1),-1)
            attention = attention.expand(target.size(1),-1)
            y_pred = self.w* torch.sqrt(torch.sum((user_embedding - target_embedding)**2, dim=1)) + (1-self.w)*torch.sqrt(torch.sum((attention-target_embedding)**2, dim=1))
            return y_pred

In [31]:
# parameters
num_user = len(df_input['user_id'].unique())
num_item = len(df_input['poi_id'].unique())
L = 5
embedding_dim = 100
w = 0.2

In [32]:
def train(model, dataloader, test_data, epochs):
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=0.001,weight_decay=0.0001)
    for epoch in range(epochs):
        losses = []
        start = time.time()
        for train_x, train_y in dataloader:
            user = train_x[:,0]
            item_seq = train_x[:,1:]
            #print(item_seq.type)
            target_pos = train_y[:,:3]
            target_neg = train_y[:,3:]
            y_pred_pos = model(user, item_seq, target_pos,for_pred=False)
            y_pred_neg = model(user, item_seq, target_neg, for_pred=False)
            optimizer.zero_grad()
            loss = torch.zeros(y_pred_pos.size(0),1).to(device)
            #loss = torch.zeros(y_pred_pos.size(0),1)
            for i in range(y_pred_pos.size(1)):
                l = y_pred_pos[:,i].view(-1,1)
                y_pos_slice = l.expand(-1,y_pred_pos.size(1))
                # Hinge Loss
                loss += torch.mean(F.relu(y_pos_slice - y_pred_neg + 0.5), dim=1).unsqueeze(1)
            loss = torch.mean(loss)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
        print("Epoch %d loss is %.3f and consume time is %.2f" %(epoch+1, np.mean(losses), (time.time() - start)))
        hr, mrr = test(model, test_data, 50)
        print("hr is %.3f and mrr is %.3f" %(hr, mrr))

In [33]:
def hr(y_target, y_pred, topk):
    y_pred = y_pred[:topk].cpu().numpy()
    for item in y_pred:
        if item in y_target:
            return 1
    return 0

def mrr(y_target, y_pred, topk):
    y_pred = y_pred[:topk].cpu().numpy()
    for idx in range(len(y_pred)):
        if y_pred[idx] in y_target:
            return 1/(idx+1)
    return 0

def test(model, test_data, topk):
    model.eval()
    HR = []
    MRR = []
    for idx in range(test_data.size(0)):
        uid = test_data[idx,0].unsqueeze(0)
        item_seq = test_data[idx, 1:6].unsqueeze(0)
        y_target = test_data[idx,6:9].numpy()
        y_pred = model(uid, item_seq,for_pred=True)
        y_pred = torch.argsort(y_pred)
        hits = hr(y_target, y_pred, topk)
        mrrs = mrr(y_target, y_pred, topk)
        HR.append(hits)
        MRR.append(mrrs)
    return np.mean(HR), np.mean(MRR)

In [34]:
selfatt = model(num_user, num_item, L, w, embedding_dim, device).to(device)
train(selfatt,dataloader,test_data,20)

Linear(in_features=100, out_features=100, bias=True)
Epoch 1 loss is 1.101 and consume time is 5.09
hr is 0.006 and mrr is 0.000
Epoch 2 loss is 0.854 and consume time is 3.80
hr is 0.008 and mrr is 0.001
Epoch 3 loss is 0.821 and consume time is 3.80
hr is 0.006 and mrr is 0.001
Epoch 4 loss is 0.804 and consume time is 3.82
hr is 0.006 and mrr is 0.000
Epoch 5 loss is 0.792 and consume time is 3.80
hr is 0.005 and mrr is 0.000
Epoch 6 loss is 0.781 and consume time is 3.79
hr is 0.008 and mrr is 0.001
Epoch 7 loss is 0.772 and consume time is 3.76
hr is 0.004 and mrr is 0.000
Epoch 8 loss is 0.765 and consume time is 3.83
hr is 0.005 and mrr is 0.000
Epoch 9 loss is 0.758 and consume time is 3.83
hr is 0.002 and mrr is 0.000
Epoch 10 loss is 0.752 and consume time is 3.84
hr is 0.003 and mrr is 0.001
Epoch 11 loss is 0.747 and consume time is 3.83
hr is 0.003 and mrr is 0.001
Epoch 12 loss is 0.742 and consume time is 3.83
hr is 0.002 and mrr is 0.000
Epoch 13 loss is 0.738 and consu