In [39]:
import warnings
warnings.filterwarnings('ignore')
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import time
import os
import numpy as np
import pandas as pd
from torch.utils.data import TensorDataset,DataLoader

In [40]:
#check gpu device
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#device = torch.device('cpu')

### Load Data

In [41]:
# load data
dir = 'E:\\Sebnewrepo/Rec_sys_lab/paper1_experiment/'
checkin_file = 'ny_ordered.csv'
df = pd.read_csv(dir + checkin_file)
df.head()

Unnamed: 0,user_id,poi_id,poi_category_id,poi_category_name,latitude,longitude,time_offset,UTC_time,datetime
0,1,4abc1f51f964a520798620e3,4bf58dd8d48988d1ce941735,Seafood Restaurant,40.781558,-73.975792,-240,Wed Apr 04 23:31:31 +0000 2012,2012-04-04 23:31:31
1,1,4d4ac10da0ef54814b6ffff6,4bf58dd8d48988d157941735,American Restaurant,40.784018,-73.974524,-240,Sat Apr 07 17:42:24 +0000 2012,2012-04-07 17:42:24
2,1,4db44994cda1c57c82583709,4bf58dd8d48988d1f1931735,General Entertainment,40.739398,-73.99321,-240,Sun Apr 08 18:20:29 +0000 2012,2012-04-08 18:20:29
3,1,4a541923f964a52008b31fe3,4bf58dd8d48988d14e941735,American Restaurant,40.785677,-73.976498,-240,Sun Apr 08 20:02:10 +0000 2012,2012-04-08 20:02:10
4,1,40f1d480f964a5205b0a1fe3,4bf58dd8d48988d143941735,Breakfast Spot,40.719929,-74.008532,-240,Mon Apr 09 16:20:52 +0000 2012,2012-04-09 16:20:52


In [42]:
# POIs encode, and generate encode mapping
poi_cat = pd.Categorical(df['poi_id'])
poi_encode = poi_cat.codes
#generate poi mapping table
poi_mapping = pd.DataFrame({
    'poi_encode': poi_encode,
    'poi_id': df['poi_id']
    })
#drop duplicate
poi_mapping_output = poi_mapping.drop_duplicates()
df['poi_encode'] = poi_encode
df.drop(['poi_id'], axis = 1, inplace = True)
df.head(5)

Unnamed: 0,user_id,poi_category_id,poi_category_name,latitude,longitude,time_offset,UTC_time,datetime,poi_encode
0,1,4bf58dd8d48988d1ce941735,Seafood Restaurant,40.781558,-73.975792,-240,Wed Apr 04 23:31:31 +0000 2012,2012-04-04 23:31:31,2752
1,1,4bf58dd8d48988d157941735,American Restaurant,40.784018,-73.974524,-240,Sat Apr 07 17:42:24 +0000 2012,2012-04-07 17:42:24,7093
2,1,4bf58dd8d48988d1f1931735,General Entertainment,40.739398,-73.99321,-240,Sun Apr 08 18:20:29 +0000 2012,2012-04-08 18:20:29,7455
3,1,4bf58dd8d48988d14e941735,American Restaurant,40.785677,-73.976498,-240,Sun Apr 08 20:02:10 +0000 2012,2012-04-08 20:02:10,1958
4,1,4bf58dd8d48988d143941735,Breakfast Spot,40.719929,-74.008532,-240,Mon Apr 09 16:20:52 +0000 2012,2012-04-09 16:20:52,499


In [43]:
# only keep user id and sequential POIs

df_input = pd.DataFrame({
    'user_id': df['user_id'] - 1,  # user_id offset by 1
    'poi_id': df['poi_encode'],
    #'implicit': np.ones(179468)
})

In [44]:
df_input = df_input.reset_index(drop = True)
df_input

Unnamed: 0,user_id,poi_id
0,0,2752
1,0,7093
2,0,7455
3,0,1958
4,0,499
...,...,...
179463,1082,1541
179464,1082,466
179465,1082,8703
179466,1082,9987


### Train Test split

In [45]:
def neg_sample_item(num_item, neg_num,item_list):
    neg_list = []
    while len(neg_list)<neg_num:
        neg_item = np.random.choice(num_item, 1)[0]
        while neg_item in item_list:
            neg_item = np.random.choice(num_item, 1)[0]
        neg_list.append(neg_item)
    return neg_list

def generate_train_test_data(data, neg_num):
    # user rating item
    num_item = len(data['poi_id'].unique())
    
    train = []
    test = []
    # split data
    for uid in data['user_id'].unique():
        item_list = data[data['user_id']==uid]['poi_id'].tolist()
        for i in range(len(item_list)-8):
            item_seq = item_list[i:i+8]
            if i == len(item_list)-9:
                neg_list = neg_sample_item(num_item, neg_num,item_list)
                result_slice = [uid] + item_seq + neg_list
                test.append(result_slice)
            else:
                neg_list = neg_sample_item(num_item, neg_num,item_list)
                result_slice = [uid] + item_seq + neg_list
                train.append(result_slice)
    return train, test

In [46]:
%%time
train_data, test_data = generate_train_test_data(df_input, 3)

train_data = torch.from_numpy(np.array(train_data))
test_data = torch.from_numpy(np.array(test_data))
train_x = train_data[:,:6]
train_y = train_data[:,6:]

# construct dataset for train test
train_dataset = TensorDataset(train_x, train_y)
dataloader = DataLoader(dataset=train_dataset, batch_size=512, shuffle=True)

Wall time: 21.2 s


### self att Model

In [61]:
class SelfAttenion(nn.Module):
    def __init__(self, embedding_dim):
        """
        embeding_dim: int, laten vector dim of item
        """
        super(SelfAttenion, self).__init__()
        self.linear1 = nn.Linear(embedding_dim, embedding_dim)
        self.linear1.weight.data.normal_(mean=0, std=np.sqrt(2.0 / embedding_dim))
        # self.linear2 = nn.Linear(embedding_dim, embedding_dim)
        # init weight
        # nn.init.normal_(self.linear1.weight, mean=0, std=np.sqrt(2.0 / embedding_dim)
    
    def forward(self, item_embedding):
        """
        item_embeding: L*d user history L squence interaction item
        """
        Q = F.relu(self.linear1(item_embedding))
        K = F.relu(self.linear1(item_embedding))
        d = torch.FloatTensor([100]).cuda()
        affinity = torch.matmul(Q, torch.transpose(K, 1, 2))/torch.sqrt(d)
        
        # mask the diagonal value
        mask = torch.eye(item_embedding.size(1), item_embedding.size(1)).byte().cuda()
        affinity = affinity.masked_fill(mask, 0)
        S = torch.sigmoid(affinity)
        A = torch.mean(torch.matmul(S, item_embedding), dim=1)
        return A

In [62]:
class AttSeqModel(nn.Module):
    def __init__(self, num_user, num_item, L, w, embedding_dim):
        """
        num_user: int, user number in dataset
        num_item: int, item number in dataset
        L: int the number of history item will consider
        embeding_dim: int, laten vector dim of item
        """
        super(AttSeqModel, self).__init__()
        self.embedding_dim = embedding_dim
        self.num_item = num_item
        self.L = L
        self.user_embed = nn.Embedding(num_user, embedding_dim)
        self.item_embed_short = nn.Embedding(num_item, embedding_dim)
        self.item_embed_long = nn.Embedding(num_item, embedding_dim)
        self.item_position_embed = nn.Embedding.from_pretrained(self.position_embed(L),freeze=True)
        self.att = SelfAttenion(embedding_dim).cuda()
        self.w = w
        
        # embedding init
        self.user_embed.weight.data.normal_(0,1.0/self.user_embed.embedding_dim)
        self.item_embed_short.weight.data.normal_(0, 1.0/self.item_embed_short.embedding_dim)
        self.item_embed_long.weight.data.normal_(0, 1.0/self.item_embed_long.embedding_dim)
        
    def position_embed(self, L):
        position_embedding = np.array([[pos/np.power(1000, 2.*i)/ self.embedding_dim for i in range(self.embedding_dim)]
                                      for pos in range(L)])
        position_embedding[:,0::2] = np.sin(position_embedding[:,0::2])
        position_embedding[:,1::2] = np.cos(position_embedding[:,1::2])
        return torch.from_numpy(position_embedding).cuda()
    
    def forward(self, user, seq_item, target=None, for_pred=False):
        """
        user: uid of user
        seq_item: L item id user interacte before
        target: item
        """
        # sequential item embedding
        item_embedding = self.item_embed_short(seq_item)  # L*d
        # item position embedding
        position_idx = torch.range(0,self.L-1).unsqueeze(0).expand(seq_item.size(0),-1).long().cuda()
        position_embedding = self.item_position_embed(position_idx)
        # add position embedding
        item_embedding_cat = item_embedding.float() + position_embedding.float()
        
        # attention
        attention = self.att(item_embedding_cat)
        
        # user embedding
        user_embedding = self.user_embed(user).squeeze()
        # target embedding short and long note: those two embedding is different 
        if target is None:
            target = torch.range(0,self.num_item-1).long().unsqueeze(0).cuda()
            target_embedding_short = self.item_embed_short(target).squeeze()
            target_embedding_long = self.item_embed_long(target).squeeze()
        else:
            target_embedding_short = self.item_embed_short(target).squeeze()
            target_embedding_long = self.item_embed_long(target).squeeze()
        # pred
        if for_pred == False:
            user_embedding = user_embedding.unsqueeze(1).expand(-1,target.size(1),-1)
            attention = attention.unsqueeze(1).expand(-1,target.size(1),-1)
            y_pred = self.w* torch.sqrt(torch.sum((user_embedding - target_embedding_long)**2, dim=2)) + (1-self.w)*torch.sqrt(torch.sum((attention-target_embedding_short)**2, dim=2))
            return y_pred
        else:
            user_embedding = user_embedding.unsqueeze(0).expand(target.size(1),-1)
            attention = attention.expand(target.size(1),-1)
            y_pred = self.w* torch.sqrt(torch.sum((user_embedding - target_embedding_long)**2, dim=1)) + (1-self.w)*torch.sqrt(torch.sum((attention-target_embedding_short)**2, dim=1))
            return y_pred

In [63]:
# parameters
num_user = len(df_input['user_id'].unique())
num_item = len(df_input['poi_id'].unique())
L = 5
embedding_dim = 100
w = 0.2

In [64]:
def train(model, dataloader, test_data, epochs):
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=0.001,weight_decay=0.0001)
    for epoch in range(epochs):
        losses = []
        start = time.time()
        for train_x, train_y in dataloader:
            user = train_x[:,0].cuda()
            item_seq = train_x[:,1:].cuda()
            target_pos = train_y[:,:3].cuda()
            target_neg = train_y[:,3:].cuda()
            y_pred_pos = model(user, item_seq, target_pos,for_pred=False)
            y_pred_neg = model(user, item_seq, target_neg, for_pred=False)
            optimizer.zero_grad()
            loss = torch.zeros(y_pred_pos.size(0),1).cuda()
            for i in range(y_pred_pos.size(1)):
                l = y_pred_pos[:,i].view(-1,1)
                y_pos_slice = l.expand(-1,y_pred_pos.size(1))
                loss += torch.mean(F.relu(y_pos_slice - y_pred_neg + 0.5), dim=1).unsqueeze(1)
            loss = torch.mean(loss)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
        print("Epoch %d loss is %.3f and consume time is %.2f" %(epoch+1, np.mean(losses), (time.time() - start)))
        hr, mrr = test(model, test_data, 50)
        print("hr is %.3f and mrr is %.3f" %(hr, mrr))

In [65]:
def hr(y_target, y_pred, topk):
    y_pred = y_pred[:topk].cpu().numpy()
    for item in y_pred:
        if item in y_target:
            return 1
    return 0

def mrr(y_target, y_pred, topk):
    y_pred = y_pred[:topk].cpu().numpy()
    for idx in range(len(y_pred)):
        if y_pred[idx] in y_target:
            return 1/(idx+1)
    return 0

def test(model, test_data, topk):
    model.eval()
    HR = []
    MRR = []
    for idx in range(test_data.size(0)):
        uid = test_data[idx,0].unsqueeze(0).cuda()
        item_seq = test_data[idx, 1:6].unsqueeze(0).cuda()
        y_target = test_data[idx,6:9].numpy()
        y_pred = model(uid, item_seq,for_pred=True)
        y_pred = torch.argsort(y_pred)
        hits = hr(y_target, y_pred, topk)
        mrrs = mrr(y_target, y_pred, topk)
        HR.append(hits)
        MRR.append(mrrs)
    return np.mean(HR), np.mean(MRR)

In [66]:
selfatt = AttSeqModel(num_user, num_item, L, w, embedding_dim).cuda()
train(selfatt,dataloader,test_data,20)

Epoch 1 loss is 0.972 and consume time is 3.91
hr is 0.199 and mrr is 0.050
Epoch 2 loss is 0.743 and consume time is 3.87
hr is 0.185 and mrr is 0.047
Epoch 3 loss is 0.724 and consume time is 3.88
hr is 0.194 and mrr is 0.047
Epoch 4 loss is 0.717 and consume time is 3.92
hr is 0.201 and mrr is 0.047
Epoch 5 loss is 0.715 and consume time is 3.95
hr is 0.200 and mrr is 0.051
Epoch 6 loss is 0.713 and consume time is 3.90
hr is 0.202 and mrr is 0.049
Epoch 7 loss is 0.711 and consume time is 3.89
hr is 0.198 and mrr is 0.048
Epoch 8 loss is 0.710 and consume time is 3.88
hr is 0.209 and mrr is 0.049
Epoch 9 loss is 0.709 and consume time is 3.92


KeyboardInterrupt: 