In [2]:
import warnings
warnings.filterwarnings('ignore')
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import time
import os
import numpy as np
import pandas as pd
from torch.utils.data import TensorDataset,DataLoader

In [3]:
# Choose the compute device: the first CUDA GPU when PyTorch can see one,
# otherwise the CPU. To force CPU execution, assign torch.device('cpu') instead.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [10]:
# Read the POI-to-cluster lookup table; the preview below shows its
# `poi_id` and `clusters` columns.
cluster_file = "poi_cluster.csv"
df_cluster = pd.read_csv(cluster_file)
df_cluster.head()

Unnamed: 0,poi_id,clusters
0,3fd66200f964a52000e71ee3,35
1,3fd66200f964a52000e81ee3,122
2,3fd66200f964a52000f11ee3,21
3,3fd66200f964a52001e51ee3,21
4,3fd66200f964a52001e81ee3,154


In [9]:
# Load the time-ordered POI check-in records (one row per check-in).
# The CSV lives outside the notebook's folder. The original author's location
# remains the default; set the POI_DATA_DIR environment variable to point the
# notebook at the data on another machine.
# NOTE(review): `dir` shadows the Python builtin of the same name. The name is
# kept because cells outside this view may still reference it.
dir = os.environ.get('POI_DATA_DIR', 'E:\\Sebnewrepo/Rec_sys_lab/paper1_experiment/')
checkin_file = 'ny_ordered.csv'
# os.path.join yields the same path as plain concatenation when `dir` ends with
# a separator, and still works when an overridden directory does not.
df = pd.read_csv(os.path.join(dir, checkin_file))
df.head()

Unnamed: 0,user_id,poi_id,poi_category_id,poi_category_name,latitude,longitude,time_offset,UTC_time,datetime
0,1,4abc1f51f964a520798620e3,4bf58dd8d48988d1ce941735,Seafood Restaurant,40.781558,-73.975792,-240,Wed Apr 04 23:31:31 +0000 2012,2012-04-04 23:31:31
1,1,4d4ac10da0ef54814b6ffff6,4bf58dd8d48988d157941735,American Restaurant,40.784018,-73.974524,-240,Sat Apr 07 17:42:24 +0000 2012,2012-04-07 17:42:24
2,1,4db44994cda1c57c82583709,4bf58dd8d48988d1f1931735,General Entertainment,40.739398,-73.99321,-240,Sun Apr 08 18:20:29 +0000 2012,2012-04-08 18:20:29
3,1,4a541923f964a52008b31fe3,4bf58dd8d48988d14e941735,American Restaurant,40.785677,-73.976498,-240,Sun Apr 08 20:02:10 +0000 2012,2012-04-08 20:02:10
4,1,40f1d480f964a5205b0a1fe3,4bf58dd8d48988d143941735,Breakfast Spot,40.719929,-74.008532,-240,Mon Apr 09 16:20:52 +0000 2012,2012-04-09 16:20:52


In [11]:
# Attach each check-in's cluster label by matching on `poi_id`.
# Dropping any existing `clusters` column first keeps this cell idempotent:
# without it, running the cell a second time merges again and produces
# `clusters_x` / `clusters_y` instead of the `clusters` column used later.
# `validate='many_to_one'` makes pandas raise if `df_cluster` lists a POI more
# than once, which would otherwise silently duplicate check-in rows.
df = (
    df.drop(columns='clusters', errors='ignore')
      .merge(df_cluster, how='left', on='poi_id', validate='many_to_one')
)
df.head()

Unnamed: 0,user_id,poi_id,poi_category_id,poi_category_name,latitude,longitude,time_offset,UTC_time,datetime,clusters
0,1,4abc1f51f964a520798620e3,4bf58dd8d48988d1ce941735,Seafood Restaurant,40.781558,-73.975792,-240,Wed Apr 04 23:31:31 +0000 2012,2012-04-04 23:31:31,134
1,1,4d4ac10da0ef54814b6ffff6,4bf58dd8d48988d157941735,American Restaurant,40.784018,-73.974524,-240,Sat Apr 07 17:42:24 +0000 2012,2012-04-07 17:42:24,62
2,1,4db44994cda1c57c82583709,4bf58dd8d48988d1f1931735,General Entertainment,40.739398,-73.99321,-240,Sun Apr 08 18:20:29 +0000 2012,2012-04-08 18:20:29,119
3,1,4a541923f964a52008b31fe3,4bf58dd8d48988d14e941735,American Restaurant,40.785677,-73.976498,-240,Sun Apr 08 20:02:10 +0000 2012,2012-04-08 20:02:10,7
4,1,40f1d480f964a5205b0a1fe3,4bf58dd8d48988d143941735,Breakfast Spot,40.719929,-74.008532,-240,Mon Apr 09 16:20:52 +0000 2012,2012-04-09 16:20:52,153


In [12]:
# Build the two-column model input: user ids shifted down by one (the preview
# shows user 1 becoming 0) and the cluster label renamed to `cluster_id`.
# An all-ones 'implicit' column was sketched here previously but is disabled.
df_input = (
    df[['user_id', 'clusters']]
    .rename(columns={'clusters': 'cluster_id'})
    .assign(user_id=lambda frame: frame['user_id'] - 1)
)
df_input.head()

Unnamed: 0,user_id,cluster_id
0,0,134
1,0,62
2,0,119
3,0,7
4,0,153


## Attention Encoder Kernel

In [None]:
class att_encode(nn.Module):
    def __init__(self, num_users, num_items, L, w, embedding_dim, device):
        super(att_encode, self).__init__()
        
        self.L = L
        
        # user and item embeddings
        self.user_embed = nn.Embedding(num_users, embedding_dim).to(device)
        self.item_embed = nn.Embedding(num_items, embedding_dim).to(device)
        self.linear1 = nn.Linear(embedding_dim, embedding_dim).to(device)
        
        # initialize weight
        self.user_embed.weight.data.normal_(0, 1.0 / self.user_embed.embedding_dim)
        self.item_embed.weight.data.normal_(0, 1.0 / self.item_embed.embedding_dim)
        self.linear1.weight.data.normal_(mean=0, std=np.sqrt(2.0 / embedding_dim))
        
    def forward(self, seq_item, user_id, items_to_predict, for_pred = False):
       