## Sign-aware Bayesian Personalized Ranking

### Import Packages

In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import load_npz
from scipy import sparse
from torch import nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader 
import torch
import torch.nn.functional as F

### Loading data

In [2]:
# Validation interactions; later cells read its userId, movieId and rating columns.
df_val = pd.read_csv('df_val.csv')

In [3]:
# Training rows; MovieDataset reads them by column position:
# 0 -> u, 1 -> p, 2 -> r, 3 -> n.
df_train = pd.read_csv('quadruplet.csv')

In [4]:
# Sparse training matrix; later cells look for entries equal to 1 and -1 in each row.
R = load_npz('R_train.npz')

In [5]:
# Rows of R are iterated per user below; the column count sizes the item embedding table.
n_users,n_movies = R.shape

### Construct accuracy function

In [6]:
# Per-user lists, filled by the next cell and read by accuracy_func (position i <-> user i).
one_idxs=[]  # column indices where the user's row of R equals 1
minus_one_idxs=[]  # column indices where the user's row of R equals -1
hold_out=[]  # movieId values from df_val with rating == 1
hold_out_minus=[]  # movieId values from df_val with rating == -1

In [7]:
# Build the per-user index lists used by accuracy_func.
#
# The validation frame is split by rating and grouped by user once, instead of
# re-scanning the whole frame with two `query` calls for every user.
no_items = df_val.movieId.values[:0]  # empty array with the movieId column's dtype
val_liked = {
    uid: grp.values
    for uid, grp in df_val[df_val.rating == 1].groupby("userId").movieId
}
val_disliked = {
    uid: grp.values
    for uid, grp in df_val[df_val.rating == -1].groupby("userId").movieId
}

for i in range(n_users):
    # Densify the user's sparse row once and reuse it for both sign lookups.
    row = R[[i], :].toarray()[0]
    one_idxs.append(np.where(row == 1)[0])
    minus_one_idxs.append(np.where(row == -1)[0])
    # Users without validation rows of a given sign get an empty array.
    hold_out.append(val_liked.get(i, no_items))
    hold_out_minus.append(val_disliked.get(i, no_items))

In [8]:
def accuracy_func(model,k=10):
    """Return the mean per-user top-k validation score of ``model``.

    Scores every (user, item) pair as the dot product of the user and item
    embeddings, masks each user's training items, and inspects the ``k``
    highest-scoring remaining items.

    Reads the module-level ``n_users``, ``one_idxs``, ``minus_one_idxs``,
    ``hold_out`` and ``hold_out_minus``.

    Args:
        model: object exposing ``users_embedding.weight`` and
            ``item_embedding.weight`` tensors.
        k: number of top-ranked items considered per user.

    Returns:
        Mean over users of ``max(0, (c - nc) / k)``, where ``c`` and ``nc``
        count top-k items found in the user's held-out positive and negative
        lists respectively (labelled HR@K in the original notebook).
    """
    S = torch.matmul(
        model.users_embedding.weight.detach(),
        model.item_embedding.weight.detach().T,
    ).to('cpu').numpy()
    accuracy = []
    for i in range(n_users):
        output = S[i]
        # Items already seen in training (either sign) must never be recommended.
        np.put(output, one_idxs[i], -np.inf)
        np.put(output, minus_one_idxs[i], -np.inf)
        # Rank once and reuse the top-k for both the positive and negative counts.
        top_k = np.argsort(output)[::-1][:k]
        c = len(np.intersect1d(top_k, hold_out[i]))
        nc = len(np.intersect1d(top_k, hold_out_minus[i]))
        # Alternative (Recall@K): np.max([0, (c - nc) / np.min([k, len(hold_out[i]) + 1])])
        acc = np.max([0, (c - nc) / k])  # HR@K
        accuracy.append(acc)
    return np.mean(accuracy)

### Prepare data for PyTorch model

In [9]:
class MovieDataset(Dataset):
    """Dataset view over a DataFrame whose rows are read by column position.

    Each item is the tuple ``(u, p, n, r)`` of scalar int32 tensors taken from
    columns 0, 1, 3 and 2 of the row, in that order.
    """

    def __init__(self, df):
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        frame = self.df

        def cell(col):
            # One scalar int32 tensor per looked-up cell.
            return torch.tensor(frame.iloc[idx, col], dtype=torch.int32)

        user = cell(0)
        positive = cell(1)
        negative = cell(3)
        rating = cell(2)
        return (user, positive, negative, rating)

In [10]:
# Wrap the training frame so DataLoader can draw (u, p, n, r) samples from it.
train_dataset = MovieDataset(df_train)

In [11]:
# Mini-batch size used for training.
BATCH_SIZE = 4096
# Reshuffle the training rows every epoch.
train_loader = DataLoader(dataset=train_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=True,
                         )

### sBPR model

In [12]:
class sBPR(nn.Module):
    """Sign-aware Bayesian Personalized Ranking with dot-product scoring.

    Users and items each get an embedding table; a (user, item) score is the
    dot product of the two embeddings.

    Args:
        n_users: number of rows in the user embedding table.
        n_movies: number of rows in the item embedding table.
        embedding_size: dimensionality of both embedding tables.
        reg: weight of the squared-L2 penalty on the embeddings used in a batch.
    """

    def __init__(self, n_users, n_movies, embedding_size=192,reg=0.01):
        super().__init__()

        self.users_embedding = nn.Embedding(n_users,embedding_size)
        self.item_embedding = nn.Embedding(n_movies,embedding_size)

        nn.init.xavier_normal_(self.users_embedding.weight)
        nn.init.xavier_normal_(self.item_embedding.weight)

        self.reg = reg
        self.embedding_size = embedding_size

    def forward(self,u,p,n,r):
        """Return the summed (not averaged) regularised loss for one batch.

        Args:
            u: 1-D integer tensor of user indices.
            p: 1-D integer tensor of indices of the item the user interacted with.
            n: 1-D integer tensor of indices of the contrasting item.
            r: 1-D tensor of feedback values for ``(u, p)``; only its sign is used.

        Returns:
            Scalar tensor: ``-sum(logsigmoid(delta)) + reg * (|u|^2 + |p|^2 + |n|^2)``.
        """
        # Follow the module's own parameters instead of a module-level `device`
        # global, so the model works wherever it has been moved with `.to(...)`.
        dev = self.users_embedding.weight.device
        u_ = self.users_embedding(u.to(dev))
        p_ = self.item_embedding(p.to(dev))
        n_ = self.item_embedding(n.to(dev))
        r_ = r.to(dev)

        positive_interaction = torch.mul(u_, p_).sum(dim=1)
        negative_interaction = torch.mul(u_, n_).sum(dim=1)
        # Weight on the interacted item's score: 1 when sign(r) == 1, 2 when sign(r) == -1.
        sign_weight = -1/2*torch.sign(r_)+3/2
        sign_delta = (sign_weight*positive_interaction) - negative_interaction
        # Alternative previously tried: torch.sign(r_) * positive_interaction - negative_interaction

        log_prob = F.logsigmoid(sign_delta).sum()
        regularization = self.reg * (
            u_.norm(dim=1).pow(2).sum()
            + p_.norm(dim=1).pow(2).sum()
            + n_.norm(dim=1).pow(2).sum()
        )
        return -log_prob + regularization

In [13]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [14]:
# Instantiate the model with its default embedding size and regularisation, then move it to `device`.
model = sBPR(n_users,n_movies).to(device)

In [15]:
# Adam over all model parameters (both embedding tables).
optimizer= optim.Adam(model.parameters(),lr=0.001)

In [16]:
# Number of full passes over train_loader.
epochs = 10

for epoch in range(epochs):
    model.train()
    train_losses = []  # one summed batch loss per optimisation step
    for i,(u,p,n,r) in enumerate(train_loader):
        
        # The model's forward pass returns the batch loss directly.
        cost = model(u,p,n,r) 
        # Clear gradients left over from the previous step before backpropagating.
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        train_losses.append(cost.item())
    model.eval()
    # Validation score on the top-10 recommendations after each epoch.
    acc = accuracy_func(model,10)
    
    # The reported train loss is the mean of the per-batch summed losses.
    print(f"Epoch {epoch + 1},train loss: {torch.tensor(train_losses).mean():.4f}, val accuracy: {acc:.4f}")


Epoch 1,train loss: 2612.1199, val accuracy: 0.1691
Epoch 2,train loss: 1587.6693, val accuracy: 0.1699
Epoch 3,train loss: 1410.6881, val accuracy: 0.1959
Epoch 4,train loss: 1297.6490, val accuracy: 0.2062
Epoch 5,train loss: 1229.1805, val accuracy: 0.2150
Epoch 6,train loss: 1184.9893, val accuracy: 0.2223
Epoch 7,train loss: 1153.3771, val accuracy: 0.2263
Epoch 8,train loss: 1129.2069, val accuracy: 0.2290
Epoch 9,train loss: 1109.8672, val accuracy: 0.2305
Epoch 10,train loss: 1094.0647, val accuracy: 0.2314
