# Loads Data

In [204]:
# Based off of Neural Graph Collaborative Filtering
 # Xiang Wang, Xiangnan He, Meng Wang, Fuli Feng, and Tat-Seng Chua. 2019.
 # Neural Graph Collaborative Filtering. In Proceedings of the 42nd International
 # ACM SIGIR Conference on Research and Development in Information Retrieval
 # (SIGIR ’19), July 21–25, 2019, Paris, France. ACM, New York, NY, USA,
 # 10 pages. https://doi.org/10.1145/3331184.3331267

import torch
import random
import numpy as np
import pandas as pd
import torch.nn.functional as F
from torch.nn import init, LeakyReLU, Linear, Module, ModuleList, Parameter

batch_size = 1024 # Set as such because it says this is the optimal number in the above paper

# Gets values from csv
input = pd.read_csv("ratings_small.csv", usecols=["userId", "movieId", "rating"])


In [205]:
# Splits input into training and testing sets
userSet = []
spot = 1
activeItems = []
for a, b, c in input.values:
    if spot == a:
        activeItems.append(b)
    else:
        userSet.append(activeItems)
        activeItems = []
        spot += 1
        activeItems.append(b)
userSet.append(activeItems)

training = []
testing = []
for i in range(len(userSet)):
    length = int(len(userSet[i])*0.7)
    for a in userSet[i][:length]:
        training.append([i+1, a])
    for a in userSet[i][length:]:
        testing.append([i+1, a])


In [206]:
# Gets lists of unique values
userIDList = input['userId'].unique()
movieIDList = input['movieId'].unique()

In [207]:
def get_user_item_matrix(matrix):
    output = pd.DataFrame(input, columns=movieIDList, index=userIDList)
    for a, b in matrix:
        output.at[a, b] = 1
    output.fillna(value = 0, inplace = True)
    output = torch.from_numpy(output.values)
    return output

# Builds the user item interaction matrix (training)
userItemIteractionMatrix = get_user_item_matrix(training)
# Builds the user item interaction matrix (testing)
testingUIMatrix = get_user_item_matrix(testing)

In [208]:
# Builds the positive list
def build_positive_list(matrix):
    positiveList = []
    spot = 1
    temp = []
    for a, b in matrix:
        b = int(b)
        if a == spot:
            temp.append(b)
        else:
            spot += 1
            positiveList.append(temp)
            temp = [b]
    positiveList.append(temp)
    return positiveList

positiveList = build_positive_list(training)

# Creates Adjacency Matrix

In [209]:
numberOfUsers = len(userIDList)
numberOfItems = len(movieIDList)

AdjanceyMatrix = np.zeros((numberOfUsers + numberOfItems, numberOfItems + numberOfUsers))
AdjanceyMatrix[:numberOfUsers, numberOfUsers:] = userItemIteractionMatrix.tolist()
AdjanceyMatrix[numberOfUsers:, :numberOfUsers] = userItemIteractionMatrix.T.tolist()

rowsum = np.array(AdjanceyMatrix.sum(1))

# Tecnique taken from the paper's implementaion instead of the paper itself
 # https://github.com/xiangwang1223/neural_graph_collaborative_filtering
d = np.power(rowsum, -1).flatten()
d[np.isinf(d)] = 0.
d_mat = np.zeros((len(d), len(d)))
for i in range(len(d)):
    d_mat[i][i] = d[i]
NormalizedAdjacencyMatrix = d_mat.dot(AdjanceyMatrix)
stuff = torch.from_numpy(AdjanceyMatrix).to_sparse()
Final = NormalizedAdjacencyMatrix + np.eye(NormalizedAdjacencyMatrix.shape[0])

  d = np.power(rowsum, -1).flatten()


In [210]:

def positive(u):
    movie = random.randint(0, len(positiveList[u-1])-1)
    for i in range(len(movieIDList)):
        if movieIDList[i] == positiveList[u-1][movie]:
            return i

def negative(u):
    while True:
        movie = random.randint(0, numberOfItems-1)
        movie = movieIDList[movie]
        if movie not in positiveList[u-1]:
            for i in range(len(movieIDList)):
                if movieIDList[i] == movie:
                    return i

def get_a_sample():
    selectedUsers = np.random.choice(userIDList, size=batch_size)
    pos_sample, neg_sample = [], []
    for u in selectedUsers:
        pos_sample.append(positive(u))
        neg_sample.append(negative(u))
    return selectedUsers, np.asarray(pos_sample), np.asarray(neg_sample)


# The Model

In [211]:
numberOfLayers = 3

class NGCF(Module):
  def __init__(self, numberOfUsers, numberOfItems, embed_size, adjacencyMatrix):
    super().__init__()
    self.numberOfUsers = numberOfUsers
    self.numberOfItems = numberOfItems
    self.embed_size = embed_size
    self.adj_matrix = adjacencyMatrix

    # This portion is adapted from https://medium.com/@meuleman.mathias/reproducing-neural-graph-collaborative-filtering-a8982c7d3df6 since I still don't understand how Parameter works  
    self.user_embeddings = Parameter(torch.rand(numberOfUsers, embed_size))
    self.item_embeddings = Parameter(torch.rand(numberOfItems, embed_size))
    self.user_embeddings_final = Parameter(torch.zeros((numberOfUsers, embed_size * (numberOfLayers + 1))))
    self.item_embeddings_final = Parameter(torch.zeros((numberOfItems, embed_size * (numberOfLayers + 1))))
    self.W1 = ModuleList([Linear(self.embed_size, self.embed_size) for _ in range(0, numberOfLayers)])
    self.W2 = ModuleList([Linear(self.embed_size, self.embed_size) for _ in range(0, numberOfLayers)])
    self.reLU = LeakyReLU()
    self.init_weights()

  def init_weights(self):
    for name, parameter in self.named_parameters():
      if ('bias' not in name):
        init.xavier_uniform_(parameter)

  def forward(self, u, i, j):
    embeddings = torch.cat((self.user_embeddings, self.item_embeddings))
    final_embeddings = [embeddings]

    # Does the messaging
    for l in range(numberOfLayers):
      t1_embeddings = torch.sparse.mm(torch.from_numpy(self.adj_matrix.astype(np.float32)).to_sparse(), embeddings)
      t1 = self.W1[l](t1_embeddings)
      t2_embeddings = embeddings.mul(t1_embeddings)
      t2 = self.W2[l](t2_embeddings)
      embeddings = self.reLU(t1 + t2)
      normalized_embeddings = F.normalize(embeddings, p=2, dim=1)
      final_embeddings.append(normalized_embeddings)

    final_embeddings = torch.cat(final_embeddings, 1)
    final_u_embeddings, final_i_embeddings = final_embeddings.split((self.numberOfUsers, self.numberOfItems), 0)
    self.user_embeddings_final = Parameter(final_u_embeddings)
    self.item_embeddings_final = Parameter(final_i_embeddings)

    return self.compute_loss(final_u_embeddings[u-1], final_i_embeddings[i], final_i_embeddings[j])

  def compute_loss(self, userE, posE, negE):
    return -(torch.log(torch.sigmoid(torch.mul(userE, posE).sum(dim=1) - torch.mul(userE, negE).sum(dim=1)))).mean()


# Running

In [213]:
model = NGCF(numberOfUsers=numberOfUsers, numberOfItems=numberOfItems, embed_size=64, adjacencyMatrix=Final)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
epochs = 10

model.train()
n_batch = numberOfItems // batch_size + 1

def train(model, t):
  print('Epoch ' + str(t+1))
  total_loss = 0
  for _ in range(n_batch):
    user, pos, neg = get_a_sample()
    optimizer.zero_grad()
    loss = model(torch.from_numpy(user).long(), torch.LongTensor(pos), torch.LongTensor(neg))
    loss.backward()
    optimizer.step()
    total_loss += loss.item()
  print(' Loss = ' + str(total_loss))

for t in range(epochs):
  train(model, t)
  model.train()
  model.eval()


Epoch 1
 Loss = 3.6117654144763947
Epoch 2
 Loss = 2.2050976306200027
Epoch 3
 Loss = 1.8743716776371002
Epoch 4
 Loss = 1.6849270910024643
Epoch 5
 Loss = 1.5634060502052307
Epoch 6
 Loss = 1.5372567921876907
Epoch 7
 Loss = 1.406380757689476
Epoch 8
 Loss = 1.3347729742527008
Epoch 9
 Loss = 1.2633979618549347
Epoch 10
 Loss = 1.1762020513415337


# Evaluating the model

In [214]:
# Gets embedings from model
user_embeddings = model.user_embeddings_final.detach()
item_embeddings = model.item_embeddings_final.detach()

# Calculates precision and recall
recall_parts, ndcg_parts = [], []

non_train_items = (1 - user_embeddings).float()
predictions = torch.mm(user_embeddings, item_embeddings.t())
predictions = predictions * (1-userItemIteractionMatrix)

output = np.zeros((numberOfUsers, numberOfItems))
reversedMatrix = 1-userItemIteractionMatrix
top10 = torch.topk(predictions * reversedMatrix, k=10).indices
for u in range(len(top10)):
  for i in top10[u]:
    output[u][i] = 1

print("Evaluating...")

truePositives = []
falsePositive = []
falseNegatives = []
for user in range(len(output)):
  TPtemp = 0
  FPtemp = 0
  FNtemp = 0
  for i in range(len(output[user])):
    if output[user][i] == 1 and testingUIMatrix[user][i] == 1:
      TPtemp += 1
    elif testingUIMatrix[user][i] == 1 and output[user][i] == 0: #false negative
      FNtemp += 1
    elif testingUIMatrix[user][i] == 1 and output[user][i] == 0: #false positive
      FPtemp += 1
  truePositives.append(TPtemp)
  falsePositive.append(FPtemp)
  falseNegatives.append(FNtemp)

precision = 0
recall = 0
TPs = 0
for i in range(len(truePositives)):
  if (truePositives[i] + falsePositive[i]) != 0:
    precision += truePositives[i]/(truePositives[i] + falsePositive[i])
  if (truePositives[i] + falseNegatives[i]) != 0:
    recall += truePositives[i]/(truePositives[i] + falseNegatives[i])
  TPs += truePositives[i]
  
precision = precision/len(truePositives)
recall = recall/len(truePositives)
falseNegatives = TPs/len(falseNegatives)

print('Precision:\t' + str(precision))
print('Recall:\t\t' + str(recall))

Evaluating...
Precision:	0.14456035767511177
Recall:		0.011635772263848695
