In [16]:
import pickle
import tensorflow as tf
from collections import defaultdict
import random

In [3]:
# Read the pre-processed dataset bundle from disk; the loaded object is
# indexed by string keys below.
# NOTE: unpickling can execute arbitrary code -- only load a trusted file.
with open('beer_variables.pkl', 'rb') as pkl_handle:
    beer_variables = pickle.load(pkl_handle)

# ID lookup tables -- format: xxxIDs[user] = id
userIDs, itemIDs = (
    beer_variables['userIDs'],
    beer_variables['itemIDs'],
)
userIDs_train, itemIDs_train = (
    beer_variables['userIDs_train'],
    beer_variables['itemIDs_train'],
)

# Flat interaction lists -- format: (time, user, item, rating)
interactions_test, interactions_used = (
    beer_variables['interactions_test'],
    beer_variables['interactions_used'],
)
interactions_train, interactions_ver = (
    beer_variables['interactions_train'],
    beer_variables['interactions_ver'],
)

# Interactions grouped by user -- format: [user] = [(time, item, rating)]
interactionsPerUser_test, interactionsPerUser_used = (
    beer_variables['interactionsPerUser_test'],
    beer_variables['interactionsPerUser_used'],
)
interactionsPerUser_train, interactionsPerUser_ver = (
    beer_variables['interactionsPerUser_train'],
    beer_variables['interactionsPerUser_ver'],
)

# Interactions paired with the preceding item --
# format: (time, user, item, lastItem, rating)
(
    interactionsWithPrevious,
    interactionsWithPrevious_ver,
    interactionsWithPrevious_test,
) = (
    beer_variables['interactionsWithPrevious'],
    beer_variables['interactionsWithPrevious_ver'],
    beer_variables['interactionsWithPrevious_test'],
)

# Items seen by each user -- format: [user] = [item]
itemsPerUser_train, itemsPerUser_ver, itemsPerUser_test = (
    beer_variables['itemsPerUser_train'],
    beer_variables['itemsPerUser_ver'],
    beer_variables['itemsPerUser_test'],
)

# Item collections -- format: [item]
items_train, items = (
    beer_variables['items_train'],
    beer_variables['items'],
)

In [34]:
# Tally how many training interactions mention each item.
item_review_counts = {}
for interaction in interactions_train:
    _, _, item, _ = interaction
    item_review_counts[item] = item_review_counts.get(item, 0) + 1

# Order items from most to least reviewed (ties keep first-seen order, since
# the sort is stable), then number them so the most reviewed item has rank 1.
sorted_items = sorted(item_review_counts, key=item_review_counts.get, reverse=True)
popularItemRanking = {item: rank for rank, item in enumerate(sorted_items, start=1)}

In [36]:
# popularItemRanking was filled in rank order, so its first five entries are
# the five most reviewed items.
print("Top 5 Popular Items:")
for item, rank in tuple(popularItemRanking.items())[:5]:
    print(f"Item: {item}, Rank: {rank}")

Top 5 Popular Items:
Item: 1267, Rank: 1
Item: 734, Rank: 2
Item: 10569, Rank: 3
Item: 158, Rank: 4
Item: 473, Rank: 5


In [7]:
# Adam optimizer with learning rate 0.1, named for the PRME model. It is not
# referenced again in the visible part of this notebook (the models below are
# restored from saved weights rather than trained here).
optimizer_PRME = tf.keras.optimizers.legacy.Adam(0.1)

class PRME(tf.keras.Model):
    """Metric-embedding next-item model trained with a pairwise ranking loss.

    The score of item ``i`` for user ``u`` whose previous item was ``j`` is

        betaI[i] + ||gammaUI[u] - gammaIU[i]|| + ||gammaIJ[i] - gammaJI[j]||

    Embedding tables are sized from the module-level ``userIDs_train`` and
    ``itemIDs_train`` lookups, so those must be defined before instantiation.

    NOTE(review): both distances enter the score with a *positive* sign, i.e.
    a larger distance yields a higher score. Metric-embedding rankers usually
    score with the negative distance -- confirm this is intended. The sign is
    kept unchanged so that ``predict`` stays consistent with ``call`` and with
    weights that were already trained under this convention.

    NOTE: ``predict`` overrides ``tf.keras.Model.predict`` with a different
    signature, so Keras' own batch-inference ``predict`` is not available on
    this class.

    Args:
        K: dimensionality of every embedding vector.
        lamb: coefficient applied to the L2 penalty returned by ``reg``.
    """

    def __init__(self, K, lamb):
        super(PRME, self).__init__()
        n_users = len(userIDs_train)
        n_items = len(itemIDs_train)
        # Per-item bias.
        self.betaI = tf.Variable(tf.random.normal([n_items], stddev=0.001))
        # User <-> item embedding pair.
        self.gammaUI = tf.Variable(tf.random.normal([n_users, K], stddev=0.001))
        self.gammaIU = tf.Variable(tf.random.normal([n_items, K], stddev=0.001))
        # Item <-> previous-item embedding pair.
        self.gammaIJ = tf.Variable(tf.random.normal([n_items, K], stddev=0.001))
        self.gammaJI = tf.Variable(tf.random.normal([n_items, K], stddev=0.001))
        # Regularization coefficient
        self.lamb = lamb

    def predict(self, u, i, j):
        """Score a single (user index, item index, previous-item index) triple.

        Returns:
            A scalar tensor holding the score defined in the class docstring.
        """
        p = self.betaI[i] + tf.norm(self.gammaUI[u] - self.gammaIU[i]) + \
            tf.norm(self.gammaIJ[i] - self.gammaJI[j])
        return p

    def reg(self):
        """Return ``lamb`` times the summed ``tf.nn.l2_loss`` of all parameters.

        This penalty is not added inside ``call``; the training code has to
        add it to the loss itself.
        """
        return self.lamb * (tf.nn.l2_loss(self.betaI) + \
                            tf.nn.l2_loss(self.gammaUI) + \
                            tf.nn.l2_loss(self.gammaIU) + \
                            tf.nn.l2_loss(self.gammaIJ) + \
                            tf.nn.l2_loss(self.gammaJI))

    def call(self, sampleU,  # user
             sampleI,  # item
             sampleJ,  # previous item
             sampleK):  # negative item
        """Pairwise ranking loss for a batch of (user, item, previous, negative).

        All four arguments are parallel sequences of integer indices into the
        embedding tables.

        Returns:
            Scalar tensor: the batch mean of ``-log(sigmoid(x_uij - x_ukj))``,
            where ``x_uij`` scores the observed item and ``x_ukj`` the
            negative item for the same user and previous item.
        """
        u = tf.convert_to_tensor(sampleU, dtype=tf.int32)
        i = tf.convert_to_tensor(sampleI, dtype=tf.int32)
        j = tf.convert_to_tensor(sampleJ, dtype=tf.int32)
        k = tf.convert_to_tensor(sampleK, dtype=tf.int32)

        # The user and previous-item embeddings are shared by both scores.
        gamma_u = tf.nn.embedding_lookup(self.gammaUI, u)
        gamma_j = tf.nn.embedding_lookup(self.gammaJI, j)

        # Distances must be reduced over the embedding axis only. Without
        # ``axis`` tf.norm flattens the whole batch into ONE scalar, so every
        # sample would share the same distance term.
        x_uij = tf.nn.embedding_lookup(self.betaI, i) \
            + tf.norm(gamma_u - tf.nn.embedding_lookup(self.gammaIU, i), axis=-1) \
            + tf.norm(tf.nn.embedding_lookup(self.gammaIJ, i) - gamma_j, axis=-1)
        x_ukj = tf.nn.embedding_lookup(self.betaI, k) \
            + tf.norm(gamma_u - tf.nn.embedding_lookup(self.gammaIU, k), axis=-1) \
            + tf.norm(tf.nn.embedding_lookup(self.gammaIJ, k) - gamma_j, axis=-1)

        # Pairwise log-sigmoid ranking loss (not a hinge loss). log_sigmoid is
        # the numerically stable form of log(sigmoid(x)): it stays finite when
        # sigmoid(x) underflows to 0 for very negative x.
        return -tf.reduce_mean(tf.math.log_sigmoid(x_uij - x_ukj))

In [8]:
# Adam optimizer with learning rate 0.1, named for the FPMC model. It is not
# referenced again in the visible part of this notebook (the models below are
# restored from saved weights rather than trained here).
optimizer_FPMC = tf.keras.optimizers.legacy.Adam(0.1)

class FPMC(tf.keras.Model):
    """Factorized next-item model trained with a pairwise ranking loss.

    The score of item ``i`` for user ``u`` whose previous item was ``j`` is

        betaI[i] + UI * <gammaUI[u], gammaIU[i]> + IJ * <gammaIJ[i], gammaJI[j]>

    ``UI`` and ``IJ`` are plain multipliers on the two inner-product terms;
    setting one of them to 0 removes that term from the score.

    Embedding tables are sized from the module-level ``userIDs_train`` and
    ``itemIDs_train`` lookups, so those must be defined before instantiation.

    NOTE: ``predict`` overrides ``tf.keras.Model.predict`` with a different
    signature, so Keras' own batch-inference ``predict`` is not available on
    this class.

    Args:
        K: dimensionality of every embedding vector.
        lamb: coefficient applied to the L2 penalty returned by ``reg``.
        UI: multiplier for the user-item term (default 1).
        IJ: multiplier for the item-previous-item term (default 1).
    """

    def __init__(self, K, lamb, UI=1, IJ=1):
        super(FPMC, self).__init__()
        n_users = len(userIDs_train)
        n_items = len(itemIDs_train)
        # Per-item bias.
        self.betaI = tf.Variable(tf.random.normal([n_items], stddev=0.001))
        # User <-> item embedding pair.
        self.gammaUI = tf.Variable(tf.random.normal([n_users, K], stddev=0.001))
        self.gammaIU = tf.Variable(tf.random.normal([n_items, K], stddev=0.001))
        # Item <-> previous-item embedding pair.
        self.gammaIJ = tf.Variable(tf.random.normal([n_items, K], stddev=0.001))
        self.gammaJI = tf.Variable(tf.random.normal([n_items, K], stddev=0.001))
        # Regularization coefficient
        self.lamb = lamb
        # Which terms to include
        self.UI = UI
        self.IJ = IJ

    def predict(self, u, i, j):
        """Score a single (user index, item index, previous-item index) triple.

        Returns:
            A scalar tensor holding the score defined in the class docstring.
        """
        p = self.betaI[i] + self.UI * tf.tensordot(self.gammaUI[u], self.gammaIU[i], 1) + \
            self.IJ * tf.tensordot(self.gammaIJ[i], self.gammaJI[j], 1)
        return p

    def reg(self):
        """Return ``lamb`` times the summed ``tf.nn.l2_loss`` of all parameters.

        This penalty is not added inside ``call``; the training code has to
        add it to the loss itself.
        """
        return self.lamb * (tf.nn.l2_loss(self.betaI) + \
                            tf.nn.l2_loss(self.gammaUI) + \
                            tf.nn.l2_loss(self.gammaIU) + \
                            tf.nn.l2_loss(self.gammaIJ) + \
                            tf.nn.l2_loss(self.gammaJI))

    def call(self, sampleU,  # user
             sampleI,  # item
             sampleJ,  # previous item
             sampleK):  # negative item
        """Pairwise ranking loss for a batch of (user, item, previous, negative).

        All four arguments are parallel sequences of integer indices into the
        embedding tables.

        Returns:
            Scalar tensor: the batch mean of ``-log(sigmoid(x_uij - x_ukj))``,
            where ``x_uij`` scores the observed item and ``x_ukj`` the
            negative item for the same user and previous item.
        """
        u = tf.convert_to_tensor(sampleU, dtype=tf.int32)
        i = tf.convert_to_tensor(sampleI, dtype=tf.int32)
        j = tf.convert_to_tensor(sampleJ, dtype=tf.int32)
        k = tf.convert_to_tensor(sampleK, dtype=tf.int32)

        # The user and previous-item embeddings are shared by both scores, so
        # they are looked up once.
        gamma_u = tf.nn.embedding_lookup(self.gammaUI, u)
        gamma_j = tf.nn.embedding_lookup(self.gammaJI, j)

        # Score of the observed item i.
        x_uij = tf.nn.embedding_lookup(self.betaI, i) + \
            self.UI * tf.reduce_sum(
                tf.multiply(gamma_u, tf.nn.embedding_lookup(self.gammaIU, i)), 1) + \
            self.IJ * tf.reduce_sum(
                tf.multiply(tf.nn.embedding_lookup(self.gammaIJ, i), gamma_j), 1)
        # Score of the sampled negative item k.
        x_ukj = tf.nn.embedding_lookup(self.betaI, k) + \
            self.UI * tf.reduce_sum(
                tf.multiply(gamma_u, tf.nn.embedding_lookup(self.gammaIU, k)), 1) + \
            self.IJ * tf.reduce_sum(
                tf.multiply(tf.nn.embedding_lookup(self.gammaIJ, k), gamma_j), 1)

        # Pairwise log-sigmoid ranking loss (not a hinge loss). log_sigmoid is
        # the numerically stable form of log(sigmoid(x)): it stays finite when
        # sigmoid(x) underflows to 0 for very negative x.
        return -tf.reduce_mean(tf.math.log_sigmoid(x_uij - x_ukj))

In [45]:
# Build the four models that are evaluated below; each uses 5-dimensional
# embeddings and a regularization coefficient of 0.00001.

# FPMC: both score terms enabled (UI and IJ keep their defaults of 1)
modelFPMC = FPMC(K=5, lamb=0.00001)
# MF: user-item term only
modelMF = FPMC(K=5, lamb=0.00001, UI=1, IJ=0)
# MC: item-previous-item term only
modelMC = FPMC(K=5, lamb=0.00001, UI=0, IJ=1)
# PRME: distance-based model
modelPRME = PRME(K=5, lamb=0.00001)

In [46]:
# Restore each model's parameters from checkpoint files addressed by relative
# path. The final call is the cell's last expression, so its return value (a
# checkpoint load-status object) is what the notebook echoes as output.
modelFPMC.load_weights('FPMC_model_weights')
modelMF.load_weights('MF_model_weights')
modelMC.load_weights('MC_model_weights')
modelPRME.load_weights('PRME_model_weights')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1ec520d56a0>

In [12]:
# `_ver` split: gather each user's (item, previous item) pairs, plus every
# item that occurs in either position.
interactionsVerPerUser = defaultdict(set)
itemSet_ver = set()
for _, user, cur_item, prev_item, _ in interactionsWithPrevious_ver:
    interactionsVerPerUser[user].add((cur_item, prev_item))
    itemSet_ver.update((cur_item, prev_item))

# Test split: same structures built from the test interactions.
interactionsTestPerUser = defaultdict(set)
itemSet_test = set()
for _, user, cur_item, prev_item, _ in interactionsWithPrevious_test:
    interactionsTestPerUser[user].add((cur_item, prev_item))
    itemSet_test.update((cur_item, prev_item))

In [68]:
def AUCu(model, interactionsPerUser, itemSet, u, N, rng=None):
    """Estimate one user's AUC from sampled positive/negative comparisons.

    Up to ``N`` of the user's (item, previous item) pairs are drawn with
    replacement, each is matched with an item drawn uniformly from
    ``itemSet``, and the estimate is the fraction of comparisons in which the
    model scores the observed item strictly higher than the sampled one
    (ties count as losses).

    Comparisons that involve an item missing from the module-level
    ``itemIDs_train`` lookup are discarded, and users missing from
    ``userIDs_train`` are not evaluated at all.

    NOTE: sampled negatives are not filtered against the user's own items, so
    a negative can coincide with the positive item (which then counts as a
    tie, i.e. a loss).

    Args:
        model: object exposing ``predict(user_idx, item_idx, prev_idx)`` whose
            result has a ``.numpy()`` method.
        interactionsPerUser: mapping user -> collection of (item, prev) pairs.
        itemSet: collection of candidate negative items. A list or tuple is
            used as-is; anything else is copied into a list.
        u: the user to evaluate.
        N: maximum number of comparisons to sample.
        rng: optional ``random.Random``-like object providing ``choices``;
            defaults to the global ``random`` module.

    Returns:
        The win fraction in [0, 1], or -1 when no comparison could be scored.
    """
    # Whether the user is known does not depend on the sampled pair, so decide
    # once up front instead of once per comparison.
    if u not in userIDs_train:
        return -1

    # ``.get`` avoids inserting an empty entry for an unseen user when the
    # mapping is a defaultdict.
    user_pairs = list(interactionsPerUser.get(u, ()))
    N = min(N, len(user_pairs))
    if N <= 0:
        return -1

    candidates = itemSet if isinstance(itemSet, (list, tuple)) else list(itemSet)
    if not candidates:
        # random.choices raises on an empty population; nothing to compare.
        return -1

    sampler = random if rng is None else rng
    positive = sampler.choices(user_pairs, k=N)
    negative = sampler.choices(candidates, k=N)

    user_idx = userIDs_train[u]
    win = 0
    scored = 0
    for (i, j), k in zip(positive, negative):
        # i, j, k may not be in the training set; such comparisons are dropped.
        if i not in itemIDs_train or j not in itemIDs_train or k not in itemIDs_train:
            continue
        scored += 1
        prev_idx = itemIDs_train[j]
        sp = model.predict(user_idx, itemIDs_train[i], prev_idx).numpy()
        sn = model.predict(user_idx, itemIDs_train[k], prev_idx).numpy()
        if sp > sn:
            win += 1

    if scored == 0:
        return -1
    return win / scored

In [71]:
def AUC(model, itemSet, interactionsPerUser, N=10, seed=None):
    """Average the per-user sampled AUC over every user in ``interactionsPerUser``.

    Users for which ``AUCu`` returns -1 (nothing could be scored) are left out
    of the average.

    Args:
        model: model passed through to ``AUCu``.
        itemSet: collection of candidate negative items.
        interactionsPerUser: mapping user -> collection of (item, prev) pairs.
        N: maximum number of sampled comparisons per user.
        seed: if given, the global ``random`` module is re-seeded with it
            before sampling so the estimate is reproducible. This affects
            every other user of the global generator as well.

    Returns:
        The mean per-user AUC, or NaN when no user could be evaluated.
    """
    if seed is not None:
        random.seed(seed)

    # Materialise the candidates once instead of once per user.
    candidates = list(itemSet)

    av = []
    for u in interactionsPerUser:
        res = AUCu(model, interactionsPerUser, candidates, u, N)
        if res != -1:
            av.append(res)

    # Avoid ZeroDivisionError when every user was skipped (or there are none).
    if not av:
        return float('nan')
    return sum(av) / len(av)

In [76]:
# FPMC: sampled AUC on the `_ver` split
AUC(modelFPMC, itemSet=itemSet_ver, interactionsPerUser=interactionsVerPerUser)

0.9121568148372737

In [77]:
# MF: sampled AUC on the `_ver` split
AUC(modelMF, itemSet=itemSet_ver, interactionsPerUser=interactionsVerPerUser)

0.9090812345971807

In [78]:
# MC: sampled AUC on the `_ver` split
AUC(modelMC, itemSet=itemSet_ver, interactionsPerUser=interactionsVerPerUser)

0.8989835506553421

In [79]:
# PRME: sampled AUC on the `_ver` split
AUC(modelPRME, itemSet=itemSet_ver, interactionsPerUser=interactionsVerPerUser)

0.859092983224654

In [72]:
# FPMC: sampled AUC on the test split
AUC(modelFPMC, itemSet=itemSet_test, interactionsPerUser=interactionsTestPerUser)

0.7926197067476788

In [73]:
# MF: sampled AUC on the test split
AUC(modelMF, itemSet=itemSet_test, interactionsPerUser=interactionsTestPerUser)

0.7780864031710215

In [74]:
# MC: sampled AUC on the test split
AUC(modelMC, itemSet=itemSet_test, interactionsPerUser=interactionsTestPerUser)

0.7460062454143627

In [75]:
# PRME: sampled AUC on the test split
AUC(modelPRME, itemSet=itemSet_test, interactionsPerUser=interactionsTestPerUser)

0.7098225032759962