#### Preprocessing and evaluation are based on this [code](https://github.com/dawenl/vae_cf), and we complete the experiment of our paper by adding and modifying some here.

In [1]:
import os
import shutil
import sys
import numpy as np
from scipy import sparse
import pandas as pd
import bottleneck as bn
from copy import deepcopy

In [2]:
### change `DATA_DIR` to the location of the dataset
DATA_DIR = 'data/'

In [3]:
raw_data = pd.read_csv(os.path.join(DATA_DIR, 'Netflix.csv'), sep=',', names=['movieId', 'userId', 'rating', 'timestamp'])
raw_data

Unnamed: 0,movieId,userId,rating,timestamp
0,1,1488844,3,2005-09-06
1,1,822109,5,2005-05-13
2,1,885013,4,2005-10-19
3,1,30878,4,2005-12-26
4,1,823519,3,2004-05-03
...,...,...,...,...
100480502,17770,1790158,4,2005-11-01
100480503,17770,1608708,3,2005-07-19
100480504,17770,234275,1,2004-08-07
100480505,17770,255278,4,2004-05-28


In [4]:
raw_data_for_negative = deepcopy(raw_data)
raw_data_for_negative = raw_data_for_negative[raw_data_for_negative ['rating'] < 4]
raw_data_for_negative

# We added this code for negative feedback experiment.

Unnamed: 0,movieId,userId,rating,timestamp
0,1,1488844,3,2005-09-06
4,1,823519,3,2004-05-03
5,1,893988,3,2005-11-17
7,1,1248029,3,2004-04-22
9,1,2238063,3,2005-05-11
...,...,...,...,...
100480499,17770,365996,3,2003-11-10
100480501,17770,311124,3,2005-09-29
100480503,17770,1608708,3,2005-07-19
100480504,17770,234275,1,2004-08-07


In [5]:
# binarize the data (only keep ratings >= 4)
raw_data = raw_data[raw_data['rating'] >= 4]
raw_data

Unnamed: 0,movieId,userId,rating,timestamp
1,1,822109,5,2005-05-13
2,1,885013,4,2005-10-19
3,1,30878,4,2005-12-26
6,1,124105,4,2004-08-05
8,1,1842128,4,2004-05-09
...,...,...,...,...
100480495,17770,1274035,4,2005-06-10
100480498,17770,516110,5,2004-08-26
100480500,17770,986348,4,2004-08-12
100480502,17770,1790158,4,2005-11-01


In [6]:
def get_count(tp, id):
    playcount_groupbyid = tp[[id]].groupby(id, as_index=False)
    count = playcount_groupbyid.size()
    return count

In [7]:
def filter_triplets(tp, min_uc=5, min_sc=0):
    # Only keep the triplets for items which were clicked on by at least min_sc users. 
    if min_sc > 0:
        itemcount = get_count(tp, 'movieId')
        tp = tp[tp['movieId'].isin(itemcount.index[itemcount >= min_sc])]
    
    # Only keep the triplets for users who clicked on at least min_uc items
    # After doing this, some of the items will have less than min_uc users, but should only be a small proportion
    if min_uc > 0:
        usercount = get_count(tp, 'userId')
        tp = tp[tp['userId'].isin(usercount.index[usercount >= min_uc])]
    
    # Update both usercount and itemcount after filtering
    usercount, itemcount = get_count(tp, 'userId'), get_count(tp, 'movieId') 
    return tp, usercount, itemcount

In [8]:
raw_data, user_activity, item_popularity = filter_triplets(raw_data)

In [9]:
sparsity = 1. * raw_data.shape[0] / (user_activity.shape[0] * item_popularity.shape[0])
print("After filtering, there are %d watching events from %d users and %d movies (sparsity: %.3f%%)" % 
      (raw_data.shape[0], user_activity.shape[0], item_popularity.shape[0], sparsity * 100))

After filtering, there are 56880037 watching events from 463435 users and 17769 movies (sparsity: 0.691%)


In [10]:
unique_uid = user_activity.index

np.random.seed(98765)
idx_perm = np.random.permutation(unique_uid.size)
unique_uid = unique_uid[idx_perm]

In [11]:
### create train/validation/test users
n_users = unique_uid.size
n_heldout_users = 10000

tr_users = unique_uid[:(n_users - n_heldout_users * 2)]
vd_users = unique_uid[(n_users - n_heldout_users * 2): (n_users - n_heldout_users)]
te_users = unique_uid[(n_users - n_heldout_users):]

In [12]:
train_plays = raw_data.loc[raw_data['userId'].isin(tr_users)]

In [13]:
unique_sid = pd.unique(train_plays['movieId'])

In [14]:
show2id = dict((sid, i) for (i, sid) in enumerate(unique_sid))
profile2id = dict((pid, i) for (i, pid) in enumerate(unique_uid))

In [15]:
pro_dir = os.path.join(DATA_DIR, 'pro_sg')

if not os.path.exists(pro_dir):
    os.makedirs(pro_dir)

with open(os.path.join(pro_dir, 'unique_sid.txt'), 'w') as f:
    for sid in unique_sid:
        f.write('%s\n' % sid)

In [16]:
def split_train_test_proportion(data, test_prop=0.2):
    data_grouped_by_user = data.groupby('userId')
    tr_list, te_list = list(), list()

    np.random.seed(98765)

    for i, (_, group) in enumerate(data_grouped_by_user):
        n_items_u = len(group)
        
        if n_items_u >= 5:
            idx = np.zeros(n_items_u, dtype='bool')
            idx[np.random.choice(n_items_u, size=int(test_prop * n_items_u), replace=False).astype('int64')] = True
            
            tr_list.append(group[np.logical_not(idx)])
            te_list.append(group[idx])
        else:
            tr_list.append(group)

        if i % 1000 == 0:
            print("%d users sampled" % i)
            sys.stdout.flush()

    data_tr = pd.concat(tr_list)
    data_te = pd.concat(te_list)
    
    return data_tr, data_te

In [17]:
vad_plays = raw_data.loc[raw_data['userId'].isin(vd_users)]
vad_plays = vad_plays.loc[vad_plays['movieId'].isin(unique_sid)]

In [18]:
vad_plays_tr, vad_plays_te = split_train_test_proportion(vad_plays)

0 users sampled
1000 users sampled
2000 users sampled
3000 users sampled
4000 users sampled
5000 users sampled
6000 users sampled
7000 users sampled
8000 users sampled
9000 users sampled


In [19]:
vad_plays_negative = raw_data_for_negative.loc[raw_data_for_negative['userId'].isin(vd_users)]
vad_plays_negative = vad_plays_negative.loc[vad_plays_negative['movieId'].isin(unique_sid)]
vad_plays_negative['rating'].replace(1, -1.0, inplace=True)
vad_plays_negative['rating'].replace(2, -1.0, inplace=True)
vad_plays_negative['rating'].replace(3, -1.0, inplace=True)
vad_plays_tr = pd.concat([vad_plays_tr, vad_plays_negative]).sort_index()
vad_plays_tr['rating'].replace(4, 1.0, inplace=True)
vad_plays_tr['rating'].replace(5, 1.0, inplace=True)
vad_plays_tr

# We added this code for negative feedback experiment.

Unnamed: 0,movieId,userId,rating,timestamp
112,1,1116080,1,2005-08-08
216,1,1664010,1,2005-10-12
237,1,767518,1,2005-08-02
263,1,2413320,1,2004-02-06
328,1,2569099,-1,2005-08-16
...,...,...,...,...
100480355,17770,187160,-1,2005-01-23
100480419,17770,635712,-1,2003-11-15
100480438,17770,172257,-1,2005-03-14
100480490,17770,635735,1,2004-12-01


In [20]:
test_plays = raw_data.loc[raw_data['userId'].isin(te_users)]
test_plays = test_plays.loc[test_plays['movieId'].isin(unique_sid)]

In [21]:
test_plays_tr, test_plays_te = split_train_test_proportion(test_plays)

0 users sampled
1000 users sampled
2000 users sampled
3000 users sampled
4000 users sampled
5000 users sampled
6000 users sampled
7000 users sampled
8000 users sampled
9000 users sampled


In [22]:
test_plays_negative = raw_data_for_negative.loc[raw_data_for_negative['userId'].isin(te_users)]
test_plays_negative = test_plays_negative.loc[test_plays_negative['movieId'].isin(unique_sid)]
test_plays_negative['rating'].replace(1, -1.0, inplace=True)
test_plays_negative['rating'].replace(2, -1.0, inplace=True)
test_plays_negative['rating'].replace(3, -1.0, inplace=True)
test_plays_tr = pd.concat([test_plays_tr, test_plays_negative]).sort_index()
test_plays_tr['rating'].replace(4, 1.0, inplace=True)
test_plays_tr['rating'].replace(5, 1.0, inplace=True)

# We added this code for negative feedback experiment.

In [23]:
def numerize(tp):
    uid = map(lambda x: profile2id[x], tp['userId'])
    sid = map(lambda x: show2id[x], tp['movieId'])
    rating = map(lambda x: x, tp['rating']) # We added this code for negative feedback experiment.
    return pd.DataFrame(data={'uid': list(uid), 'sid': list(sid), 'rating': list(rating)}, columns=['uid', 'sid', 'rating']) # We modified this code for negative feedback experiment.

In [24]:
train_plays['rating'] = 1.0
vad_plays_te['rating'] = 1.0
test_plays_te['rating'] = 1.0

# We added this code for negative feedback experiment.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_plays['rating'] = 1.0


In [25]:
train_data = numerize(train_plays)
train_data.to_csv(os.path.join(pro_dir, 'train.csv'), index=False)

In [26]:
vad_data_tr = numerize(vad_plays_tr)
vad_data_tr.to_csv(os.path.join(pro_dir, 'validation_tr.csv'), index=False)

In [27]:
vad_data_te = numerize(vad_plays_te)
vad_data_te.to_csv(os.path.join(pro_dir, 'validation_te.csv'), index=False)

In [28]:
test_data_tr = numerize(test_plays_tr)
test_data_tr.to_csv(os.path.join(pro_dir, 'test_tr.csv'), index=False)

In [29]:
test_data_te = numerize(test_plays_te)
test_data_te.to_csv(os.path.join(pro_dir, 'test_te.csv'), index=False)

In [30]:
train_data.rating.unique(), vad_data_tr.rating.unique(), vad_data_te.rating.unique(), test_data_tr.rating.unique(), test_data_te.rating.unique()

# We added this code for negative feedback experiment.

(array([1.]), array([ 1, -1]), array([1.]), array([-1,  1]), array([1.]))

In [31]:
unique_sid = list()
with open(os.path.join(pro_dir, 'unique_sid.txt'), 'r') as f:
    for line in f:
        unique_sid.append(line.strip())

n_items = len(unique_sid)

In [32]:
def load_train_data(csv_file):
    tp = pd.read_csv(csv_file)
    n_users = tp['uid'].max() + 1

    rows, cols = tp['uid'], tp['sid']
    data = sparse.csr_matrix((np.ones_like(rows),
                             (rows, cols)), dtype='float64',
                             shape=(n_users, n_items))
    return data

In [33]:
def load_tr_te_data(csv_file_tr, csv_file_te):
    tp_tr = pd.read_csv(csv_file_tr)
    tp_te = pd.read_csv(csv_file_te)

    start_idx = min(tp_tr['uid'].min(), tp_te['uid'].min())
    end_idx = max(tp_tr['uid'].max(), tp_te['uid'].max())

    rows_tr, cols_tr = tp_tr['uid'] - start_idx, tp_tr['sid']
    rows_te, cols_te = tp_te['uid'] - start_idx, tp_te['sid']

    data_tr = sparse.csr_matrix((tp_tr['rating'], # We modified this code for negative feedback experiment.
                             (rows_tr, cols_tr)), dtype='float64', shape=(end_idx - start_idx + 1, n_items))
    data_te = sparse.csr_matrix((np.ones_like(rows_te),
                             (rows_te, cols_te)), dtype='float64', shape=(end_idx - start_idx + 1, n_items))
    return data_tr, data_te

In [34]:
### load training data
X = load_train_data(os.path.join(pro_dir, 'train.csv'))

In [35]:
### load validation data
validation_data_tr, validation_data_te = load_tr_te_data(
    os.path.join(pro_dir, 'validation_tr.csv'),
    os.path.join(pro_dir, 'validation_te.csv'))

In [36]:
### load test data
test_data_tr, test_data_te = load_tr_te_data(
    os.path.join(pro_dir, 'test_tr.csv'),
    os.path.join(pro_dir, 'test_te.csv'))

N_test = test_data_tr.shape[0]
idxlist_test = range(N_test)

In [37]:
lambda_ = 1000
G = X.T.dot(X).toarray()
diagIndices = np.diag_indices(G.shape[0])
G[diagIndices] += lambda_
P = np.linalg.inv(G)
B = P / (-np.diag(P))
B[diagIndices] = 0

In [38]:
def NDCG_binary_at_k_batch(X_pred, heldout_batch, k=100):
    '''
    normalized discounted cumulative gain@k for binary relevance
    ASSUMPTIONS: all the 0's in heldout_data indicate 0 relevance
    '''
    batch_users = X_pred.shape[0]
    idx_topk_part = bn.argpartition(-X_pred, k, axis=1)
    topk_part = X_pred[np.arange(batch_users)[:, np.newaxis],
                       idx_topk_part[:, :k]]
    idx_part = np.argsort(-topk_part, axis=1)
    # X_pred[np.arange(batch_users)[:, np.newaxis], idx_topk] is the sorted
    # topk predicted score
    idx_topk = idx_topk_part[np.arange(batch_users)[:, np.newaxis], idx_part]
    # build the discount template
    tp = 1. / np.log2(np.arange(2, k + 2))

    DCG = (heldout_batch[np.arange(batch_users)[:, np.newaxis],
                         idx_topk].toarray() * tp).sum(axis=1)
    IDCG = np.array([(tp[:min(n, k)]).sum()
                     for n in heldout_batch.getnnz(axis=1)])
    
    return DCG / IDCG

In [39]:
def Recall_at_k_batch(X_pred, heldout_batch, k=100):
    batch_users = X_pred.shape[0]

    idx = bn.argpartition(-X_pred, k, axis=1)
    X_pred_binary = np.zeros_like(X_pred, dtype=bool)
    X_pred_binary[np.arange(batch_users)[:, np.newaxis], idx[:, :k]] = True

    X_true_binary = (heldout_batch > 0).toarray()
    tmp = (np.logical_and(X_true_binary, X_pred_binary).sum(axis=1)).astype(
        np.float32)
    recall = tmp / np.minimum(k, X_true_binary.sum(axis=1))
    return recall

In [40]:
def evaluate_EASE_for_validation(B, validation_data_tr, validation_data_te):
    n100_list = []

    Xvalidation = validation_data_tr
    if sparse.isspmatrix(Xvalidation):
        Xvalidation = Xvalidation.toarray()
    Xvalidation = Xvalidation.astype('float32')
    
    pred_val = (Xvalidation).dot(B)
    pred_val[Xvalidation.nonzero()] = -np.inf

    n100_list.append(NDCG_binary_at_k_batch(pred_val, validation_data_te, k=100))
    n100_list = np.concatenate(n100_list)
    print("NDCG@100=%.5f (%.5f)" % (np.mean(n100_list), np.std(n100_list) / np.sqrt(len(n100_list))))
    
    return np.mean(n100_list)

# We added this code for negative feedback experiment.

In [41]:
def evaluate_EASE(B, test_data_tr, test_data_te, step=2):
    print("Evaluating on test set ...")
    n5_list, n10_list, n50_list, n100_list, r1_list, r5_list, r10_list, r20_list, r50_list, r100_list = [], [], [], [], [], [], [], [], [], []

    Xtest = test_data_tr
    if sparse.isspmatrix(Xtest):
        Xtest = Xtest.toarray()
    Xtest = Xtest.astype('float32')
    
    if step == 0:
        Xtest[np.where(Xtest < 1)] = 0
        pred_val = (Xtest).dot(B)
        pred_val[Xtest.nonzero()] = -np.inf
    
    if step == 1:
        Xtest[np.where(Xtest < 1)] = 0
        pred_val = (Xtest).dot(B)
        pred_val[test_data_tr.nonzero()] = -np.inf
        
    if step == 2:
        pred_val = (Xtest).dot(B)
        pred_val[Xtest.nonzero()] = -np.inf

    n5_list.append(NDCG_binary_at_k_batch(pred_val, test_data_te, k=5))
    n10_list.append(NDCG_binary_at_k_batch(pred_val, test_data_te, k=10))
    n50_list.append(NDCG_binary_at_k_batch(pred_val, test_data_te, k=50))
    n100_list.append(NDCG_binary_at_k_batch(pred_val, test_data_te, k=100))
    r1_list.append(Recall_at_k_batch(pred_val, test_data_te, k=1))
    r5_list.append(Recall_at_k_batch(pred_val, test_data_te, k=5))
    r10_list.append(Recall_at_k_batch(pred_val, test_data_te, k=10))
    r20_list.append(Recall_at_k_batch(pred_val, test_data_te, k=20))
    r50_list.append(Recall_at_k_batch(pred_val, test_data_te, k=50))
    r100_list.append(Recall_at_k_batch(pred_val, test_data_te, k=100))
    
    n5_list = np.concatenate(n5_list)
    n10_list = np.concatenate(n10_list)
    n50_list = np.concatenate(n50_list)
    n100_list = np.concatenate(n100_list)
    r1_list = np.concatenate(r1_list)
    r5_list = np.concatenate(r5_list)
    r10_list = np.concatenate(r10_list)
    r20_list = np.concatenate(r20_list)
    r50_list = np.concatenate(r50_list)
    r100_list = np.concatenate(r100_list)
    
    print("Test Recall@1=%.5f (%.5f)" % (np.mean(r1_list), np.std(r1_list) / np.sqrt(len(r1_list))))
    print("Test Recall@5=%.5f (%.5f)" % (np.mean(r5_list), np.std(r5_list) / np.sqrt(len(r5_list))))
    print("Test Recall@10=%.5f (%.5f)" % (np.mean(r10_list), np.std(r10_list) / np.sqrt(len(r10_list))))
    print("Test Recall@20=%.5f (%.5f)" % (np.mean(r20_list), np.std(r20_list) / np.sqrt(len(r20_list))))
    print("Test Recall@50=%.5f (%.5f)" % (np.mean(r50_list), np.std(r50_list) / np.sqrt(len(r50_list))))
    print("Test Recall@100=%.5f (%.5f)" % (np.mean(r100_list), np.std(r100_list) / np.sqrt(len(r100_list))))
    print("Test NDCG@5=%.5f (%.5f)" % (np.mean(n5_list), np.std(n5_list) / np.sqrt(len(n5_list))))
    print("Test NDCG@10=%.5f (%.5f)" % (np.mean(n10_list), np.std(n10_list) / np.sqrt(len(n10_list))))
    print("Test NDCG@50=%.5f (%.5f)" % (np.mean(n50_list), np.std(n50_list) / np.sqrt(len(n50_list))))
    print("Test NDCG@100=%.5f (%.5f)" % (np.mean(n100_list), np.std(n100_list) / np.sqrt(len(n100_list))))
    
# We modified this code for negative feedback experiment.

In [42]:
# We added this code for negative feedback experiment.
print('EASE (base model)\n')
evaluate_EASE(B, test_data_tr, test_data_te, step=0)

EASE (base model)

Evaluating on test set ...
Test Recall@1=0.42820 (0.00495)
Test Recall@5=0.35412 (0.00316)
Test Recall@10=0.34384 (0.00278)
Test Recall@20=0.36203 (0.00255)
Test Recall@50=0.44528 (0.00248)
Test Recall@100=0.55499 (0.00244)
Test NDCG@5=0.36661 (0.00326)
Test NDCG@10=0.34735 (0.00282)
Test NDCG@50=0.35646 (0.00206)
Test NDCG@100=0.39468 (0.00198)


In [43]:
# We added this code for negative feedback experiment.
print('Enf_EASE (Exclude negative feedback in ranking)\n')
evaluate_EASE(B, test_data_tr, test_data_te, step=1)

Enf_EASE (Exclude negative feedback in ranking)

Evaluating on test set ...
Test Recall@1=0.46880 (0.00499)
Test Recall@5=0.39896 (0.00341)
Test Recall@10=0.38909 (0.00304)
Test Recall@20=0.40446 (0.00277)
Test Recall@50=0.48428 (0.00253)
Test Recall@100=0.58910 (0.00241)
Test NDCG@5=0.41040 (0.00348)
Test NDCG@10=0.39169 (0.00308)
Test NDCG@50=0.39744 (0.00228)
Test NDCG@100=0.43365 (0.00217)


In [44]:
# We added this code for negative feedback experiment.
print('find optimal negative feedback weight alpha through validation')

sub_alpha = [-1, -0.7, -0.4, -0.1, 0.2, 0.5, 0.8]
result = []
for a in sub_alpha:
    print('\nalpha =', a)
    sub_validation_data_tr = deepcopy(validation_data_tr)
    sub_validation_data_tr = sub_validation_data_tr.toarray()
    sub_validation_data_tr[np.where(sub_validation_data_tr < 0)] = a
    result.append(evaluate_EASE_for_validation(B, sub_validation_data_tr, validation_data_te))
    
sub_maxi = sub_alpha[result.index(max(result))]
sub_alpha = [sub_maxi-0.1, sub_maxi, sub_maxi+0.1]
result = []
for a in sub_alpha:
    print('\nalpha =', a)
    sub_validation_data_tr = deepcopy(validation_data_tr)
    sub_validation_data_tr = sub_validation_data_tr.toarray()
    sub_validation_data_tr[np.where(sub_validation_data_tr < 0)] = a
    result.append(evaluate_EASE_for_validation(B, sub_validation_data_tr, validation_data_te))
    
alpha = sub_alpha[result.index(max(result))]
print('\noptimal negative feedback weight alpha =', alpha)

find optimal negative feedback weight alpha through validation

alpha = -1
NDCG@100=0.33766 (0.00212)

alpha = -0.7
NDCG@100=0.36413 (0.00214)

alpha = -0.4
NDCG@100=0.39477 (0.00216)

alpha = -0.1
NDCG@100=0.42508 (0.00217)

alpha = 0.2
NDCG@100=0.44581 (0.00219)

alpha = 0.5
NDCG@100=0.45064 (0.00220)

alpha = 0.8
NDCG@100=0.44315 (0.00217)

alpha = 0.4
NDCG@100=0.45086 (0.00221)

alpha = 0.5
NDCG@100=0.45064 (0.00220)

alpha = 0.6
NDCG@100=0.44930 (0.00220)

optimal negative feedback weight alpha = 0.4


In [45]:
# We added this code for negative feedback experiment.
print('UEnf_EASE (Use&Exclude negative feedback in raking)\n')
test_data_tr = test_data_tr.toarray()
test_data_tr[np.where(test_data_tr < 0)] = alpha
test_data_tr = sparse.csr_matrix(test_data_tr)
evaluate_EASE(B, test_data_tr, test_data_te, step=2)

# This is our final model that we propose.

UEnf_EASE (Use&Exclude negative feedback in raking)

Evaluating on test set ...
Test Recall@1=0.48540 (0.00500)
Test Recall@5=0.41740 (0.00345)
Test Recall@10=0.40591 (0.00309)
Test Recall@20=0.42062 (0.00279)
Test Recall@50=0.50248 (0.00253)
Test Recall@100=0.60703 (0.00239)
Test NDCG@5=0.42877 (0.00351)
Test NDCG@10=0.40909 (0.00312)
Test NDCG@50=0.41436 (0.00231)
Test NDCG@100=0.45031 (0.00220)
