In [1]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): the CSV reads further down use bare relative file names rather
# than paths under this mount point — confirm where the data files really live.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install tensorflow==1.15 cornac

In [19]:
import cornac
import pickle
import itertools
import numpy as np
import pandas as pd

from scipy.sparse import csr_matrix
from sklearn.preprocessing import normalize
from collections import OrderedDict, defaultdict

In [20]:
# Book metadata is read with pandas' default index handling; the two
# interaction logs are read with index_col=False.
name_books = pd.read_csv('books.csv')
train, test = (
    pd.read_csv(csv_name, index_col=False)
    for csv_name in ('train.csv', 'test.csv')
)

In [21]:
# Keep exactly one held-out interaction per user: the row with the latest
# `date`. `groupby('id').last()` is deliberately not used: it returns the last
# *non-null* value of every column independently, so with missing values it can
# assemble one output row from several different interactions.
test = (
    test
    .sort_values(by=['id', 'date'])
    .drop_duplicates(subset='id', keep='last')  # whole last row per user
    .reset_index(drop=True)
)

In [22]:
# Lookup tables mapping every raw user / book id seen in train or test to a
# dense integer code (pandas category codes).
# The ids are de-duplicated first so each table has ONE row per id. Keeping one
# row per interaction (as before) turns the merges that consume these tables
# into many-to-many joins whose size grows with the square of each id's
# interaction count. The codes themselves are unaffected, because category
# codes depend only on the set of distinct values.
users = (
    pd.concat([train[['id']], test[['id']]], ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)
users['id'] = users['id'].astype('category')
users['category_id'] = users['id'].cat.codes

books = (
    pd.concat([train[['id_book']], test[['id_book']]], ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)
books['id_book'] = books['id_book'].astype('category')
books['category_book'] = books['id_book'].cat.codes

In [23]:
# Attach the integer user code (`category_id`) to every interaction.
# The lookup table is reduced to one row per id before joining: joining against
# a table that repeats each id once per interaction is a many-to-many merge
# that multiplies rows before the trailing drop_duplicates() collapses them.
# The trailing drop_duplicates() is kept so exact duplicate interactions are
# still removed, as before.
train = pd.merge(train, users.drop_duplicates(), on='id',
                 how='inner').drop_duplicates()
test = pd.merge(test, users.drop_duplicates(), on='id',
                how='inner').drop_duplicates()

In [24]:
# Attach the integer book code (`category_book`) to every interaction.
# The lookup table is reduced to one row per book id before joining, which
# avoids a many-to-many merge that multiplies rows before the trailing
# drop_duplicates() collapses them again.
train = pd.merge(train, books.drop_duplicates(), on='id_book',
                 how='inner').drop_duplicates()
test = pd.merge(test, books.drop_duplicates(), on='id_book',
                how='inner').drop_duplicates()

In [25]:
# Report the number of interaction rows in each split.
print(f'train:\t\t{len(train)}\ntest:\t\t{len(test)}')

train:		121506
test:		1342


In [26]:
def to_cornac_ds(ds):
    """Wrap an encoded interaction frame in a ``cornac.data.Dataset``.

    Every row of ``ds`` is treated as one positive (implicit) interaction with
    rating 1.

    Parameters
    ----------
    ds : pandas.DataFrame
        One row per interaction, with the raw ids in ``id`` / ``id_book`` and
        their integer codes in ``category_id`` / ``category_book``.

    Returns
    -------
    cornac.data.Dataset
        Dataset sized to ``max(code) + 1`` users and items, so codes that do
        not occur in ``ds`` still fall inside the index range.
    """
    ds_user_num = ds.category_id.max() + 1
    ds_book_num = ds.category_book.max() + 1

    # raw id -> integer index, in order of first appearance.
    # (`to_dict(orient='index')` would instead give {row_label: {column: value}},
    # which is not an id mapping at all.)
    uid_map = OrderedDict(zip(ds['id'].tolist(), ds['category_id'].tolist()))
    iid_map = OrderedDict(zip(ds['id_book'].tolist(),
                              ds['category_book'].tolist()))

    cat_users = ds.category_id.values
    cat_books = ds.category_book.values
    # One rating per interaction: a flat (n,) vector aligned with the user and
    # item index arrays, not a (1, n) matrix.
    interact = np.ones(cat_users.shape[0], dtype=float)

    uir_tuple = (cat_users, cat_books, interact)

    return cornac.data.Dataset(num_users=ds_user_num,
                               num_items=ds_book_num,
                               uid_map=uid_map,
                               iid_map=iid_map,
                               uir_tuple=uir_tuple)

In [27]:
def hit_rate_als(model, test, n):
    """Hit rate@n: share of test users with a held-out item in their top-n.

    Parameters
    ----------
    model : object
        Anything exposing ``score(user_idx)`` that returns a 1-D numpy array of
        scores; positions in that array are compared against ``category_book``.
    test : pandas.DataFrame
        Held-out interactions with ``category_id`` and ``category_book``.
    n : int
        Length of the recommendation list.

    Returns
    -------
    float
        Fraction of distinct test users with at least one hit, or ``0.0`` when
        ``test`` contains no users.
    """
    n_users = 0
    n_hits = 0

    # One groupby pass hands over each user's held-out items, instead of
    # re-filtering the whole frame with a boolean mask for every user.
    for user, user_books in test.groupby('category_id')['category_book']:
        n_users += 1
        top_n = model.score(int(user)).argsort()[::-1][:n]
        if not set(user_books.tolist()).isdisjoint(top_n.tolist()):
            n_hits += 1

    # Computed once, after the loop, with an explicit guard: assigning the
    # result inside the loop left it undefined for an empty test set.
    if n_users == 0:
        return 0.0
    return n_hits / n_users

In [28]:
def map_als(model, test, n):
    """Collect ground truth and top-n predictions per test user for MAP@k.

    Parameters
    ----------
    model : object
        Anything exposing ``score(user_idx)`` that returns a 1-D numpy array of
        scores; positions in that array are treated as item codes.
    test : pandas.DataFrame
        Held-out interactions with ``category_id`` and ``category_book``.
    n : int
        Number of top-scored items to keep per user.

    Returns
    -------
    tuple[list, list]
        ``(actuals, preds)``, aligned by user in order of first appearance in
        ``test``. Each entry of ``actuals`` is a one-element list holding the
        ``category_book`` of the user's last row; each entry of ``preds`` is
        the array of the ``n`` highest-scored item positions, best first.
    """
    actuals = []
    preds = []

    # sort=False keeps users in order of first appearance. The item column is
    # selected *before* the positional lookup, so the code keeps its integer
    # dtype instead of being coerced through a whole-row Series.
    grouped = test.groupby('category_id', sort=False)['category_book']
    for user, user_books in grouped:
        preds.append(model.score(int(user)).argsort()[::-1][:n])
        actuals.append([user_books.iloc[-1]])

    return actuals, preds

In [29]:
def apk(actual, predicted, k=10):
    """Average precision at k for a single ranked recommendation list.

    Parameters
    ----------
    actual : sequence
        Relevant items (order irrelevant).
    predicted : sequence
        Recommended items, best first. Only the first ``k`` are scored and a
        repeated item is counted only at its first position.
    k : int, optional
        Cut-off rank (default 10).

    Returns
    -------
    float
        Sum of precision@rank over the ranks holding a relevant item, divided
        by ``min(len(actual), k)``; ``0.0`` when ``actual`` is empty or
        ``k <= 0``.
    """
    # Best achievable number of hits; also the AP normaliser. With a single
    # relevant item this is 1, so such callers see unchanged values.
    max_hits = min(len(actual), k)
    if max_hits <= 0:
        return 0.0

    seen = set()
    num_hits = 0
    score = 0.0
    for rank, item in enumerate(predicted[:k], start=1):
        if item in seen:
            continue  # duplicates only count at their first position
        seen.add(item)
        if item in actual:
            num_hits += 1
            score += num_hits / rank

    return score / max_hits

def mapk(actual, predicted, k=10):
    """Mean of ``apk`` over paired ground-truth / prediction lists.

    ``actual`` and ``predicted`` are walked in parallel (stopping at the
    shorter one); each pair is scored with ``apk(..., k)`` and the scores are
    averaged with ``np.mean``.
    """
    per_list_scores = []
    for truth, ranking in zip(actual, predicted):
        per_list_scores.append(apk(truth, ranking, k))
    return np.mean(per_list_scores)

In [30]:
# Convert the encoded training interactions into the cornac dataset object
# that the models in the following cells are fitted on.
train_nfm = to_cornac_ds(train)

In [32]:
%%time
# NOTE(review): `NCF2` is never created in the cells visible here (the cell
# numbering jumps from In [30] to In [32]), so unless it is defined elsewhere
# this cell fails on a fresh kernel. The recorded output suggests a cornac
# NeuMF model trained for 10 epochs — TODO restore its definition with the
# original hyperparameters.
NCF2.fit(train_nfm)

  0%|          | 0/10 [00:00<?, ?it/s]

CPU times: user 7min 9s, sys: 6.6 s, total: 7min 15s
Wall time: 6min 57s


<cornac.models.ncf.recom_neumf.NeuMF at 0x7f781881ec90>

In [39]:
# Hit rate of the NCF2 model at several list lengths, rounded to 5 decimals.
print('NCF model')
for n in (1, 5, 10):
    hr = round(hit_rate_als(NCF2, test, n), 5)
    print(f'\tHit Rate@{n}:\t{hr}')

# The top-10 lists are built once and truncated inside mapk for each cut-off.
y_true, y_pred = map_als(NCF2, test, 10)
for k in (1, 2, 5, 10):
    map_at_k = round(mapk(y_true, y_pred, k=k), 5)
    print(f"MAP@{k}:\t{map_at_k}")

NCF model
	Hit Rate@1:	0.02235
	Hit Rate@5:	0.15499
	Hit Rate@10:	0.2623
MAP@1:	0.02235
MAP@2:	0.04247
MAP@5:	0.06668
MAP@10:	0.08078


---

In [35]:
# Second NeuMF configuration: hyperparameters gathered in one place, then the
# model is built and fitted on the training dataset. The fit call stays the
# last expression so the cell still displays the fitted model.
neumf_params = dict(
    num_factors=10,
    num_neg=10,
    num_epochs=30,
    seed=123,
)
model = cornac.models.NeuMF(**neumf_params)
model.fit(train_nfm)

  0%|          | 0/30 [00:00<?, ?it/s]

<cornac.models.ncf.recom_neumf.NeuMF at 0x7f7818a6fb90>

In [40]:
# Evaluation of the NeuMF model, reported in the same format as the NCF2
# evaluation cell: one header, hit rate and MAP both rounded to 5 decimals.
# The header previously misspelt the model name ('NeuFM' / 'NeuRM') and was
# printed twice; re-run the cell to refresh the recorded output.
print('NeuMF model')
for n in [1, 5, 10]:
    hr = round(hit_rate_als(model, test, n), 5)
    print(f'\tHit Rate@{n}:\t{hr}')

# The top-10 lists are built once and truncated inside mapk for each cut-off.
y_true, y_pred = map_als(model, test, 10)
for k in [1, 2, 5, 10]:
    print(f"MAP@{k}:\t{round(mapk(y_true, y_pred, k=k), 5)}")

NeuFM model
	Hit Rate@1:	0.02757
	Hit Rate@5:	0.14009
	Hit Rate@10:	0.26602
NeuRM model

MAP@1:	0.027570789865871834
MAP@2:	0.039865871833084945
MAP@5:	0.06265524093392945
MAP@10:	0.07906849525701984
