In [1]:
# Install the notebook's dependencies: TensorFlow is pinned to 1.15, cornac is left unpinned.
# NOTE(review): pinning cornac too would make reruns reproducible — confirm the version used.
!pip install tensorflow==1.15 cornac

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [17]:
import cornac
import pickle
import itertools
import numpy as np
import pandas as pd

from scipy.sparse import csr_matrix
from sklearn.preprocessing import normalize
from collections import OrderedDict, defaultdict

### Load data and preprocessing

In [3]:
# Read the three CSV inputs from the working directory.
# index_col=False stops pandas from using the first column of train/test as the index.
name_books = pd.read_csv('books.csv')
train, test = (
    pd.read_csv(csv_path, index_col=False)
    for csv_path in ('train.csv', 'test.csv')
)

In [4]:
# Reduce the test set to one row per user: order by user and date, then
# take the last entry of each user's group and restore 'id' as a column.
test = (
    test
    .sort_values(by=['id', 'date'])
    .groupby('id')
    .last()
    .reset_index()
)

In [5]:
# Encode raw user ids as integer category codes shared by train and test.
# The lookup table is de-duplicated (one row per distinct id) so that merging
# it back onto the interactions is many-to-one; with one row per interaction
# the merge would multiply every row by the number of times its id occurs.
# Category codes depend only on the set of distinct values, so they are unchanged.
users = (
    pd.concat([train[['id']], test[['id']]], ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)
users['id'] = users['id'].astype('category')
users['category_id'] = users['id'].cat.codes

# Same encoding for book ids.
books = (
    pd.concat([train[['id_book']], test[['id_book']]], ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)
books['id_book'] = books['id_book'].astype('category')
books['category_book'] = books['id_book'].cat.codes

### Attach encoded user and book ids to train and test

In [6]:
# Attach the integer user code (category_id) to every interaction row and
# drop exact duplicate rows from the joined result.
train = (
    train
    .merge(users, left_on='id', right_on='id', how='inner')
    .drop_duplicates()
)
test = (
    test
    .merge(users, left_on='id', right_on='id', how='inner')
    .drop_duplicates()
)

In [7]:
# Attach the integer book code (category_book) to every interaction row and
# drop exact duplicate rows from the joined result.
train = (
    train
    .merge(books, left_on='id_book', right_on='id_book', how='inner')
    .drop_duplicates()
)
test = (
    test
    .merge(books, left_on='id_book', right_on='id_book', how='inner')
    .drop_duplicates()
)

In [8]:
# Report how many interaction rows ended up in each frame.
split_sizes_report = f'train:\t\t{train.shape[0]}\ntest:\t\t{test.shape[0]}'
print(split_sizes_report)

train:		121506
test:		1342


### Helper functions

In [9]:
def to_cornac_ds(ds):
    """
    Convert an interaction DataFrame into a ``cornac.data.Dataset``.

    Every row of ``ds`` is treated as one implicit interaction with value 1.

    Parameters
    ----------
    ds : pandas.DataFrame
        Must contain the columns ``id`` / ``category_id`` (raw user id and its
        integer code) and ``id_book`` / ``category_book`` (raw book id and its
        integer code).

    Returns
    -------
    cornac.data.Dataset
        Dataset sized by the largest user/book code present in ``ds`` plus one.
    """
    ds_user_num = ds.category_id.max() + 1
    ds_book_num = ds.category_book.max() + 1

    # Raw id -> integer index, in order of first appearance. The previous
    # ``to_dict(orient='index')`` produced {row_label: {column: value}} instead,
    # which is not an id mapping.
    uid_map = OrderedDict(
        zip(ds['id'].tolist(), ds['category_id'].tolist()))
    iid_map = OrderedDict(
        zip(ds['id_book'].tolist(), ds['category_book'].tolist()))

    cat_users = ds.category_id.values
    cat_books = ds.category_book.values
    # One rating per interaction, as a 1-D array aligned with the user/book
    # index arrays (it used to be a (1, n) 2-D array).
    interact = np.full(cat_users.shape[0], 1)

    uir_tuple = (cat_users, cat_books, interact)

    cornac_ds = cornac.data.Dataset(num_users=ds_user_num,
                                    num_items=ds_book_num,
                                    uid_map=uid_map,
                                    iid_map=iid_map,
                                    uir_tuple=uir_tuple)
    return cornac_ds

In [10]:
def apk(actual, predicted, k=10):
    """
    Average precision at k.

    Parameters
    ----------
    actual : sequence
        Relevant items (order does not matter).
    predicted : sequence
        Ranked predictions, best first. Only the first ``k`` are considered.
    k : int, optional
        Cut-off rank.

    Returns
    -------
    float
        Sum of precision values at each rank where a new relevant item is hit,
        divided by ``min(len(actual), k)``. Returns 0.0 when ``actual`` is
        empty or ``k`` is not positive.
    """
    # len() instead of truthiness so numpy arrays are accepted as well.
    if actual is None or len(actual) == 0 or k <= 0:
        return 0.0

    predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i, p in enumerate(predicted):
        # A repeated prediction must not be rewarded twice.
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)

    # Normalise by the best achievable number of hits; without this the
    # value can exceed 1 when several items are relevant.
    return score / min(len(actual), k)

def mapk(actual, predicted, k=10):
    """
    Mean average precision at k.

    ``actual`` and ``predicted`` are parallel sequences: one list of relevant
    items and one ranked prediction list per user. The per-user ``apk`` scores
    are averaged with ``np.mean``.
    """
    per_user_scores = []
    for relevant, ranked in zip(actual, predicted):
        per_user_scores.append(apk(relevant, ranked, k))
    return np.mean(per_user_scores)

In [11]:
def hit_rate_als(model, test, n):
    """
    Hit rate at n: share of test users with at least one held-out book in
    their top-n recommendations.

    Parameters
    ----------
    model : object
        Anything exposing ``score(user_idx)`` that returns a numpy array of
        scores indexed by book code.
    test : pandas.DataFrame
        Must contain ``category_id`` (user code) and ``category_book``
        (book code) columns.
    n : int
        Number of top-scored books to consider per user.

    Returns
    -------
    float
        Fraction of distinct users in ``test`` with a hit; 0.0 when ``test``
        has no rows.
    """
    if len(test) == 0:
        return 0.0

    # One pass over the frame instead of a boolean mask per user;
    # sort=False keeps users in order of first appearance.
    books_by_user = test.groupby('category_id', sort=False)['category_book']

    hits = 0
    for user, held_out in books_by_user:
        # Indices of the n highest scores, best first.
        top_n = model.score(int(user)).argsort()[::-1][:n]
        if not set(top_n).isdisjoint(held_out.values.tolist()):
            hits += 1

    return hits / books_by_user.ngroups

In [12]:
def map_als(model, test, n):
    """
    Collect per-user ground truth and top-n predictions for MAP evaluation.

    This function does not compute MAP itself; its two return values are the
    ``actual`` / ``predicted`` inputs expected by ``mapk``.

    Parameters
    ----------
    model : object
        Anything exposing ``score(user_idx)`` that returns a numpy array of
        scores indexed by book code.
    test : pandas.DataFrame
        Must contain ``category_id`` (user code) and ``category_book``
        (book code) columns.
    n : int
        Number of top-scored books to keep per user.

    Returns
    -------
    (list, list)
        ``actuals``: one single-element list per user holding the book code of
        that user's last row in ``test``; ``preds``: one array per user with
        the indices of the n highest scores, best first. Users appear in order
        of first appearance in ``test``.
    """
    actuals = []
    preds = []

    # One pass over the frame instead of a boolean mask per user;
    # sort=False keeps users in order of first appearance.
    books_by_user = test.groupby('category_id', sort=False)['category_book']

    for user, user_books in books_by_user:
        preds.append(model.score(int(user)).argsort()[::-1][:n])
        actuals.append([user_books.iloc[-1]])

    return actuals, preds

### Train model

In [20]:
# Wrap the encoded training interactions in a cornac dataset.
train_nfm = to_cornac_ds(ds=train)

In [23]:
# Hyper-parameters for the NeuMF model; the fixed seed keeps runs repeatable.
neumf_params = dict(
    num_factors=10,
    num_neg=10,
    num_epochs=30,
    seed=123,
)
model = cornac.models.NeuMF(**neumf_params)

# Left as the cell's last expression so the fitted model is displayed.
model.fit(train_nfm)

  0%|          | 0/30 [00:00<?, ?it/s]

<cornac.models.ncf.recom_neumf.NeuMF at 0x7f4119515c10>

In [25]:
# Hit rate at several cut-offs, rounded to five decimals for display.
print('NeuFM model')
for n in (1, 5, 10):
    hr = round(hit_rate_als(model, test, n), 5)
    print(f'Hit Rate@{n}:\t{hr}')

# Top-10 predictions are collected once and truncated to each k by mapk.
y_true, y_pred = map_als(model, test, 10)
for k in (1, 2, 5, 10):
    print(f"MAP@{k}:\t{mapk(y_true, y_pred, k=k)}")

NeuFM model
Hit Rate@1:	0.02161
Hit Rate@5:	0.14382
Hit Rate@10:	0.28763
MAP@1:	0.021609538002980627
MAP@2:	0.037257824143070044
MAP@5:	0.06162444113263785
MAP@10:	0.08032165921510184
