In [1]:
%pip install Recommenders -q

Collecting Recommenders
  Downloading recommenders-0.6.0-py3-none-manylinux1_x86_64.whl (233 kB)
[K     |████████████████████████████████| 233 kB 14.9 MB/s 
Collecting transformers<5,>=2.5.0
  Downloading transformers-4.9.2-py3-none-any.whl (2.6 MB)
[K     |████████████████████████████████| 2.6 MB 61.0 MB/s 
Collecting pydocumentdb>=2.3.3<3
  Downloading pydocumentdb-2.3.5-py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 2.8 MB/s 
[?25hCollecting cornac<2,>=1.1.2
  Downloading cornac-1.13.5-cp37-cp37m-manylinux1_x86_64.whl (12.4 MB)
[K     |████████████████████████████████| 12.4 MB 61.2 MB/s 
[?25hCollecting scikit-surprise<=1.1.1,>=0.19.1
  Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)
[K     |████████████████████████████████| 11.8 MB 15.4 MB/s 
[?25hCollecting pyyaml<6,>=5.4.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 63.9 MB/s 
[?25hCollecting lightfm<2,>=1.15
  Downloa

In [7]:
import sys
import time
import os
import shutil
import pandas as pd
import numpy as np
%tensorflow_version 1.x

import tensorflow as tf
from reco_utils.recommender.ncf.ncf_singlenode import NCF
from reco_utils.recommender.ncf.dataset import Dataset as NCFDataset
from reco_utils.dataset import movielens
from reco_utils.dataset.python_splitters import python_chrono_split
from reco_utils.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k,
                                                     recall_at_k, precision_at_k, get_top_k_items)
from reco_utils.common.constants import SEED as DEFAULT_SEED

In [3]:
# top k 아이템 설정
TOP_K = 10

# MovieLens data size: 100k, 1m, 10m, 20m 중 제일 작은 것 가져오기
MOVIELENS_DATA_SIZE = '100k'

# Model Params
EPOCHS = 100
BATCH_SIZE = 256

SEED = DEFAULT_SEED

# loading movie lens data
df = movielens.load_pandas_df(size=MOVIELENS_DATA_SIZE, header=['userID', 'itemID', 'rating', 'timestamp'])

100%|██████████| 4.81k/4.81k [00:00<00:00, 5.24kKB/s]


In [4]:
# 75% 비중으로 나누기
train, test = python_chrono_split(df, 0.75)

# userid, itemid, rating이 있는 데이터만 가져옴
data = NCFDataset(train=train, test=test, seed=SEED)

tf.compat.v1.disable_eager_execution()

In [5]:
# 모델 정의 및 학습, 예측
model = NCF(n_users=data.n_users, n_items=data.n_items, model_type='NeuMF', n_factors=4,
            layer_sizes=[16,8,4], n_epochs=EPOCHS, batch_size=BATCH_SIZE, learning_rate=1e-3,
            verbose=10, seed=SEED)
# n_factors: latent space의 차원 수
# layer_sizes: MLP 레이어 수

model.fit(data)

predictions = [[row.userID, row.itemID, model.predict(row.userID, row.itemID)]
               for (_, row) in test.iterrows()]

# 예측값 저장
predictions = pd.DataFrame(predictions, columns=['userID', 'itemID', 'prediction'])
predictions.head()

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Unnamed: 0,userID,itemID,prediction
0,1.0,149.0,0.048069
1,1.0,88.0,0.62564
2,1.0,101.0,0.079044
3,1.0,110.0,0.078758
4,1.0,103.0,0.004996


In [8]:
# 성능 확인
users, items, preds = [], [], []
item = list(train.itemID.unique())
for user in train.userID.unique():
    user = [user] * len(item)
    users.extend(user)
    items.extend(item)
    preds.extend(list(model.predict(user, item, is_list=True)))

all_predictions = pd.DataFrame(data={'userID': users, 'itemID': items, 'prediction': preds})

merged = pd.merge(train, all_predictions, on=['userID', 'itemID'], how='outer')
all_predictions = merged[merged.rating.isnull()].drop('rating', axis=1)

eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)

In [10]:
eval_ndcg

0.1906842374042988