# レコメンドの実施

レコメンドモデルの構築と評価の実施  
surpriseライブラリを使用

In [1]:
import pandas as pd
from surprise import Dataset, Reader
from surprise import SVD, NMF
from surprise.model_selection import cross_validate

In [2]:
df = pd.read_pickle("../data/data.pkl")

In [3]:
df = df[["user_id", "name", "user_rating"]]

In [4]:
df.shape

(5957004, 3)

### Create my dataset

printで表示されているのが評価で出力する自分のid

In [7]:
test_id = df["user_id"].max() + 1
print(test_id)

test_df = [
    [test_id, "Angel Beats!", 9],
    [test_id, "Code Geass: Hangyaku no Lelouch", 8],
    [test_id, "Toradora!", 8],
    [test_id, "Steins;Gate", 10],
    [test_id, "Doraemon (1979)", 6],
    [test_id, "Shirobako", 8],
    [test_id, "Glasslip", 3],
    [test_id, "Kimi no Na wa.", 6],
    [test_id, "Byousoku 5 Centimeter", 9],
]

test_df = pd.DataFrame(test_df, columns=df.columns)

73517


In [8]:
test_df.head()

Unnamed: 0,user_id,name,user_rating
0,73517,Angel Beats!,9
1,73517,Code Geass: Hangyaku no Lelouch,8
2,73517,Toradora!,8
3,73517,Steins;Gate,10
4,73517,Doraemon (1979),6


### Create dataset

メモリの都合で1万件に絞って学習用データセットを作成

In [9]:
# reduce data due to memory limitation
sample_data = df.sample(10000, random_state=42)
sample_data = pd.concat([sample_data, test_df], axis=0)

In [10]:
reader = Reader(rating_scale=(0, 10))
data = Dataset.load_from_df(sample_data, reader)

In [11]:
del df, sample_data

### Non negative matrix factorization (行列の次元削減手法) を活用して協調フィルタリング

In [12]:
# Non negative matrix factorization
trainset = data.build_full_trainset()
nmf = NMF()
nmf.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x7f850c0a8160>

In [13]:
del data

In [14]:
testset = trainset.build_anti_testset()

In [15]:
del trainset

In [16]:
pred = nmf.test(testset)

In [17]:
from collections import defaultdict


def get_top_n(predictions, n=10):
    """Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

## Prediction result

In [18]:
top_n = get_top_n(pred, n=20)

In [19]:
del pred

評価用の自分のidでレコメンド結果確認

In [20]:
top_n[73517]

[('Kuroshitsuji Picture Drama', 8.07063935240594),
 ('Slayers Special', 7.93893526895414),
 ('Kuroko no Basket: Tip Off', 7.7004767620369226),
 ('91 Days', 7.505669460621202),
 ('Cross Game', 7.432634365121517),
 ('Hunter x Hunter OVA', 7.4017509040740475),
 ('Mahou Shoujo Lyrical Nanoha', 7.292481800686363),
 ('Zero no Tsukaima: Princesses no Rondo', 7.267398677116635),
 ('Break Blade', 7.206900566405725),
 ('Tenchi Muyou! Ryououki', 7.191155443591789),
 ('Major S3', 7.172644173331797),
 ('Sayonara Zetsubou Sensei', 7.084024120087983),
 ('Plastic Neesan', 7.082430214358044),
 ('Toaru Kagaku no Railgun', 7.067395688446211),
 ('Zutto Mae kara Suki deshita.: Kokuhaku Jikkou Iinkai', 7.0499555960429845),
 ('Ao no Exorcist Movie', 7.048684545849936),
 ('ef: A Tale of Melodies.', 7.045526357426911),
 ('The iDOLM@STER', 7.040001144062005),
 ('Casshern Sins', 7.034706647913128),
 ('Suzumiya Haruhi no Shoushitsu', 7.031028857807739)]

In [21]:
# test_df = [
#     [test_id, "Angel Beats!", 10],
#     [test_id, "Ookami to Koushinryou", 10],
#     [test_id, "Shinsekai yori", 8],
#     [test_id, "Seikimatsu Occult Gakuin", 6],
#     [test_id, "Futari wa Precure", 8],
#     [test_id, "Shugo Chara!", 8],
#     [test_id, "Uchuu Senkan Yamato 2199", 9],
# ]