In [0]:
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity

## データ読み込み

In [0]:
# Column labels applied to the tab-separated ratings file; because the names
# are supplied explicitly, the first line of the file is read as data.
cols_name = ['user_id', 'item_id', 'rating', 'timestamp']
data_movie = pd.read_csv(
    'u.data',
    sep="\t",
    names=cols_name,
)
# Preview the first rows as a sanity check on the parse.
print(data_movie.head())

In [0]:
# Reshape the long ratings table into a dense user-by-item matrix: one row per
# user_id, one column per item_id, with 0 standing in for "not rated".
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is its
# replacement and yields the same NumPy array.
movie_rating = (
    data_movie
    .pivot(index='user_id', columns='item_id', values='rating')
    .fillna(0)
    .to_numpy()
)
# Preview the first five user rows and confirm the (users, items) dimensions.
print(movie_rating[0:5])
print(movie_rating.shape)

## コサイン類似度計算

In [0]:
# Pairwise cosine similarity between the rows of the rating matrix (every row
# is compared against every row), which gives a square similarity matrix.
cos_sim = cosine_similarity(X=movie_rating, Y=movie_rating)
# Preview the first five rows and confirm the matrix dimensions.
print(cos_sim[0:5])
print(cos_sim.shape)

## レコメンド

In [0]:
# Similarity of every user to user 1 (row 0 of the similarity matrix).
cos_sim_for_user_1 = cos_sim[0]
# Rank all users by ascending similarity, then drop user 1's own position
# explicitly before taking the ten most similar users. Slicing off "the last
# element" instead would only be correct if user 1 always sorted last, which is
# not guaranteed when another user ties with the self-similarity value.
ranked_users = np.argsort(cos_sim_for_user_1)
similar_user = ranked_users[ranked_users != 0][-10:]
print(similar_user)

In [0]:
# Show the similarity scores of the selected users, in the same order as
# `similar_user`.
print(np.take(cos_sim_for_user_1, similar_user))

In [0]:
# Rating rows of the selected similar users, one row per user and one column
# per movie.
movie_rating_of_similar_user = movie_rating[similar_user, :]
print(movie_rating_of_similar_user)

In [0]:
# Scale each similar user's rating row by that user's similarity to user 1.
# The similarities are shaped into a column vector so that each one broadcasts
# across all movie columns of its row.
neighbour_weights = cos_sim_for_user_1[similar_user][:, np.newaxis]
weighted_movie_rating = movie_rating_of_similar_user * neighbour_weights
print(weighted_movie_rating)

In [0]:
# Recommendation value per movie: the column-wise mean of the
# similarity-weighted ratings over the selected users. Movies a user has not
# rated were filled with 0 upstream, so they contribute 0 to the mean.
mean_weighted_movie_rating = np.mean(weighted_movie_rating, axis=0)
print(mean_weighted_movie_rating)

In [0]:
# Build a per-movie table pairing user 1's own rating with the recommendation
# value computed from the similar users.
# Rows are labelled with the real item_id rather than the default 0-based
# position, so the ids shown downstream identify the actual movies. The columns
# of `movie_rating` follow the pivot's column order, i.e. the sorted unique
# item_id values, which np.unique reproduces.
item_ids = pd.Index(np.unique(data_movie['item_id']), name='item_id')
recommend_values = pd.DataFrame(
    {'user_1_score': movie_rating[0], 'recommend_value': mean_weighted_movie_rating},
    index=item_ids,
)
print(recommend_values.head())

In [0]:
# Restrict to movies user 1 has not rated (a score of 0 marks a missing
# rating), then show the ten with the highest recommendation value.
unrated_by_user_1 = recommend_values['user_1_score'] == 0
recommend_values.loc[unrated_by_user_1].sort_values(by='recommend_value', ascending=False).head(10)