In [1]:
import pandas as pd
ratings =pd.read_csv('data/ratings_small.csv')

In [2]:
min = ratings['rating'].min()
max = ratings['rating'].max()
min,max

(0.5, 5.0)

In [3]:
from surprise import Reader, Dataset, SVD

In [4]:
from surprise.model_selection import cross_validate
reader = Reader(rating_scale=(0.5, 5.0))

In [5]:
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader=reader)
data

<surprise.dataset.DatasetAutoFolds at 0x268bf6f3ad0>

In [6]:
svd = SVD(random_state=0)

In [9]:
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9004  0.9011  0.8911  0.9026  0.8962  0.8982  0.0042  
MAE (testset)     0.6936  0.6933  0.6891  0.6919  0.6888  0.6914  0.0020  
Fit time          0.81    0.86    0.86    0.82    0.82    0.83    0.02    
Test time         0.08    0.08    0.12    0.07    0.07    0.09    0.02    


{'test_rmse': array([0.90036924, 0.90105773, 0.89105315, 0.90258873, 0.89616307]),
 'test_mae': array([0.69364022, 0.69333268, 0.68913134, 0.69186656, 0.68884935]),
 'fit_time': (0.8098063468933105,
  0.8577065467834473,
  0.8587048053741455,
  0.8238203525543213,
  0.8158466815948486),
 'test_time': (0.07679462432861328,
  0.08377456665039062,
  0.11968016624450684,
  0.07377839088439941,
  0.0718071460723877)}

In [10]:
trainset = data.build_full_trainset()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x268c02d4380>

In [11]:
user_id=9
movie_id=42
movies = ratings[ratings['userId']==user_id]['movieId']
movies.count(), movies

(45,
 699       1
 700      17
 701      26
 702      36
 703      47
 704     318
 705     497
 706     515
 707     527
 708     534
 709     593
 710     608
 711     733
 712    1059
 713    1177
 714    1357
 715    1358
 716    1411
 717    1541
 718    1584
 719    1680
 720    1682
 721    1704
 722    1721
 723    1784
 724    2028
 725    2125
 726    2140
 727    2249
 728    2268
 729    2273
 730    2278
 731    2291
 732    2294
 733    2302
 734    2391
 735    2396
 736    2427
 737    2490
 738    2501
 739    2539
 740    2571
 741    2628
 742    2762
 743    2857
 Name: movieId, dtype: int64)

In [14]:
if movies[movies==movie_id].count() == 0:
    print(f'사용자: {user_id}는(은) 영화 아이디 {movie_id} 평점 없음')

사용자: 9는(은) 영화 아이디 42평점 없음


In [15]:
pred = svd.predict(uid, mid)
pred

Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False})

In [18]:
print(f'사용자 아이디 : {user_id}의 예측 평점 -> {pred.est}점')

사용자 아이디 : 9의 예측 평점 -> 2.9420328554558783점


In [19]:
seen_movies = ratings[ratings['userId']==user_id]['movieId'].tolist()
len(seen_movies)

45

In [20]:
total_movies = ratings['movieId'].drop_duplicates().sort_values().tolist()
len(total_movies)

9066

In [21]:
import numpy as np
unseen_movies = np.setdiff1d(total_movies, seen_movies)
len(unseen_movies)

9021

In [22]:
def get_unseen_movies(ratings, user_id):
 seen_movies = ratings[ratings['userId']==user_id]['movieId'].tolist()
 total_movies = ratings['movieId'].drop_duplicates().sort_values().tolist()
 unseen_movies = np.setdiff1d(total_movies, seen_movies)
 print(f'평점 매긴 영화 수:{len(seen_movies)}')
 print(f'추천 대상 영화 수:{len(unseen_movies)}')
 print(f'모든 영화 수:{len(total_movies)}')
 return unseen_movies
unseen_movies = get_unseen_movies(ratings, user_id)

평점 매긴 영화 수:45
추천 대상 영화 수:9021
모든 영화 수:9066


In [23]:
predictions = [svd.predict(user_id, movie_id) for mid in unseen_movies]
predictions

[Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, i

In [24]:
predictions.sort(key=lambda pred:pred.est, reverse=True)
predictions

[Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, i

In [25]:
top_predictions = predictions[:10]
top_predictions

[Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False}),
 Prediction(uid=9, iid=42, r_ui=None, est=2.9420328554558783, details={'was_impossible': False})]

In [26]:
top_movies = [(pred.iid, pred.est) for pred in top_predictions]
top_movies

[(42, 2.9420328554558783),
 (42, 2.9420328554558783),
 (42, 2.9420328554558783),
 (42, 2.9420328554558783),
 (42, 2.9420328554558783),
 (42, 2.9420328554558783),
 (42, 2.9420328554558783),
 (42, 2.9420328554558783),
 (42, 2.9420328554558783),
 (42, 2.9420328554558783)]

In [27]:
def recomm_movies(predictions):
 predictions = [svd.predict(uid, mid) for mid in unseen_movies]
 predictions.sort(key=lambda pred:pred.est, reverse=True)
 top_predictions = predictions[:10]
 top_movies = [(pred.iid, pred.est) for pred in top_predictions]
 return top_movies

In [28]:
movies = recomm_movies(predictions)
movies
print('******* Top-10 추천 영화 리스트 *******')
print('-' * 50)
for movie in movies:
 print(f'영화 아이디:{movie[0]} (평점:{movie[1]})')
 print('-' * 50)

******* Top-10 추천 영화 리스트 *******
--------------------------------------------------
영화 아이디:858 (평점:4.542866877335705)
--------------------------------------------------
영화 아이디:912 (평점:4.484090707192216)
--------------------------------------------------
영화 아이디:4993 (평점:4.471004680156093)
--------------------------------------------------
영화 아이디:926 (평점:4.427937145395248)
--------------------------------------------------
영화 아이디:745 (평점:4.41983077978538)
--------------------------------------------------
영화 아이디:904 (평점:4.4091355113400486)
--------------------------------------------------
영화 아이디:908 (평점:4.399526575526584)
--------------------------------------------------
영화 아이디:969 (평점:4.389459379084835)
--------------------------------------------------
영화 아이디:1278 (평점:4.375797151947231)
--------------------------------------------------
영화 아이디:8132 (평점:4.361136646233039)
--------------------------------------------------
