In [None]:
# !pip install scikit-surprise

In [1]:
import surprise
import pandas as pd
import numpy as np

In [2]:
# 고유값 분해(행렬을 더 낮은 차원으로 분해)해서 
# 다시 원본으로 살리면서 잠재적인 값을 추론하는 방법
from surprise import SVD

# SVD를 사용하기 위한 테이터셋을 만들어주는 클래스
# 사용자, 아이템, 평점
from surprise import Dataset

# RMSE, MAE 등을 사용한 정확도 측정
from surprise import accuracy

# 훈련/검증 데이터 분류
from surprise.model_selection import train_test_split

In [4]:
ratings = pd.read_csv('./movie_rating.csv')
ratings

Unnamed: 0,critic,title,rating
0,Jack,Lady,3.0
1,Jack,Snakes,4.0
2,Jack,You Me,3.5
3,Jack,Superman,5.0
4,Jack,The Night,3.0
5,Mick,Lady,3.0
6,Mick,Snakes,4.0
7,Mick,Just My,2.0
8,Mick,Superman,3.0
9,Mick,You Me,2.0


In [5]:
movie_title = list(set(ratings['title']))
movie_title

['Just My', 'You Me', 'The Night', 'Snakes', 'Lady', 'Superman']

In [6]:
movie_critic = list(set(ratings['critic']))
movie_critic

['Lisa', 'Gene', 'Toby', 'Claudia', 'Jack', 'Mick']

In [11]:
data = pd.crosstab(index = ratings.critic, columns = ratings.title, 
            values = ratings.rating, aggfunc = np.sum)

In [14]:
data

title,Just My,Lady,Snakes,Superman,The Night,You Me
critic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Claudia,3.0,,3.5,4.0,4.5,2.5
Gene,1.5,3.0,3.5,5.0,3.0,3.5
Jack,,3.0,4.0,5.0,3.0,3.5
Lisa,3.0,2.5,3.5,3.5,3.0,2.5
Mick,2.0,3.0,4.0,3.0,3.0,2.0
Toby,,,4.5,4.0,,1.0


In [61]:
from surprise import Reader
reader = Reader(rating_scale = (0, 5.0))

In [62]:
data = Dataset.load_from_df(ratings[['critic','title','rating']], reader)
data

<surprise.dataset.DatasetAutoFolds at 0x150ce6e9070>

In [63]:
trainset, testset = train_test_split(data, test_size=.1, random_state=0)

In [64]:
#객체 생성 , #훈련
algo = SVD(n_factors=20, n_epochs= 30,  random_state=1)
algo.fit(trainset) 
predictions = algo.test( testset )
accuracy.rmse(predictions)

RMSE: 0.7334


0.7333564641976573

In [65]:
predictions = algo.test(testset)
predictions[:5]

[Prediction(uid='Jack', iid='Lady', r_ui=3.0, est=3.3722328746210497, details={'was_impossible': False}),
 Prediction(uid='Lisa', iid='You Me', r_ui=2.5, est=2.8210673313922223, details={'was_impossible': False}),
 Prediction(uid='Claudia', iid='The Night', r_ui=4.5, est=3.126918654202613, details={'was_impossible': False}),
 Prediction(uid='Claudia', iid='Just My', r_ui=3.0, est=2.844266612734386, details={'was_impossible': False})]

In [66]:
uid = 'Toby'
iid = 'Just My'
pred = algo.predict(uid,iid)
pred

Prediction(uid='Toby', iid='Just My', r_ui=None, est=2.7629850097817372, details={'was_impossible': False})

In [85]:
## 내가 안본 영화 리스트를 구해서, 그 중에서 추천

def get_unseen_surprise(movie_title, ratings, critic):
    ## 1. 전체 영화id 리스트
    total_movies = list(set(ratings['title']))
    ## 2. 내가 본 영화id 리스트 (TOBY)
    seen_movies = ratings[ratings['critic'] == critic]['title'].tolist()
    ## 3. 안본 영화 리스트 = 1 - 2
    unseen_movies = [ movie for movie in total_movies if movie not in seen_movies]
    
    print('전체 영화 수 >>> ',len(total_movies))
    print('평점 매긴 영화 수 >>> ',len(seen_movies))
    print('안 본 영화 수 >>> ',len(unseen_movies))
    print(total_movies)
    print(unseen_movies)
    return unseen_movies
    
unseen_movies = get_unseen_surprise(movie_title, ratings, 'Toby')

전체 영화 수 >>>  6
평점 매긴 영화 수 >>>  3
안 본 영화 수 >>>  3
['Just My', 'You Me', 'The Night', 'Snakes', 'Lady', 'Superman']
['Just My', 'The Night', 'Lady']


In [94]:
def recomm_movie_by_surprise(algo, critic, unseen_movies):
    ## 안본 영화리스트를 하나씩 꺼낸다음 평점을 예측하세요.
    predictions = [ algo.predict(critic, movie) for movie in unseen_movies]
    
    critic = [pred.uid for pred in predictions]
    titles = [pred.iid for pred in predictions]
    rating = [ pred.est for pred in predictions]
    ##titles = ratings[ratings.critic.isin(critic)]['title']
    preds = [ (critic, title, rating) for critic, title, rating in zip(critic, titles,rating)]
    return preds
    

In [95]:
recomm_movie_by_surprise(algo, 'Toby', unseen_movies)

[('Toby', 'Just My', 2.7629850097817372),
 ('Toby', 'The Night', 3.04438928944478),
 ('Toby', 'Lady', 3.043452040993306)]