
# 추천 시스템 - 협업 필터링 Matrix Factorization

## 참고자료

- https://lsjsj92.tistory.com/569
- https://lsjsj92.tistory.com/570


- https://github.com/SurhanZahid/Recommendation-System-Using-Matrix-Factorization/blob/master/Recommender%20System%20With%20Matrix%20Factorization%20.ipynb
- https://github.com/nikitaa30/Recommender-Systems/blob/master/matrix_factorisation_svd.py

- 데이터: kaggle의 https://www.kaggle.com/sengzhaotoo/movielens-small

# import

In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
from sklearn.decomposition import TruncatedSVD
from scipy.sparse.linalg import svds

In [39]:
# Load the ratings table and the movie metadata table as CSV files from GitHub.
ratings = pd.read_csv('https://raw.githubusercontent.com/StillWork/data/master/ratings.csv')
movies = pd.read_csv('https://raw.githubusercontent.com/StillWork/data/master/movies.csv')

In [40]:
print(movies.shape)
ratings.head()

(9125, 3)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [41]:
print(ratings.shape)
movies.head()

(100004, 4)


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


두 개의 파일은 사용자-평점 데이터와 영화 데이터로 나뉘어져 있습니다.   

이 두개의 파일은 공통적으로 movieId를 가지고 있습니다. 즉, movieId를 이용하면 하나로 합칠 수 있습니다.  


# 전처리 

In [42]:
# Join each rating with its movie's metadata through the shared movieId key.
ratings_movies = ratings.merge(movies, on='movieId')
print(ratings_movies.shape)
ratings_movies.head(3)

(100004, 6)


Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,31,2.5,1260759144,Dangerous Minds (1995),Drama
1,7,31,3.0,851868750,Dangerous Minds (1995),Drama
2,31,31,4.0,1273541953,Dangerous Minds (1995),Drama


In [43]:
# Movie x user table: one row per title, one column per userId, cells hold the
# rating; (title, user) pairs without a rating are NaN at this point.
movie_user_rating = ratings_movies.pivot_table(
    values='rating',
    index='title',
    columns='userId',
)
print(movie_user_rating.shape)
movie_user_rating.head(5)

(9064, 671)


userId,1,2,3,4,5,6,7,8,9,10,...,662,663,664,665,666,667,668,669,670,671
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",,,,,,,,,,,...,,,,,,,,,,
$9.99 (2008),,,,,,,,,,,...,,,,,,,,,,
'Hellboy': The Seeds of Creation (2004),,,,,,,,,,,...,,,,,,,,,,
'Neath the Arizona Skies (1934),,,,,,,,,,,...,,,,,,,,,,
'Round Midnight (1986),,,,,,,,,,,...,,,,,,,,,,


In [44]:
# Replace missing ratings with 0 so the table is fully numeric.
movie_user_rating = movie_user_rating.fillna(0)
print(movie_user_rating.shape)
movie_user_rating[:3]

(9064, 671)


userId,1,2,3,4,5,6,7,8,9,10,...,662,663,664,665,666,667,668,669,670,671
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$9.99 (2008),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Hellboy': The Seeds of Creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# SVD 사용

**SVD(Singular Value Decomposition), 특이값 분해**란 m x n 크기의 데이터 행렬 A를 아래와 같이 분해하는 것을 말합니다.

![15](https://user-images.githubusercontent.com/24634054/73115129-93138c00-3f65-11ea-9a10-80abc59a8494.JPG)


출처 : https://ratsgo.github.io/from%20frequency%20to%20semantics/2017/04/06/pcasvdlsa/

행렬 U와 V에 속한 열벡터는 특이벡터(singular vector)로 불리고, 이 특이벡터들은 서로 직교하는 성질을 가지고 있습니다.
또한, 가운데 시그마 기호로 보이는 것은 이것도 행렬인데요. 이 행렬은 대각행렬(diagonal matrix) 성질을 가지고 있습니다. 그래서 대각 성분이 행렬 A의 특이값이고 나머지 성분이 0입니다.

##  TruncatedSVD
그리고 사이킷런에서 제공해주는 truncated SVD는 이러한 SVD의 변형입니다.
**truncated SVD**는 시그마 행렬의 대각원소(특이값) 가운데 상위 n개만 골라낸 것입니다. 이렇게 하면 기존 행렬 A의 성질을 100% 원복할 수는 없지만, (그 만큼 데이터 정보를 압축) 행렬 A와 거의 근사한 값이 나오게 됩니다.



In [45]:
# Reduce every movie's 671-user rating vector to 12 latent components.
# TruncatedSVD uses a randomized solver by default, so pin random_state to keep
# the embedding (and every correlation derived from it) reproducible across runs.
SVD = TruncatedSVD(n_components=12, random_state=42)
matrix = SVD.fit_transform(movie_user_rating)
matrix.shape

(9064, 12)

In [46]:
matrix[0]

array([ 0.01227491,  0.00250769,  0.01554856, -0.03397247, -0.01447075,
        0.00359021, -0.00226947,  0.04499243, -0.01683092, -0.02126522,
        0.0107549 , -0.01039575])

12개의 component로 차원을 축소했습니다. 

이제 이렇게 나온 데이터를 활용해서 피어슨 상관계수를 구합니다.

In [47]:
# Each row of `matrix` is one movie's 12-component vector, so treating rows as
# variables yields a movie-by-movie Pearson correlation matrix.
corr = np.corrcoef(matrix, rowvar=True)
print(corr.shape)
corr[:10]

(9064, 9064)


array([[ 1.        ,  0.26913251,  0.01840859, ...,  0.42308414,
        -0.04769183,  0.15309891],
       [ 0.26913251,  1.        ,  0.10916845, ...,  0.18049075,
         0.78071081,  0.14156771],
       [ 0.01840859,  0.10916845,  1.        , ...,  0.45235392,
        -0.00829556, -0.04217744],
       ...,
       [ 0.47489172,  0.34025297,  0.60885233, ...,  0.82806304,
         0.01186597, -0.12681289],
       [ 0.48352093, -0.04016486,  0.10467327, ...,  0.47087506,
         0.00598573,  0.03867731],
       [ 0.39117656,  0.47545449,  0.15352821, ...,  0.23559076,
         0.40471446,  0.76664145]])

이렇게 나온 상관계수를 이용해서 특정 영화와 상관계수가 높은 영화를 뽑아줍니다.

In [48]:
# Label both axes of the correlation matrix with movie titles so that a movie
# can be looked up by name.
df_corr = pd.DataFrame(
    data=corr,
    index=movie_user_rating.index,
    columns=movie_user_rating.index,
)

In [49]:
df_corr

title,"""Great Performances"" Cats (1998)",$9.99 (2008),'Hellboy': The Seeds of Creation (2004),'Neath the Arizona Skies (1934),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),...,Zulu (1964),Zulu (2013),[REC] (2007),eXistenZ (1999),loudQUIETloud: A Film About the Pixies (2006),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931),İtirazım Var (2014)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",1.000000,0.269133,0.018409,0.150579,-0.133395,0.018409,0.426887,0.474892,0.483521,0.391177,...,0.001505,0.110988,0.270922,0.033012,0.235899,0.336990,0.016935,0.423084,-0.047692,0.153099
$9.99 (2008),0.269133,1.000000,0.109168,-0.134270,0.192633,0.109168,-0.036578,0.340253,-0.040165,0.475454,...,0.577227,-0.245568,0.547724,0.310368,0.309301,0.461722,0.131544,0.180491,0.780711,0.141568
'Hellboy': The Seeds of Creation (2004),0.018409,0.109168,1.000000,0.249554,0.093523,1.000000,0.048179,0.608852,0.104673,0.153528,...,0.093323,0.144450,0.289355,0.471215,0.330570,0.276847,-0.068688,0.452354,-0.008296,-0.042177
'Neath the Arizona Skies (1934),0.150579,-0.134270,0.249554,1.000000,-0.083869,0.249554,0.045416,0.065912,0.089343,0.265009,...,0.233652,0.506876,0.084290,0.069723,0.588762,0.032358,0.167473,0.306531,-0.209451,0.398952
'Round Midnight (1986),-0.133395,0.192633,0.093523,-0.083869,1.000000,0.093523,0.031013,0.336046,0.002105,0.249604,...,-0.001144,0.071539,-0.003413,0.231174,-0.064782,-0.037174,0.029077,0.043305,-0.021100,0.029782
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
xXx (2002),0.336990,0.461722,0.276847,0.032358,-0.037174,0.276847,0.021349,0.498649,0.037417,0.657077,...,0.164816,0.477696,0.441063,0.300132,0.123649,1.000000,0.553998,0.580712,0.301852,0.420789
xXx: State of the Union (2005),0.016935,0.131544,-0.068688,0.167473,0.029077,-0.068688,-0.027669,-0.154259,-0.059754,0.688803,...,-0.190027,0.478650,0.572477,-0.133682,0.353309,0.553998,1.000000,0.010870,0.265954,0.892539
¡Three Amigos! (1986),0.423084,0.180491,0.452354,0.306531,0.043305,0.452354,0.439472,0.828063,0.470875,0.235591,...,0.458546,0.513379,-0.044089,0.549796,0.117030,0.580712,0.010870,1.000000,-0.166170,-0.029849
À nous la liberté (Freedom for Us) (1931),-0.047692,0.780711,-0.008296,-0.209451,-0.021100,-0.008296,0.040534,0.011866,0.005986,0.404714,...,0.403246,-0.441900,0.710795,0.365069,0.307145,0.301852,0.265954,-0.166170,1.000000,0.332172


In [50]:
movie_user_rating.index[:10]

Index(['"Great Performances" Cats (1998)', '$9.99 (2008)',
       ''Hellboy': The Seeds of Creation (2004)',
       ''Neath the Arizona Skies (1934)', ''Round Midnight (1986)',
       ''Salem's Lot (2004)', ''Til There Was You (1997)',
       ''burbs, The (1989)', ''night Mother (1986)',
       '(500) Days of Summer (2009)'],
      dtype='object', name='title')

In [51]:
# Recommend the 10 movies whose latent vectors correlate most strongly with the
# target movie. The target is removed by label instead of slicing off position
# 0: other movies can tie at correlation 1.0, so the target is not guaranteed
# to sort first and a positional slice could drop a real match instead.
target_title = "Godfather, The (1972)"
df_corr[target_title].drop(target_title).sort_values(ascending=False)[:10]

title
Godfather: Part II, The (1974)            0.989970
Goodfellas (1990)                         0.978178
One Flew Over the Cuckoo's Nest (1975)    0.951653
Once Upon a Time in America (1984)        0.940436
Apocalypse Now (1979)                     0.939483
L.A. Confidential (1997)                  0.932312
Platoon (1986)                            0.928905
Thin Red Line, The (1998)                 0.927027
Miller's Crossing (1990)                  0.921899
American Beauty (1999)                    0.919806
Name: Godfather, The (1972), dtype: float64

# 특정 사용자에게 추천하기

위에서는 하나의 영화에 대해서 비슷한 영화를 추천해주는 것을 적용했습니다.   
하지만, 보통 추천 시스템은 사용자에게 추천을 해주어야 합니다.  

사용자에게 영화 추천을 해주기 위해서 사용자 맞춤 협업 필터링 행렬 분해를 적용해보겠습니다.

In [52]:
# User x movie rating table: rows are userId, columns are movieId.
df_user_movie_ratings = ratings.pivot(index='userId', columns='movieId', values='rating')
# User/movie pairs without a rating become 0.
df_user_movie_ratings = df_user_movie_ratings.fillna(0)

In [53]:
print(df_user_movie_ratings.shape)
df_user_movie_ratings.head()

(671, 9066)


movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


여기까지는 앞서 했던 것과 똑같습니다. 사용자-영화 pivot table을 만드는 것이죠.   

이제 아래와 같이 데이터를 조금 변경해서 진행하겠습니다.

1. pivot table을 matrix로 변환
2. np.mean(axis = 1)을 통해 각 사용자들이 매기는 평점 평균을 구함 (평점이 없는 영화는 0으로 채워져 있으므로, 0을 포함한 행 전체의 평균임)
3. 1에서 구한 값에서 2에서 구한 값을 빼서 사용자-평균 데이터 값을 변경

In [54]:
# Work on the raw ndarray behind the user x movie table.
matrix = df_user_movie_ratings.values
# Per-user row mean. NOTE: unrated entries were filled with 0 upstream, so this
# is the mean over every movie column, not only over the movies the user rated.
user_ratings_mean = matrix.mean(axis=1)
# Subtract each user's mean from that user's row (column vector broadcasts
# across the movie axis).
matrix_user_mean = matrix - user_ratings_mean[:, np.newaxis]

In [55]:
matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [5., 0., 0., ..., 0., 0., 0.]])

In [56]:
matrix.shape

(671, 9066)

In [57]:
user_ratings_mean.shape

(671,)

In [58]:
matrix_user_mean.shape

(671, 9066)

In [59]:
pd.DataFrame(matrix_user_mean, columns = df_user_movie_ratings.columns).head()

movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
0,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,...,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625,-0.005625
1,-0.02923,-0.02923,-0.02923,-0.02923,-0.02923,-0.02923,-0.02923,-0.02923,-0.02923,3.97077,...,-0.02923,-0.02923,-0.02923,-0.02923,-0.02923,-0.02923,-0.02923,-0.02923,-0.02923,-0.02923
2,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,...,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075,-0.020075
3,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,3.902162,...,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838,-0.097838
4,-0.043128,-0.043128,3.956872,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,...,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128,-0.043128


여기까지 진행하면 초기에 만들었던 user-movie pivot table 값이 matrix_user_mean으로 변환되었습니다.

즉, 아래와 같이 변경된 것이죠.

1. 사용자 671명이 9066개의 영화에 대해 평점을 매긴 값이 존재
2. 사용자 671명의 각각 평균 평점을 반영 (개인간 바이어스 삭제)


## SVD를 이용해 Matrix Factorization 수행

- 앞에서는 scikit learn의 TruncatedSVD를 이용함
- scipy가 제공하는 Truncated SVD를 사용하겠다.
 - U, Sigma, Vt 를 얻는다
 - scipy.sparse.linalg.svds를 이용

In [62]:
# Truncated SVD provided by SciPy (scipy.sparse.linalg.svds), keeping k = 12
# singular values. Returns U, the singular values as a 1-D array (not yet a
# matrix), and Vt.

U, sigma, Vt = svds(matrix_user_mean, k = 12)

In [63]:
print(U.shape)
print(sigma.shape)
print(Vt.shape)

(671, 12)
(12,)
(12, 9066)


현재 이 Sigma 행렬은 0이 아닌 값만 1차원 행렬로 표현된 상태입니다.  
**즉, 나머지 성분이 0인 대각행렬로 변환할 때는 numpy의 diag를 이용해야 합니다.**

In [64]:
# svds returns the singular values as a 1-D array; expand them into a square
# diagonal matrix so they can be used in a matrix product.
# Guard on ndim so re-running this cell is a no-op: np.diag applied to a 2-D
# array extracts its diagonal, which would collapse sigma back to 1-D.
if sigma.ndim == 1:
    sigma = np.diag(sigma)
sigma.shape

(12, 12)

In [65]:
sigma[0]

array([105.72437051,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ])

In [66]:
sigma[1]

array([  0.        , 110.15522471,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ])

## 원본 행렬로 복구

- U, Sigma, Vt의 내적을 수행 
- np.dot(np.dot(U, sigma), Vt)를 수행하며 사용자 평균을 더한다

In [67]:
# Multiply the factors back together (U x Sigma x Vt). Because only k = 12
# singular values were kept, this is a low-rank approximation of the
# mean-centered matrix rather than an exact copy of it.
# Each user's row mean, removed before the decomposition, is then added back.
svd_user_predicted_ratings = U @ sigma @ Vt + user_ratings_mean[:, np.newaxis]

In [69]:
# Wrap the predicted ratings in a DataFrame whose columns are the movieIds of
# the user x movie table; rows keep a default 0-based positional index.
df_svd_preds = pd.DataFrame(
    data=svd_user_predicted_ratings,
    columns=df_user_movie_ratings.columns,
)
print(df_svd_preds.shape)
df_svd_preds.head(5)

(671, 9066)


movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
0,-0.079686,0.021779,-0.013837,-0.00587,-0.028877,0.032371,0.000715,-0.004428,-0.005219,0.038195,...,-0.004324,-0.004352,0.010478,-0.004256,-0.003944,-0.005674,0.018157,-0.005575,-0.005297,-0.003766
1,1.428452,1.608841,0.529476,0.168278,0.520809,1.107473,0.529719,0.089376,0.29627,1.970031,...,0.013227,-0.002275,0.02068,-0.005245,-0.007644,-0.021019,0.031243,-0.000957,-0.000753,0.026901
2,0.977246,0.396971,0.000299,0.027444,0.021287,0.141458,-0.057134,0.031633,-0.012538,0.383576,...,0.002761,0.004907,-0.01419,-0.000251,-0.006007,-0.003189,-0.026916,0.014637,0.013287,-0.005741
3,1.870844,1.169993,0.252202,0.094831,-0.181713,-0.511953,-0.02782,-0.14308,0.013247,1.461694,...,0.026412,-0.027245,0.054681,0.01845,0.034544,-0.03574,0.088889,-0.019365,-0.017113,0.066559
4,1.182777,0.924903,0.075998,0.061505,0.60268,-0.159825,0.339925,0.081534,-0.079666,0.535018,...,-0.029124,-0.029357,0.009064,-0.029092,-0.03089,-0.057453,0.026344,-0.024027,-0.024614,-0.032752


# 추천수행 함수

- 인자로 사용자 아이디, 영화 정보 테이블, 평점 테이블 등을 받음
- 사용자 아이디에 SVD로 나온 결과의 영화 평점이 가장 높은 데이터 순으로 정렬
- 사용자가 본 영화를 제외
- 사용자가 안 본 영화에서 평점이 높은 것을 추천

In [70]:
def recommend_movies(df_svd_preds, user_id, ori_movies_df, ori_ratings_df, num_recommendations=5):
    """Recommend the highest-predicted movies that a user has not rated yet.

    Args:
        df_svd_preds: DataFrame of predicted ratings, one row per user and one
            column per movie. Rows are addressed by position ``user_id - 1``,
            so row 0 must hold user 1, row 1 user 2, and so on. The column
            index must be named ``movieId``.
        user_id: 1-based id of the user to recommend for.
        ori_movies_df: Movie table; must contain a ``movieId`` column.
        ori_ratings_df: Ratings table; must contain ``userId``, ``movieId``
            and ``rating`` columns.
        num_recommendations: Maximum number of recommendations to return.

    Returns:
        A ``(user_history, recommendations)`` tuple. ``user_history`` holds the
        user's existing ratings joined with the movie table, highest rating
        first. ``recommendations`` holds the movies the user has not rated,
        with a ``Predictions`` column, highest prediction first.

    Raises:
        IndexError: If ``user_id`` does not map to a row of ``df_svd_preds``.
    """
    # Rows of df_svd_preds are positional, so user N lives at position N - 1.
    user_row_number = user_id - 1

    # Reject out-of-range ids explicitly. Without this check a user_id <= 0
    # becomes a negative position and iloc silently returns another user's row.
    if not 0 <= user_row_number < len(df_svd_preds):
        raise IndexError(
            f"user_id {user_id} is out of range for {len(df_svd_preds)} prediction rows"
        )

    # Name the prediction column directly instead of renaming by row label, so
    # the result does not depend on df_svd_preds carrying a default RangeIndex.
    user_predictions = df_svd_preds.iloc[user_row_number].rename('Predictions')

    # Ratings the user has already given, joined with the movie metadata.
    user_data = ori_ratings_df[ori_ratings_df['userId'] == user_id]
    user_history = user_data.merge(ori_movies_df, on='movieId').sort_values(['rating'], ascending=False)

    # Movies the user has not rated yet.
    unseen_movies = ori_movies_df[~ori_movies_df['movieId'].isin(user_history['movieId'])]

    # Attach the predicted rating to each unseen movie and keep the top ones.
    # The predictions are not pre-sorted: the merge follows the movie table's
    # order, so only the sort after the merge determines the ranking.
    recommendations = (
        unseen_movies
        .merge(user_predictions.reset_index(), on='movieId')
        .sort_values('Predictions', ascending=False)
        .iloc[:num_recommendations, :]
    )

    return user_history, recommendations

In [71]:
already_rated, predictions = recommend_movies(df_svd_preds, 330, movies, ratings, 10)

In [72]:
already_rated.head(10)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
47,330,1094,5.0,948577200,"Crying Game, The (1992)",Drama|Romance|Thriller
14,330,213,5.0,948577039,Burnt by the Sun (Utomlyonnye solntsem) (1994),Drama
34,330,527,5.0,948577164,Schindler's List (1993),Drama|War
45,330,1035,5.0,948574126,"Sound of Music, The (1965)",Musical|Romance
48,330,1172,5.0,948575236,Cinema Paradiso (Nuovo cinema Paradiso) (1989),Drama
53,330,1249,5.0,948576522,"Femme Nikita, La (Nikita) (1990)",Action|Crime|Romance|Thriller
27,330,373,5.0,948576522,Red Rock West (1992),Thriller
58,330,1428,5.0,948731410,Angel Baby (1995),Drama
61,330,1575,5.0,948577442,Gabbeh (1996),Drama
67,330,1683,5.0,948578017,"Wings of the Dove, The (1997)",Drama|Romance


In [73]:
predictions

Unnamed: 0,movieId,title,genres,Predictions
246,296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,2.060064
294,356,Forrest Gump (1994),Comedy|Drama|Romance|War,1.772814
497,608,Fargo (1996),Comedy|Crime|Drama|Thriller,1.526111
487,590,Dances with Wolves (1990),Adventure|Drama|Western,1.50896
316,380,True Lies (1994),Action|Adventure|Comedy|Romance|Thriller,1.276646
40,47,Seven (a.k.a. Se7en) (1995),Mystery|Thriller,1.265219
313,377,Speed (1994),Action|Romance|Thriller,1.142113
214,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi,1.076539
295,357,Four Weddings and a Funeral (1994),Comedy|Romance,1.065911
488,592,Batman (1989),Action|Crime|Thriller,1.048861
