
구현 참고: https://github.com/gbolmier/funk-svd/blob/master/funk_svd/fast_methods.py


In [2]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append("../src")

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
ds_ratings = pd.read_csv("../ml-latest-small/ratings.csv")
ds_movies = pd.read_csv("../ml-latest-small/movies.csv")
ds_ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [5]:
seed = 1 # 
# 유저별 평가 개수를 얻어옵니다
rating_counts = ds_ratings.groupby("userId").count().movieId
# 100회 이상 평가한 유저 10명을 무작위로 선발합니다
random_users = rating_counts[rating_counts >= 100].sample(10, random_state=seed)
dropped_ratings = []

for user_id in random_users.index:
    user_ratings = ds_ratings[ds_ratings.userId == user_id]
    
    # 4점 이상 평가 5개, 4점 미만 평가 4개를 무작위로 선정합니다.
    dropped_ratings.append(
        user_ratings[user_ratings.rating >= 4].sample(5, random_state=seed)
    )
    dropped_ratings.append(
        user_ratings[user_ratings.rating < 4].sample(5, random_state=seed)
    )
    
dropped_ratings = pd.concat(dropped_ratings)
dropped_ratings

Unnamed: 0,userId,movieId,rating,timestamp
22513,153,8784,4.5,1525548730
22525,153,55247,5.0,1525548301
22458,153,1704,4.0,1525548099
22571,153,109374,4.0,1525548490
22593,153,168492,4.5,1525548280
...,...,...,...,...
1207,10,72330,3.5,1455399573
1235,10,95543,3.5,1455399376
1187,10,56367,3.5,1455301825
1203,10,70183,3.0,1455357636


In [6]:
# 선정된 평가들 100개는 학습에서 제외시킵니다
ds_ratings.drop(dropped_ratings.index, inplace=True)
ds_ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [121]:
learning_rate = 0.01
num_epochs = 100
regularization = 0.01
k = 64

# len(ds_movies)
len(ds_movies.movieId.unique())

9742

In [148]:
def normalize(X, min_value, max_value):
    return (X - X.min()) / (X.max() - X.min()) * (max_value - min_value) + min_value

class FunkSVD:
    def __init__(self, ratings, movies, k, regularization=0.2):
        self.ratings = ratings.copy()
        self.ratings.rating = normalize(self.ratings.rating, 0, 1)
        self.movies = movies
        self.global_mean = self.ratings.rating.mean()
        self.regularization = regularization
        
        # 유저, 아이템 ID가 인덱스와 일치하지 않으므로 변환하기 위한 사전
        self.user_ids = {user: i for i, user in enumerate(ratings.userId.unique())}
        self.item_ids = {item: i for i, item in enumerate(movies.movieId.unique())}
        
        self.num_users = len(self.user_ids)
        self.num_items = len(self.item_ids)
        
        # 유저 행렬(N x k), 아이템 행렬(M x k)
        self.users = np.random.normal(0.0, 1.0, (self.num_users, k))
        self.items = np.random.normal(0.0, 1.0, (self.num_items, k))
        
        # 편향 벡터들(N x 1, M x 1)
        self.user_biases = np.zeros(self.num_users)
        self.item_biases = np.zeros(self.num_items)

    def fit(self, num_epochs=100, learning_rate=0.005):
        for epoch in range(num_epochs):
            total_error = 0
            
            for i, (user_id, item_id, rating, timestamp) in self.ratings.iterrows():
                # 유저 벡터와 아이템 백터를 가져온 뒤 내적하여 예측합니다
                user_id = self.user_ids[user_id]
                item_id = self.item_ids[item_id]

                pred = np.dot(self.users[user_id], self.items[item_id]) + self.global_mean
                pred += self.user_biases[user_id] + self.item_biases[item_id]

                # 예측값과 실제 값의 차이(error)를 계산하고
                # learning rate만큼 갱신합니다.
                error = rating - pred
                
                self.user_biases[user_id] += learning_rate * (error -  self.regularization * self.user_biases[user_id])
                self.item_biases[item_id] += learning_rate * (error -  self.regularization * self.item_biases[item_id])
                
                self.users[user_id] += learning_rate * (error * self.items[item_id] - self.regularization * self.users[user_id])
                self.items[item_id] += learning_rate * (error * self.users[user_id] - self.regularization * self.items[item_id]) 

                total_error += error ** 2

            total_error /= len(self.ratings)

            print("epoch {0}, mean_error: {1}".format(epoch, total_error))
            
        return self
    
    def get_recommendations(self, user_id, k=None):
        user_movie_ids = self.ratings[self.ratings.userId == user_id].movieId
        
        user_row_id = self.user_ids[user_id]
        
        # 유저 행렬과 전체 아이템 행렬을 곱해서 모든 영화에 대한 예측값을 구합니다
        preds = np.dot(self.users[user_row_id], self.items.T) + self.global_mean
        preds = preds * 5
        # 예측값에서 유저가 이미 본 영화는 제외하고 점수순으로 정렬합니다.
        preds = pd.Series(preds, index=self.item_ids.keys())
        preds = preds[~preds.index.isin(user_movie_ids)]
        preds = pd.DataFrame({
            "movie_id": preds.index,
            "score": preds.values
        })
        preds.sort_values(by="score", ascending=False, inplace=True)
        preds["rank"] = range(1, len(preds) + 1)
        
        preds = preds.head(k) if k is not None else preds
    
        recommended_movies = self.movies[self.movies.movieId.isin(preds.movie_id)]
        recommended_movies = recommended_movies.merge(
            preds,
            left_on="movieId",
            right_on="movie_id"
        ).sort_values("rank")
        
        del recommended_movies["movie_id"]
        
        return recommended_movies
        
    def predict_rating(self, user_id, item_id):
        user_row_id = self.user_ids[user_id]
        item_row_id = self.item_ids[item_id]
        
        pred = np.dot(self.users[user_row_id], self.items[item_row_id]) + self.global_mean
        
        return pred * 5

In [149]:
funksvd = FunkSVD(ds_ratings, ds_movies, k).fit(learning_rate=learning_rate, num_epochs=10)
# funksvd.global_mean

epoch 0, mean_error: 13.316329108348272
epoch 1, mean_error: 0.6029268335678294
epoch 2, mean_error: 0.121714409761506
epoch 3, mean_error: 0.06089990971195851
epoch 4, mean_error: 0.04390227050429089
epoch 5, mean_error: 0.037437681775976214
epoch 6, mean_error: 0.03448388110979064
epoch 7, mean_error: 0.032939503772051876
epoch 8, mean_error: 0.032043029773714204
epoch 9, mean_error: 0.03147906761062425


In [150]:
funksvd.get_recommendations(609, 10)

Unnamed: 0,movieId,title,genres,score,rank
7,93805,Iron Sky (2012),Action|Comedy|Sci-Fi,59.730406,1
3,8518,Anna Christie (1930),Drama,59.513044,2
8,109317,Someone Marry Barry (2014),Comedy,58.915483,3
9,166183,Junior and Karlson (1968),Adventure|Animation|Children,57.040604,4
4,51666,"Abandoned, The (2006)",Horror|Mystery|Thriller,57.016808,5
1,4082,Barfly (1987),Comedy|Drama|Romance,56.39506,6
6,61394,"Onion Movie, The (2008)",Comedy,55.11601,7
0,3125,"End of the Affair, The (1999)",Drama,53.484077,8
2,5685,Real Women Have Curves (2002),Comedy|Drama,51.630308,9
5,56389,My Blueberry Nights (2007),Drama|Romance,51.09084,10


In [151]:
funksvd.predict_rating(609, 432)

0.6015662197744598

In [126]:
from svd import SVD


svd = SVD(ds_ratings, ds_movies, 64)

In [152]:
from IPython.display import display


win_rates = 0

for user_id in dropped_ratings.userId.unique():
    targets = dropped_ratings[dropped_ratings.userId == user_id].movieId
    svd_rec = svd.get_recommendations(user_id)
    svd_rec = svd_rec[svd_rec.movieId.isin(targets)]
    svd_rec["svd_rank"] = svd_rec.index
    svd_rec.index = svd_rec.movieId
    
    funksvd_rec = funksvd.get_recommendations(user_id)
    funksvd_rec = funksvd_rec[funksvd_rec.movieId.isin(targets)]
    funksvd_rec.index = funksvd_rec.movieId
    funksvd_rec.rename({"rank":"funksvd_rank"}, axis=1, inplace=True)
    
    svd_rec.drop(["recommendation_score", "movieId"], axis=1, inplace=True)
    result = pd.concat([svd_rec, funksvd_rec["funksvd_rank"]], axis=1)
    display(result)
    
    win_rate = (result.funksvd_rank < result.svd_rank).mean()
    win_rates += win_rate
    print("funksvd > svd : ", win_rate, "rating_count", ds_ratings[ds_ratings.userId == user_id].count() + 10)

print("total", win_rates / len(dropped_ratings.userId.unique()))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations["recommendation_score"] = user_predictions.values


Unnamed: 0_level_0,title,genres,svd_rank,funksvd_rank
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1704,Good Will Hunting (1997),Drama|Romance,1284,5573
2671,Notting Hill (1999),Comedy|Romance,2007,5158
3408,Erin Brockovich (2000),Drama,2546,4514
8784,Garden State (2004),Comedy|Drama|Romance,5302,5468
55247,Into the Wild (2007),Action|Adventure|Drama,6580,2543
63082,Slumdog Millionaire (2008),Crime|Drama|Romance,6885,4280
69757,(500) Days of Summer (2009),Comedy|Drama|Romance,7075,3808
72998,Avatar (2009),Action|Adventure|Sci-Fi|IMAX,7212,5353
109374,"Grand Budapest Hotel, The (2014)",Comedy|Drama,8372,5693
168492,Call Me by Your Name (2017),Drama|Romance,9472,4548


funksvd > svd :  0.6 rating_count userId       179
movieId      179
rating       179
timestamp    179
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations["recommendation_score"] = user_predictions.values


Unnamed: 0_level_0,title,genres,svd_rank,funksvd_rank
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
968,Night of the Living Dead (1968),Horror|Sci-Fi|Thriller,741.0,7420
1343,Cape Fear (1991),Thriller,1031.0,6742
2716,Ghostbusters (a.k.a. Ghost Busters) (1984),Action|Comedy|Sci-Fi,2038.0,4730
3147,"Green Mile, The (1999)",Crime|Drama,2372.0,4235
3727,Near Dark (1987),Horror|Western,2786.0,3167
8865,Sky Captain and the World of Tomorrow (2004),Action|Adventure|Sci-Fi,5330.0,7081
60941,"Midnight Meat Train, The (2008)",Horror|Mystery|Thriller,6812.0,2011
94864,Prometheus (2012),Action|Horror|Sci-Fi|IMAX,7888.0,802
107771,,,,582
111759,Edge of Tomorrow (2014),Action|Sci-Fi|IMAX,8438.0,7577


funksvd > svd :  0.3 rating_count userId       1302
movieId      1302
rating       1302
timestamp    1302
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations["recommendation_score"] = user_predictions.values


Unnamed: 0_level_0,title,genres,svd_rank,funksvd_rank
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1259,Stand by Me (1986),Adventure|Drama,958,4442
1968,"Breakfast Club, The (1985)",Comedy|Drama,1445,4092
2085,101 Dalmatians (One Hundred and One Dalmatians...,Adventure|Animation|Children,1549,6052
2324,Life Is Beautiful (La Vita è bella) (1997),Comedy|Drama|Romance|War,1730,4769
4823,Serendipity (2001),Comedy|Romance,3529,4876
5816,Harry Potter and the Chamber of Secrets (2002),Adventure|Fantasy,4076,4159
74530,Percy Jackson & the Olympians: The Lightning T...,Adventure|Fantasy,7261,3
103107,20 Feet from Stardom (Twenty Feet from Stardom...,Documentary,8187,3592
112556,Gone Girl (2014),Drama|Thriller,8467,3011
116797,The Imitation Game (2014),Drama|Thriller|War,8569,3565


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations["recommendation_score"] = user_predictions.values


funksvd > svd :  0.4 rating_count userId       500
movieId      500
rating       500
timestamp    500
dtype: int64


Unnamed: 0_level_0,title,genres,svd_rank,funksvd_rank
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
788,"Nutty Professor, The (1996)",Comedy|Fantasy|Romance|Sci-Fi,622,5402
1210,Star Wars: Episode VI - Return of the Jedi (1983),Action|Adventure|Sci-Fi,911,4877
1291,Indiana Jones and the Last Crusade (1989),Action|Adventure,990,4552
1387,Jaws (1975),Action|Horror,1067,5593
2054,"Honey, I Shrunk the Kids (1989)",Adventure|Children|Comedy|Fantasy|Sci-Fi,1522,4237
2762,"Sixth Sense, The (1999)",Drama|Horror|Mystery,2078,4619
2890,Three Kings (1999),Action|Adventure|Comedy|Drama|War,2174,6846
3578,Gladiator (2000),Action|Adventure|Drama,2674,4603
5445,Minority Report (2002),Action|Crime|Mystery|Sci-Fi|Thriller,3873,5240
48738,"Last King of Scotland, The (2006)",Drama|Thriller,6327,5844


funksvd > svd :  0.1 rating_count userId       191
movieId      191
rating       191
timestamp    191
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations["recommendation_score"] = user_predictions.values


Unnamed: 0_level_0,title,genres,svd_rank,funksvd_rank
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
150,Apollo 13 (1995),Adventure|Drama|IMAX,123,4657
208,Waterworld (1995),Action|Adventure|Sci-Fi,176,4801
1197,"Princess Bride, The (1987)",Action|Adventure|Comedy|Fantasy|Romance,899,4629
3793,X-Men (2000),Action|Adventure|Sci-Fi,2836,4833
7569,You Only Live Twice (1967),Action|Adventure|Sci-Fi|Thriller,4965,1787
8961,"Incredibles, The (2004)",Action|Adventure|Animation|Children|Comedy,5374,4389
49272,Casino Royale (2006),Action|Adventure|Thriller,6346,6346
60040,"Incredible Hulk, The (2008)",Action|Sci-Fi,6770,6354
60072,Wanted (2008),Action|Thriller,6773,2491
63540,Beverly Hills Chihuahua (2008),Adventure|Children|Comedy,6898,9459


funksvd > svd :  0.4 rating_count userId       207
movieId      207
rating       207
timestamp    207
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations["recommendation_score"] = user_predictions.values


Unnamed: 0_level_0,title,genres,svd_rank,funksvd_rank
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
58998,Forgetting Sarah Marshall (2008),Comedy|Romance,6725,7504
68135,17 Again (2009),Comedy|Drama,7009,5014
70641,Miss March (2009),Comedy,7105,3400
87869,Horrible Bosses (2011),Comedy|Crime,7638,5978
88672,Our Idiot Brother (2011),Comedy,7659,557
94323,Think Like a Man (2012),Comedy,7874,681
107348,Anchorman 2: The Legend Continues (2013),Comedy,8321,1333
109372,About Last Night (2014),Comedy|Romance,8371,1956
131013,Get Hard (2015),Comedy|Crime,8816,745
140711,American Ultra (2015),Action|Comedy|Sci-Fi|Thriller,9023,6687


funksvd > svd :  0.9 rating_count userId       157
movieId      157
rating       157
timestamp    157
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations["recommendation_score"] = user_predictions.values


Unnamed: 0_level_0,title,genres,svd_rank,funksvd_rank
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
185,"Net, The (1995)",Action|Crime|Thriller,156,5222
253,Interview with the Vampire: The Vampire Chroni...,Drama|Horror,217,5128
2683,Austin Powers: The Spy Who Shagged Me (1999),Action|Adventure|Comedy,2014,4530
2706,American Pie (1999),Comedy|Romance,2031,4567
2890,Three Kings (1999),Action|Adventure|Comedy|Drama|War,2174,7388
3698,"Running Man, The (1987)",Action|Sci-Fi,2760,1964
4310,Pearl Harbor (2001),Action|Drama|Romance|War,3197,4486
4447,Legally Blonde (2001),Comedy|Romance,3287,6146
5816,Harry Potter and the Chamber of Secrets (2002),Adventure|Fantasy,4076,5784
8874,Shaun of the Dead (2004),Comedy|Horror,5335,6167


funksvd > svd :  0.1 rating_count userId       140
movieId      140
rating       140
timestamp    140
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations["recommendation_score"] = user_predictions.values


Unnamed: 0_level_0,title,genres,svd_rank,funksvd_rank
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
508,Philadelphia (1993),Drama,443,4623
1036,Die Hard (1988),Action|Crime|Thriller,793,5187
1148,Wallace & Gromit: The Wrong Trousers (1993),Animation|Children|Comedy|Crime,868,3363
1198,Raiders of the Lost Ark (Indiana Jones and the...,Action|Adventure,900,4750
1291,Indiana Jones and the Last Crusade (1989),Action|Adventure,990,4753
5816,Harry Potter and the Chamber of Secrets (2002),Adventure|Fantasy,4076,4589
8961,"Incredibles, The (2004)",Action|Adventure|Animation|Children|Comedy,5374,5105
50872,Ratatouille (2007),Animation|Children|Drama,6405,5243
56367,Juno (2007),Comedy|Drama|Romance,6638,6077
60069,WALL·E (2008),Adventure|Animation|Children|Romance|Sci-Fi,6772,4146


funksvd > svd :  0.4 rating_count userId       115
movieId      115
rating       115
timestamp    115
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations["recommendation_score"] = user_predictions.values


Unnamed: 0_level_0,title,genres,svd_rank,funksvd_rank
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
317,"Santa Clause, The (1994)",Comedy|Drama|Fantasy,276,5294
355,"Flintstones, The (1994)",Children|Comedy|Fantasy,313,6929
415,Another Stakeout (1993),Comedy|Thriller,359,7405
457,"Fugitive, The (1993)",Thriller,398,4898
471,"Hudsucker Proxy, The (1994)",Comedy,409,2949
508,Philadelphia (1993),Drama,443,5718
531,"Secret Garden, The (1993)",Children|Drama,464,4231
551,"Nightmare Before Christmas, The (1993)",Animation|Children|Fantasy|Musical,483,6683
589,Terminator 2: Judgment Day (1991),Action|Sci-Fi,507,4781
593,"Silence of the Lambs, The (1991)",Crime|Horror|Thriller,510,4841


funksvd > svd :  0.0 rating_count userId       106
movieId      106
rating       106
timestamp    106
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations["recommendation_score"] = user_predictions.values


Unnamed: 0_level_0,title,genres,svd_rank,funksvd_rank
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1907,Mulan (1998),Adventure|Animation|Children|Comedy|Drama|Musi...,1390.0,6556
56367,Juno (2007),Comedy|Drama|Romance,6638.0,5722
68954,Up (2009),Adventure|Animation|Children|Drama,7039.0,5298
70183,"Ugly Truth, The (2009)",Comedy|Drama|Romance,7087.0,3564
72330,,,,9038
94070,"Best Exotic Marigold Hotel, The (2011)",Comedy|Drama,7867.0,5929
95543,Ice Age 4: Continental Drift (2012),Adventure|Animation|Comedy,7929.0,8042
96079,Skyfall (2012),Action|Adventure|Thriller|IMAX,7955.0,4127
103339,White House Down (2013),Action|Drama|Thriller|IMAX,8201.0,7137
109487,Interstellar (2014),Sci-Fi|IMAX,8376.0,4316


funksvd > svd :  0.7 rating_count userId       140
movieId      140
rating       140
timestamp    140
dtype: int64
total 0.38999999999999996


In [128]:
from IPython.display import display


for user_id in dropped_ratings.userId.unique():
    ratings = dropped_ratings[dropped_ratings.userId == user_id] 
    rec = funksvd.get_recommendations(user_id)
    rec = rec[rec.movieId.isin(ratings.movieId)]
    
    rec = ratings.merge(rec, left_on="movieId", right_on="movieId")
    rec.sort_values(by="rating", ascending=False, inplace=True)
    rec["diff"] = rec.rating - rec.score
    display(rec)
    
    break

Unnamed: 0,userId,movieId,rating,timestamp,title,genres,score,rank,diff
1,153,55247,5.0,1525548301,Into the Wild (2007),Action|Adventure|Drama,2.801153,7331,2.198847
0,153,8784,4.5,1525548730,Garden State (2004),Comedy|Drama|Romance,3.575066,4452,0.924934
4,153,168492,4.5,1525548280,Call Me by Your Name (2017),Drama|Romance,3.888095,3243,0.611905
2,153,1704,4.0,1525548099,Good Will Hunting (1997),Drama|Romance,3.400796,5151,0.599204
3,153,109374,4.0,1525548490,"Grand Budapest Hotel, The (2014)",Comedy|Drama,3.689779,3985,0.310221
6,153,69757,3.5,1525548722,(500) Days of Summer (2009),Comedy|Drama|Romance,3.947745,3017,-0.447745
9,153,3408,2.5,1525548836,Erin Brockovich (2000),Drama,3.260822,5708,-0.760822
7,153,63082,1.5,1525548393,Slumdog Millionaire (2008),Crime|Drama|Romance,3.50833,4712,-2.00833
5,153,72998,1.0,1525552980,Avatar (2009),Action|Adventure|Sci-Fi|IMAX,3.76954,3660,-2.76954
8,153,2671,1.0,1525550835,Notting Hill (1999),Comedy|Romance,2.091108,8791,-1.091108
