In [1]:
# Load files
import pandas as pd
df_rat = pd.read_csv('../data/ratings.csv')
df_mov = pd.read_csv('../data/movies.csv')
df_mov.drop_duplicates(subset=['title'], keep='first', inplace=True)
df = pd.merge(df_rat, df_mov , on='movieId', how='inner')

In [2]:
user = {'title' : ["Toy Story (1995)", "Jurassic Park (1993)" , "The Butterfly Effect (2004)", 
                "Braveheart (1995)", "Inception (2010)"],'rating' : [2,2,5,1,5]}

In [3]:
user_input = pd.DataFrame(user)
user_input

Unnamed: 0,title,rating
0,Toy Story (1995),2
1,Jurassic Park (1993),2
2,The Butterfly Effect (2004),5
3,Braveheart (1995),1
4,Inception (2010),5


In [4]:
uniq_mov = pd.DataFrame(df_rat['movieId'].unique())
df_uniq = pd.merge(uniq_mov, df_mov, left_on=0, right_on='movieId')
user_ratings = pd.merge(df_uniq, user_input, how = 'outer')
new_user = user_ratings['rating'].fillna(0)
new_user

0       2.0
1       0.0
2       0.0
3       0.0
4       0.0
       ... 
9714    0.0
9715    0.0
9716    0.0
9717    0.0
9718    0.0
Name: rating, Length: 9719, dtype: float64

In [5]:
# load pretrained NMF_model
import pickle
saved_model = open('nmf_model.bin', 'rb').read()
model = pickle.loads(saved_model)

In [6]:
model.components_

array([[6.84785535e-02, 1.05975102e-02, 2.09585342e-02, ...,
        8.19053403e-02, 8.57659072e-02, 9.61214695e-02],
       [1.14275413e-02, 1.11969493e-01, 8.43206062e-02, ...,
        2.79486271e-02, 3.70072970e-04, 4.62506381e-02],
       [1.34921437e-02, 6.00985997e-02, 0.00000000e+00, ...,
        7.59772696e-03, 4.97053459e-03, 4.44125266e-02],
       ...,
       [0.00000000e+00, 0.00000000e+00, 8.66672988e-01, ...,
        1.98338363e-01, 2.65001619e-01, 4.65928346e-04],
       [3.92990526e+00, 0.00000000e+00, 0.00000000e+00, ...,
        1.94885037e-01, 3.16534843e-01, 2.06310434e-02],
       [5.36011845e-01, 0.00000000e+00, 1.20623740e+00, ...,
        2.13387602e-01, 1.21276037e-01, 0.00000000e+00]])

In [7]:
model.reconstruction_err_

11.988878995363423

In [8]:
Q = model.components_

In [9]:
import numpy as np
R = np.array(new_user).reshape(1, -1)

In [10]:
%%time
P = model.transform(R)

Wall time: 15.3 s


In [11]:
import numpy as np
Rhat = np.dot(P, Q)

In [12]:
df_uniq['recom'] = Rhat.T

In [13]:
rec_movies = df_uniq.sort_values('recom', ascending=False)['title'].head(10)
rec_movies

0                         Toy Story (1995)
244                       Inception (2010)
378    There's Something About Mary (1998)
337                       Notorious (1946)
297          Flirting With Disaster (1996)
436      O Brother, Where Art Thou? (2000)
44                Wizard of Oz, The (1939)
202                Longest Day, The (1962)
203                 Green Mile, The (1999)
157                 Howard the Duck (1986)
Name: title, dtype: object

In [14]:
recommended_movies = pd.merge(rec_movies, df_mov, on='title', how = 'inner')
#del recommended_movies['movieId']
recommended_movies = recommended_movies[['title','genres']]
recommended_movies

Unnamed: 0,title,genres
0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,Inception (2010),Action|Crime|Drama|Mystery|Sci-Fi|Thriller|IMAX
2,There's Something About Mary (1998),Comedy|Romance
3,Notorious (1946),Film-Noir|Romance|Thriller
4,Flirting With Disaster (1996),Comedy
5,"O Brother, Where Art Thou? (2000)",Adventure|Comedy|Crime
6,"Wizard of Oz, The (1939)",Adventure|Children|Fantasy|Musical
7,"Longest Day, The (1962)",Action|Drama|War
8,"Green Mile, The (1999)",Crime|Drama
9,Howard the Duck (1986),Adventure|Comedy|Sci-Fi
