In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.gaussian_process.kernels import RBF
from ast import literal_eval
import warnings; warnings.simplefilter('ignore')
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

In [3]:
mf = pd.read_csv("movies_metadata.csv")
rsf = pd.read_csv("ratings_small.csv")
kf = pd.read_csv("keywords.csv")
cf = pd.read_csv("credits.csv")
lsf = pd.read_csv("links_small.csv")
lsf = lsf[lsf['tmdbId'].notnull()]['tmdbId']
rf = pd.read_csv('ratings.csv')

## Data Cleaning

In [4]:
mf = mf.drop(['belongs_to_collection','tagline','homepage','original_title','poster_path'],axis =1)
mf = mf.drop([19730,29503,35587])

In [5]:
mf["id"] = mf["id"].astype(int)

In [6]:
print(mf['id'].nunique())
mf = mf.merge(cf, on = "id")
mf = mf.merge(kf, on = 'id')
duplicate = (mf[mf['id'].duplicated()])
duplicate

mf.drop(duplicate.index, axis=0,inplace=True)
mf = mf.dropna(subset=['title'])
mf.shape

45433


(45429, 22)

In [7]:
smf = mf[mf["id"].isin(lsf)]
smf.shape
# smf["tagline"] = smf["tagline"].fillna(" ")

(9082, 22)

In [8]:
smf.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9082 entries, 0 to 41669
Data columns (total 22 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   adult                 9082 non-null   object 
 1   budget                9082 non-null   object 
 2   genres                9082 non-null   object 
 3   id                    9082 non-null   int32  
 4   imdb_id               9082 non-null   object 
 5   original_language     9082 non-null   object 
 6   overview              9070 non-null   object 
 7   popularity            9082 non-null   object 
 8   production_companies  9082 non-null   object 
 9   production_countries  9082 non-null   object 
 10  release_date          9082 non-null   object 
 11  revenue               9082 non-null   float64
 12  runtime               9082 non-null   float64
 13  spoken_languages      9082 non-null   object 
 14  status                9080 non-null   object 
 15  title               

In [9]:
smf['genres'] = smf['genres'].apply(literal_eval)
smf['crew'] = smf['crew'].apply(literal_eval)
smf['genre_size'] = smf['genres'].apply(lambda x:len(x))
smf['genre_size']

0        3
1        3
2        2
3        3
4        1
        ..
40952    1
41172    2
41225    4
41391    5
41669    2
Name: genre_size, Length: 9082, dtype: int64

In [10]:
def get_genre(x):
    for a in x:
        return a['name']
    return np.nan

In [11]:
smf['genre'] = smf['genres'].apply(get_genre)
smf['genre'] = smf['genre'].astype('str').apply(lambda x: str.lower(x.replace(" ", "")))
smf['genre'] = smf['genre'].apply(lambda x: [x])
smf['genre']

0          [animation]
1          [adventure]
2            [romance]
3             [comedy]
4             [comedy]
             ...      
40952          [drama]
41172       [thriller]
41225      [adventure]
41391         [action]
41669    [documentary]
Name: genre, Length: 9082, dtype: object

In [12]:
def get_director(x):
    for a in x:
        if a['job'] == 'Director':
            return a['name']
    return np.nan

In [13]:
smf['director'] = smf['crew'].apply(get_director)
smf['director'].reset_index()
smf['director'] = smf['director'].astype('str').apply(lambda x: str.lower(x.replace(" ", "")))
smf['director'] = smf['director'].apply(lambda x: [x])
smf['director']

0             [johnlasseter]
1              [joejohnston]
2             [howarddeutch]
3           [forestwhitaker]
4             [charlesshyer]
                ...         
40952        [greggchampion]
41172      [tinusureshdesai]
41225    [ashutoshgowariker]
41391          [hideakianno]
41669            [ronhoward]
Name: director, Length: 9082, dtype: object

In [14]:
smf['overview'] = smf['overview'].astype('str')
smf['overview'] = smf['overview'].apply(lambda x: [x])
smf['overview']

# Concatenate all the converted elements into a single string

0        [Led by Woody, Andy's toys live happily in his...
1        [When siblings Judy and Peter discover an ench...
2        [A family wedding reignites the ancient feud b...
3        [Cheated on, mistreated and stepped on, the wo...
4        [Just when George Banks has recovered from his...
                               ...                        
40952    [A man must cope with the loss of his wife and...
41172    [Rustom Pavri, an honourable officer of the In...
41225    [Village lad Sarman is drawn to big, bad Mohen...
41391    [From the mind behind Evangelion comes a hit l...
41669    [The band stormed Europe in 1963, and, in 1964...
Name: overview, Length: 9082, dtype: object

In [15]:
smf['overall'] = smf['genre'] +smf['director'] + smf['overview']
smf['overall'] = smf['overall'].apply(lambda x: ' '.join(x))
smf['overall']

0        animation johnlasseter Led by Woody, Andy's to...
1        adventure joejohnston When siblings Judy and P...
2        romance howarddeutch A family wedding reignite...
3        comedy forestwhitaker Cheated on, mistreated a...
4        comedy charlesshyer Just when George Banks has...
                               ...                        
40952    drama greggchampion A man must cope with the l...
41172    thriller tinusureshdesai Rustom Pavri, an hono...
41225    adventure ashutoshgowariker Village lad Sarman...
41391    action hideakianno From the mind behind Evange...
41669    documentary ronhoward The band stormed Europe ...
Name: overall, Length: 9082, dtype: object

In [16]:
tf = TfidfVectorizer(analyzer = 'word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(smf['overall'])

In [17]:
cos_matrix1 = cosine_similarity(tfidf_matrix)
cos_matrix1

array([[1.        , 0.00707227, 0.        , ..., 0.        , 0.        ,
        0.00477122],
       [0.00707227, 1.        , 0.01674929, ..., 0.00262777, 0.00211113,
        0.00414342],
       [0.        , 0.01674929, 1.        , ..., 0.        , 0.00263269,
        0.        ],
       ...,
       [0.        , 0.00262777, 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.00211113, 0.00263269, ..., 0.        , 1.        ,
        0.00173603],
       [0.00477122, 0.00414342, 0.        , ..., 0.        , 0.00173603,
        1.        ]])

In [18]:
smf = smf.reset_index()
titles = smf['title']
indices = pd.Series(smf.index, index=smf['title'])
print(indices)

title
Toy Story                                                0
Jumanji                                                  1
Grumpier Old Men                                         2
Waiting to Exhale                                        3
Father of the Bride Part II                              4
                                                      ... 
The Last Brickmaker in America                        9077
Rustom                                                9078
Mohenjo Daro                                          9079
Shin Godzilla                                         9080
The Beatles: Eight Days a Week - The Touring Years    9081
Length: 9082, dtype: int64


In [19]:
def get_recommendations(title, smf):
     l1 = []
     idx = indices[title]
     sim_scores = list(enumerate(cos_matrix1[idx]))
     sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
     sim_scores = sim_scores[1:31]
     movie_indices = [i[0] for i in sim_scores]
     l1 = list(titles.iloc[movie_indices])
     for i in range(len(l1)):
        smf1 = smf[smf['title'] == l1[i]]
     smf2 = smf[smf["title"].isin(l1)]
     smf2 = smf2.sort_values(by=['vote_average'],ascending=False)
     smf2 = smf2[["id","title","vote_average","vote_count","director","genre"]]  
     return smf2

In [20]:
get_recommendations('Rustom',smf)

Unnamed: 0,id,title,vote_average,vote_count,director,genre
4745,26246,Incident at Oglala,8.2,3.0,[michaelapted],[documentary]
874,521,Dial M for Murder,7.9,539.0,[alfredhitchcock],[crime]
3814,269,Breathless,7.7,322.0,[jean-lucgodard],[drama]
3034,93,Anatomy of a Murder,7.7,207.0,[ottopreminger],[crime]
2460,24226,The Verdict,7.4,132.0,[sidneylumet],[drama]
5676,17801,The Letter,7.4,42.0,[williamwyler],[crime]
2005,1847,The Long Goodbye,7.3,112.0,[robertaltman],[thriller]
2351,32255,The Palm Beach Story,7.3,46.0,[prestonsturges],[comedy]
5887,17208,Paradise Lost 2: Revelations,7.2,26.0,[joeberlinger],[documentary]
5056,16227,Dark Passage,7.2,81.0,[delmerdaves],[crime]


## Collaborative Filtering

In [21]:
count=0
display(rf)
print(len(rf['movieId'].unique()))
for mov in rf['movieId'].unique():
    for movie in smf['id'].unique():
        if(movie==mov):
            count+=1
print(count)
display(rf['movieId'].nunique())

Unnamed: 0,userId,movieId,rating,timestamp
0,1,110,1.0,1425941529
1,1,147,4.5,1425942435
2,1,858,5.0,1425941523
3,1,1221,5.0,1425941546
4,1,1246,5.0,1425941556
...,...,...,...,...
26024284,270896,58559,5.0,1257031564
26024285,270896,60069,5.0,1257032032
26024286,270896,63082,4.5,1257031764
26024287,270896,64957,4.5,1257033990


45115
2433


45115

In [22]:
# mr=pd.DataFrame(columns =['movieId', 'ratings'], index = [x for x in range(len(rsf['movieId'].unique())-1)])
# i=0
# for mov in rsf['movieId'].unique():
#     mr['movieId'][i]=mov
#     mr['ratings'][i]=len(rsf.loc[rsf['movieId']==mov])
#     i+=1
# print(mr['ratings'].max())


In [23]:
tsmf = smf['id']
print(len(tsmf.unique()))
rf = rf[rf['movieId'].isin(tsmf)]
rf.shape
print(len(rf['movieId'].unique()))
rf.info()


9082
2433
<class 'pandas.core.frame.DataFrame'>
Int64Index: 8052909 entries, 0 to 26024271
Data columns (total 4 columns):
 #   Column     Dtype  
---  ------     -----  
 0   userId     int64  
 1   movieId    int64  
 2   rating     float64
 3   timestamp  int64  
dtypes: float64(1), int64(3)
memory usage: 307.2 MB


In [24]:
movie_ratings=pd.DataFrame(columns =['movieId', 'ratings'], index = [x for x in range(len(rf['movieId'].unique())-1)])
i=0
for mov in rf['movieId'].unique():
    movie_ratings['movieId'][i]=mov
    movie_ratings['ratings'][i]=len(rf.loc[rf['movieId']==mov])
    i+=1
    
movie_ratings=movie_ratings[movie_ratings['ratings']>20]
display(movie_ratings)

Unnamed: 0,movieId,ratings
0,110,66512
1,147,4967
2,858,57070
3,1246,25752
4,1968,26611
...,...,...
2303,1165,42
2308,27094,22
2313,74306,23
2314,81704,26


In [25]:
col_movie_ratings=movie_ratings['movieId']
rf = rf[rf['movieId'].isin(col_movie_ratings)]
rf=rf[rf['userId']<10000]
movie_ratings=movie_ratings[movie_ratings['movieId'].isin(rf['movieId'])]
display(rf)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,110,1.0,1425941529
1,1,147,4.5,1425942435
2,1,858,5.0,1425941523
4,1,1246,5.0,1425941556
5,1,1968,4.0,1425942148
...,...,...,...,...
978487,9997,77866,4.5,1277470859
978505,9998,480,3.0,1439615051
978506,9998,593,0.5,1439615068
978514,9999,318,5.0,1501088709


In [26]:
u_m_matrix = rf.pivot(
    index='userId',
    columns='movieId',
    values='rating'
)

display(u_m_matrix)
u_m_df=u_m_matrix.reset_index()
for movie in u_m_matrix:
    print(movie)

movieId,2,5,6,11,12,13,14,15,16,18,...,95963,96821,96966,99861,100046,108401,132344,134368,134374,157851
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,5.0,,,,,,,,
2,,3.0,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,,,,,,,,,,,...,,,,,,,,,,
9996,,,,,,,,,,,...,,,,,,,,,,
9997,,,,,,,,,,,...,,,,,,,,,,
9998,,,,,,,,,,,...,,,,,,,,,,


2
5
6
11
12
13
14
15
16
18
19
20
21
22
24
25
26
28
33
35
38
55
58
59
62
63
64
65
66
67
68
69
70
71
73
74
75
76
77
78
79
80
81
82
83
85
86
87
88
89
90
93
95
96
97
98
99
100
101
103
104
105
106
107
108
110
111
112
113
114
116
117
118
120
121
122
123
128
129
132
134
135
136
137
138
139
140
141
142
144
145
146
147
148
149
150
152
153
154
155
156
157
158
161
162
163
164
165
167
168
169
170
172
173
174
175
176
177
178
179
180
182
184
185
186
187
189
192
193
194
195
196
197
198
199
200
201
203
204
205
207
212
213
214
215
216
217
218
219
220
222
223
226
227
228
229
231
232
233
234
235
236
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
257
258
259
260
261
262
266
267
268
269
272
273
274
275
276
277
278
279
280
281
284
285
287
288
289
290
291
292
293
294
296
297
298
299
300
301
302
303
306
307
308
309
310
311
314
315
316
318
319
320
321
322
326
327
329
330
331
332
334
335
336
337
338
339
340
342
343
345
346
348
350
363
364
377
378
379
380
381
383
387
388
389
391
392
393


In [27]:
nu_m_matrix = u_m_matrix.subtract(u_m_matrix.mean(axis=1), axis= 0)
display(nu_m_matrix)

movieId,2,5,6,11,12,13,14,15,16,18,...,95963,96821,96966,99861,100046,108401,132344,134368,134374,157851
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,0.9375,,,,,,,,
2,,-0.235294,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,,,,,,,,,,,...,,,,,,,,,,
9996,,,,,,,,,,,...,,,,,,,,,,
9997,,,,,,,,,,,...,,,,,,,,,,
9998,,,,,,,,,,,...,,,,,,,,,,


In [28]:
similarity=nu_m_matrix.T.corr()
display(similarity[1])

userId
1       1.0
2       NaN
3       1.0
4       NaN
5       NaN
       ... 
9995    NaN
9996    NaN
9997    NaN
9998    NaN
9999    NaN
Name: 1, Length: 9663, dtype: float64

In [29]:
similar_users= similarity[similarity[1] >0.3][1].sort_values(ascending= False)[:]
   # return similar_user
display(similar_users)
su_df=similar_users.reset_index()
su_df['similarity']=su_df[1]
su_df.drop(1,axis=1,inplace=True)
display(su_df)
for movie in u_m_matrix:
    su_df[movie]=u_m_df[u_m_df['userId'].isin(su_df['userId'])][movie]
su_df.fillna(0, inplace= True)
su_df.drop(index=0,axis=0,inplace= True)
display(su_df)



userId
1       1.000000
8398    1.000000
4378    1.000000
4359    1.000000
4305    1.000000
          ...   
4860    0.301511
1689    0.301511
8401    0.301511
7222    0.301511
1843    0.301511
Name: 1, Length: 984, dtype: float64

Unnamed: 0,userId,similarity
0,1,1.000000
1,8398,1.000000
2,4378,1.000000
3,4359,1.000000
4,4305,1.000000
...,...,...
979,4860,0.301511
980,1689,0.301511
981,8401,0.301511
982,7222,0.301511


Unnamed: 0,userId,similarity,2,5,6,11,12,13,14,15,...,95963,96821,96966,99861,100046,108401,132344,134368,134374,157851
1,8398,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4378,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4359,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4305,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4288,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979,4860,0.301511,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
980,1689,0.301511,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
981,8401,0.301511,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
982,7222,0.301511,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
def getrating(target_user):
    iumdf=u_m_df.loc[u_m_df['userId']==target_user]
    similar_users= similarity[similarity[target_user]>0.3][target_user].sort_values(ascending= False)[:]
    su_df=similar_users.reset_index()
    su_df['similarity']=su_df[target_user]
    su_df.drop(target_user,axis=1,inplace=True)
    
    for movie in u_m_matrix:
        su_df[movie]=u_m_df[u_m_df['userId'].isin(su_df['userId'])][movie]*(su_df['similarity'])
    su_df.fillna(0, inplace=True)
    
    for movie in u_m_matrix:
        iumdf.fillna(0,inplace= True)
        umlist=list(iumdf[movie])
        if(umlist[0]==0):
            if((su_df[su_df[movie]!=0]['similarity'].sum())!=0):
                iumdf[movie]= su_df[movie].sum()/(su_df[su_df[movie]!=0]['similarity'].sum())
            else:
                iumdf[movie]=0
    return iumdf
            

In [31]:
df3 = getrating(5575)
display(df3)

movieId,userId,2,5,6,11,12,13,14,15,16,...,95963,96821,96966,99861,100046,108401,132344,134368,134374,157851
5390,5575,3.147172,2.84101,3.777793,3.744859,2.943017,2.969891,3.231217,2.578268,3.95317,...,0,3.779548,0,0,0,0,0,3.133206,0,0


In [32]:
smf = smf[smf["id"].isin(movie_ratings['movieId'])]
display(smf[smf['id']==2058]['title'])
print(movie_ratings['movieId'].nunique())


1227    Addicted to Love
Name: title, dtype: object

2055


In [33]:
df1 = getrating(1)
display(movie_ratings)
df = get_recommendations('Avatar' , smf)
df['user_ratings']=0
display(df)
i=0
print(df['user_ratings'][2058])
# for index in df.index:
    

Unnamed: 0,movieId,ratings
0,110,66512
1,147,4967
2,858,57070
3,1246,25752
4,1968,26611
...,...,...
2164,2246,70
2165,1724,46
2166,396,33
2167,1360,64


Unnamed: 0,id,title,vote_average,vote_count,director,genre,user_ratings
2058,603,The Matrix,7.9,9079.0,[lanawachowski],[action],0
6102,775,A Trip to the Moon,7.9,314.0,[georgesméliès],[adventure],0
522,280,Terminator 2: Judgment Day,7.7,4274.0,[jamescameron],[action],0
953,679,Aliens,7.7,3282.0,[jamescameron],[horror],0
975,530,A Grand Day Out,7.4,199.0,[nickpark],[adventure],0
990,218,The Terminator,7.4,4208.0,[jamescameron],[action],0
885,601,E.T. the Extra-Terrestrial,7.3,3359.0,[stevenspielberg],[sciencefiction],0
140,8963,Crimson Tide,7.0,508.0,[tonyscott],[action],0
4103,320,Insomnia,6.8,1181.0,[christophernolan],[crime],0
5043,1882,The Men,6.5,18.0,[fredzinnemann],[drama],0


0


In [34]:
display(df1)

movieId,userId,2,5,6,11,12,13,14,15,16,...,95963,96821,96966,99861,100046,108401,132344,134368,134374,157851
0,1,3.027786,2.082313,3.598849,2.926845,1.5,3.533624,2.828813,2.746056,4.081841,...,0,5.0,5.0,0,0,0,0,3.062247,0,0


In [35]:
df['user_ratings'][140]=df1[140]
display(df)

Unnamed: 0,id,title,vote_average,vote_count,director,genre,user_ratings
2058,603,The Matrix,7.9,9079.0,[lanawachowski],[action],0.0
6102,775,A Trip to the Moon,7.9,314.0,[georgesméliès],[adventure],0.0
522,280,Terminator 2: Judgment Day,7.7,4274.0,[jamescameron],[action],0.0
953,679,Aliens,7.7,3282.0,[jamescameron],[horror],0.0
975,530,A Grand Day Out,7.4,199.0,[nickpark],[adventure],0.0
990,218,The Terminator,7.4,4208.0,[jamescameron],[action],0.0
885,601,E.T. the Extra-Terrestrial,7.3,3359.0,[stevenspielberg],[sciencefiction],0.0
140,8963,Crimson Tide,7.0,508.0,[tonyscott],[action],2.019598
4103,320,Insomnia,6.8,1181.0,[christophernolan],[crime],0.0
5043,1882,The Men,6.5,18.0,[fredzinnemann],[drama],0.0


## Hybrid system

In [36]:
def user_recommender (user_id,title):
    df1 = getrating(user_id)
    df = get_recommendations(title , smf)
    df['user_ratings']=0
    for movie in df['id']:
        df['user_ratings'][movie]=df1[movie]
    return df

In [37]:
getrating(1)

movieId,userId,2,5,6,11,12,13,14,15,16,...,95963,96821,96966,99861,100046,108401,132344,134368,134374,157851
0,1,3.027786,2.082313,3.598849,2.926845,1.5,3.533624,2.828813,2.746056,4.081841,...,0,5.0,5.0,0,0,0,0,3.062247,0,0


In [38]:
user_recommender(1, 'Avatar')

Unnamed: 0,id,title,vote_average,vote_count,director,genre,user_ratings
2058,603,The Matrix,7.9,9079.0,[lanawachowski],[action],0
6102,775,A Trip to the Moon,7.9,314.0,[georgesméliès],[adventure],0
522,280,Terminator 2: Judgment Day,7.7,4274.0,[jamescameron],[action],0
953,679,Aliens,7.7,3282.0,[jamescameron],[horror],0
975,530,A Grand Day Out,7.4,199.0,[nickpark],[adventure],0
990,218,The Terminator,7.4,4208.0,[jamescameron],[action],0
885,601,E.T. the Extra-Terrestrial,7.3,3359.0,[stevenspielberg],[sciencefiction],0
140,8963,Crimson Tide,7.0,508.0,[tonyscott],[action],0
4103,320,Insomnia,6.8,1181.0,[christophernolan],[crime],0
5043,1882,The Men,6.5,18.0,[fredzinnemann],[drama],0
