## Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
%matplotlib inline

## Importing the Whole Dataset

In [3]:
links = pd.read_csv('links.csv')

In [4]:
ratings = pd.read_csv('ratings.csv')

In [5]:
tags = pd.read_csv('tags.csv')

In [6]:
movies = pd.read_csv('movies.csv')

## User-Genre Matrix

#### All Genres in the Dataset

In [7]:
genres = []
for i in movies.index:
    genres.extend(movies['genres'][i].split('|'))
genres = list(set(genres))
genres.remove('(no genres listed)')

In [8]:
len(genres)

19

In [9]:
users = list(ratings['userId'].unique())

In [10]:
usergenre_matrix = pd.DataFrame(columns = genres, index = users)

In [11]:
movies_temp = movies.copy()
for i in range(9742):
    movies_temp['genres'].iloc[i] = (movies_temp['genres'].iloc[i]).split('|')
movies_temp = movies_temp.explode('genres')
movies_temp.index = np.arange(0,len(movies_temp))
movies_temp

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure
1,1,Toy Story (1995),Animation
2,1,Toy Story (1995),Children
3,1,Toy Story (1995),Comedy
4,1,Toy Story (1995),Fantasy
...,...,...,...
22079,193583,No Game No Life: Zero (2017),Fantasy
22080,193585,Flint (2017),Drama
22081,193587,Bungo Stray Dogs: Dead Apple (2018),Action
22082,193587,Bungo Stray Dogs: Dead Apple (2018),Animation


In [12]:
result = pd.merge(movies_temp, ratings, left_on = 'movieId', right_on = 'movieId')
result

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure,1,4.0,964982703
1,1,Toy Story (1995),Adventure,5,4.0,847434962
2,1,Toy Story (1995),Adventure,7,4.5,1106635946
3,1,Toy Story (1995),Adventure,15,2.5,1510577970
4,1,Toy Story (1995),Adventure,17,4.5,1305696483
...,...,...,...,...,...,...
274475,193583,No Game No Life: Zero (2017),Fantasy,184,3.5,1537109545
274476,193585,Flint (2017),Drama,184,3.5,1537109805
274477,193587,Bungo Stray Dogs: Dead Apple (2018),Action,184,3.5,1537110021
274478,193587,Bungo Stray Dogs: Dead Apple (2018),Animation,184,3.5,1537110021


In [13]:
x = result.groupby(['userId', 'genres']).mean()
x = x.drop(columns = ['movieId', 'timestamp'])
x

Unnamed: 0_level_0,Unnamed: 1_level_0,rating
userId,genres,Unnamed: 2_level_1
1,Action,4.322222
1,Adventure,4.388235
1,Animation,4.689655
1,Children,4.547619
1,Comedy,4.277108
...,...,...
610,Romance,3.731092
610,Sci-Fi,3.659363
610,Thriller,3.573529
610,War,3.776596


In [14]:
for i in genres:
    for j in users:
        try:
            usergenre_matrix[i][j] = x.loc[j, i][0]
        except:
            usergenre_matrix[i][j] = 0.0

In [15]:
usergenre_matrix

Unnamed: 0,Horror,Thriller,Musical,War,Animation,Drama,Action,Film-Noir,Children,Adventure,Fantasy,Western,Comedy,Mystery,Documentary,Crime,IMAX,Sci-Fi,Romance
1,3.47059,4.14545,4.68182,4.5,4.68966,4.52941,4.32222,5,4.54762,4.38824,4.29787,4.28571,4.27711,4.16667,0,4.35556,0,4.225,4.30769
2,3,3.7,0,4.5,0,3.88235,3.95455,0,0,4.16667,0,3.5,4,4,4.33333,3.8,3.75,3.875,4.5
3,4.6875,4.14286,0.5,0.5,0.5,0.75,3.57143,0,0.5,2.72727,3.375,0,1,5,0,0.5,0,4.2,0.5
4,4.25,3.55263,4,3.57143,4,3.48333,3.32,4,3.8,3.65517,3.68421,3.8,3.50962,3.47826,4,3.81481,3,2.83333,3.37931
5,3,3.55556,4.4,3.33333,4.33333,3.8,3.11111,0,4.11111,3.25,4.14286,3,3.46667,4,0,3.83333,3.66667,2.5,3.09091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,3.34615,3.52513,3.72727,3.79231,3.71429,3.78797,3.17881,3.8125,3.44898,3.5034,3.59794,3.41176,3.56532,3.79121,3.8,3.65414,3.0625,3.55696,3.74085
607,4.11429,4.11475,3.6,4.16667,3.33333,4.0122,3.72222,0,3.42105,3.46667,3.57143,4,3.32727,4.64706,0,3.81481,5,3.25,3.51724
608,3.31959,3.53668,2.75758,3.57895,3.11818,3.4375,3.33032,3.75,2.46023,3.22099,3,2.63636,2.73662,3.55072,3,3.61301,4,3.29641,2.88679
609,3.5,3.28571,0,3.5,3,3.36842,3.09091,0,3,3.2,3,4,3.28571,0,3,3.5,3,3,3.2


##### Normalize the User-Genre Matrix

In [16]:
usergenre_matrix=(usergenre_matrix-usergenre_matrix.min())/(usergenre_matrix.max()-usergenre_matrix.min())

In [17]:
usergenre_matrix

Unnamed: 0,Horror,Thriller,Musical,War,Animation,Drama,Action,Film-Noir,Children,Adventure,Fantasy,Western,Comedy,Mystery,Documentary,Crime,IMAX,Sci-Fi,Romance
1,0.694118,0.829091,0.936364,0.9,0.937931,0.889273,0.864444,1,0.909524,0.877647,0.859574,0.857143,0.855422,0.833333,0,0.871111,0,0.845,0.861538
2,0.6,0.74,0,0.9,0,0.737024,0.790909,0,0,0.833333,0,0.7,0.8,0.8,0.866667,0.76,0.75,0.775,0.9
3,0.9375,0.828571,0.1,0.1,0.1,0,0.714286,0,0.1,0.545455,0.675,0,0.2,1,0,0.1,0,0.84,0.1
4,0.85,0.710526,0.8,0.714286,0.8,0.643137,0.664,0.8,0.76,0.731034,0.736842,0.76,0.701923,0.695652,0.8,0.762963,0.6,0.566667,0.675862
5,0.6,0.711111,0.88,0.666667,0.866667,0.717647,0.622222,0,0.822222,0.65,0.828571,0.6,0.693333,0.8,0,0.766667,0.733333,0.5,0.618182
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.669231,0.705025,0.745455,0.758462,0.742857,0.714815,0.635762,0.7625,0.689796,0.70068,0.719588,0.682353,0.713064,0.758242,0.76,0.730827,0.6125,0.711392,0.748169
607,0.822857,0.822951,0.72,0.833333,0.666667,0.767575,0.744444,0,0.684211,0.693333,0.714286,0.8,0.665455,0.929412,0,0.762963,1,0.65,0.703448
608,0.663918,0.707336,0.551515,0.715789,0.623636,0.632353,0.666065,0.75,0.492045,0.644199,0.6,0.527273,0.547324,0.710145,0.6,0.722603,0.8,0.659281,0.577358
609,0.7,0.657143,0,0.7,0.6,0.616099,0.618182,0,0.6,0.64,0.6,0.8,0.657143,0,0.6,0.7,0.6,0.6,0.64


## Movie Co-occurance Matrix (With Jaccard Similarity)

In [18]:
movie_matrix = pd.DataFrame(columns = movies.movieId, index = movies.movieId)

In [18]:
dff = ratings.copy()
dff =  ratings.groupby(['movieId'])['userId'].unique()
dff

movieId
1         [1, 5, 7, 15, 17, 18, 19, 21, 27, 31, 32, 33, ...
2         [6, 8, 18, 19, 20, 21, 27, 51, 62, 68, 82, 91,...
3         [1, 6, 19, 32, 42, 43, 44, 51, 58, 64, 68, 91,...
4                           [6, 14, 84, 162, 262, 411, 600]
5         [6, 31, 43, 45, 58, 66, 68, 84, 103, 107, 111,...
                                ...                        
193581                                                [184]
193583                                                [184]
193585                                                [184]
193587                                                [184]
193609                                                [331]
Name: userId, Length: 9724, dtype: object

In [19]:
'''for i in movie_matrix.index:
    for j in movie_matrix.index:
        try:
            print(i, j)
            movie_matrix[str(i)][j] = round(len(set(dff[i]).intersection(set(dff[j])))/len(set(dff[i]).union(set(dff[j]))), 2)
        except KeyError:
            movie_matrix[str(i)][j] = 0.0'''

'for i in movie_matrix.index:\n    for j in movie_matrix.index:\n        try:\n            print(i, j)\n            movie_matrix[str(i)][j] = round(len(set(dff[i]).intersection(set(dff[j])))/len(set(dff[i]).union(set(dff[j]))), 2)\n        except KeyError:\n            movie_matrix[str(i)][j] = 0.0'

In [20]:
movie_matrix = pd.read_csv('movie_matrix_final.csv')

In [21]:
movie_matrix.set_index(movies.movieId, inplace = True)

In [22]:
movie_matrix

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0,1,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,0,1,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,0,0,1,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0,0,0,0,1,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0,0,0,0,0,0,0,0,0,0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
193583,0,0,0,0,0,0,0,0,0,0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
193585,0,0,0,0,0,0,0,0,0,0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
193587,0,0,0,0,0,0,0,0,0,0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0


## Movie-Genre Matrix

In [19]:
movies_temp = movies.copy()

In [20]:
movies_temp = movies_temp.set_index('movieId')

In [21]:
movies_temp

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy
...,...,...
193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
193585,Flint (2017),Drama
193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [22]:
movie_genre = pd.DataFrame(columns = genres, index = movies.movieId)

In [23]:
movie_genre[genres] = 0

In [24]:
for i in movies.movieId:
    for j in movies_temp['genres'][i].split('|'):
        try:
            movie_genre[j][i] = 1
        except KeyError:
            pass

In [25]:
movie_genre

Unnamed: 0_level_0,Horror,Thriller,Musical,War,Animation,Drama,Action,Film-Noir,Children,Adventure,Fantasy,Western,Comedy,Mystery,Documentary,Crime,IMAX,Sci-Fi,Romance
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,0,0,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
4,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1
5,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0
193583,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0
193585,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
193587,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0


##### Predicting Top 10 Movies For each Users

In [26]:
user_movie_pred = {}
for i in users:
    print(i)
    array = (cosine_similarity(np.array(usergenre_matrix.loc[i]).reshape(1, 19), movie_genre))[0].argsort()[-10:][::-1]
    user_movie_pred[i] = list(movies.ix[array.tolist()].movieId)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat


176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat


303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat


430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentat

607
608
609
610


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """


In [27]:
user_movie_pred

{1: [81132, 26701, 1907, 56152, 2987, 4719, 52462, 71999, 6902, 43932],
 2: [81132, 26236, 79132, 31367, 4956, 459, 49530, 91542, 8481, 31921],
 3: [31804, 36509, 43932, 3113, 90345, 7001, 2232, 184253, 32213, 3837],
 4: [81132, 26701, 56152, 2987, 1907, 81847, 4719, 32031, 52462, 43932],
 5: [81847, 2987, 56152, 1907, 81132, 595, 32031, 1064, 364, 2092],
 6: [81132, 81847, 85261, 32031, 56152, 1907, 26701, 52462, 364, 79132],
 7: [81132, 26701, 43932, 2987, 52462, 56152, 1907, 6902, 4719, 36509],
 8: [81847, 1907, 364, 56152, 595, 81132, 2987, 85261, 32031, 631],
 9: [81132, 2987, 56152, 52462, 32031, 36397, 81847, 84637, 4306, 92348],
 10: [56152, 81847, 1907, 36397, 85261, 32031, 2987, 4719, 51939, 108932],
 11: [81132, 79132, 43932, 60684, 36509, 7235, 27317, 27683, 26236, 8481],
 12: [71999, 4719, 26236, 4956, 164226, 117646, 587, 47404, 26764, 161594],
 13: [81132, 43932, 36509, 71999, 6902, 31804, 91542, 198, 27683, 8481],
 14: [81132, 79132, 60684, 36509, 43932, 8481, 27683, 19

In [28]:
max(list(movie_genre.dot(usergenre_matrix.loc[1])))

8.571582404153379

## Predicting 15 Movies Based on TimeStamp

In [29]:
ratings_75 = ratings[ratings['timestamp'] <= ratings.describe()['timestamp']['75%']]

In [30]:
ratings_25 = ratings[ratings['timestamp'] > ratings.describe()['timestamp']['75%']]

In [31]:
common = list(set(ratings_75['userId'].unique()).intersection(set(ratings_25['userId'].unique())))

In [32]:
rating75_groupby = pd.DataFrame(ratings_75.groupby(['userId', 'timestamp'])['movieId'].unique())

In [33]:
movies_temp = movies.copy()
movie_temp = movies_temp.set_index('movieId')

In [35]:
user_movie_pred2 = {}
for i in common:
    user_movie_pred2[i] = []

In [36]:
num = 5
for user in common:
    #print(user)
    count = 0
    while len(user_movie_pred2[user]) != 15:
        print(user, count)
        movie_id = (rating75_groupby.loc[user])[::-1].iloc[count]['movieId']
        for i in movie_id:
            array = (cosine_similarity(np.array(movie_genre.loc[i]).reshape(1, 19), movie_genre))[0].argsort()[-num:][::-1]
            user_movie_pred2[user].extend(list(movies.ix[array.tolist()].movieId))
            if num > 1:
                num -= 1
        user_movie_pred2[user] = list(set(user_movie_pred2[user]).difference(set(ratings_75[ratings_75['userId'] == user].movieId.tolist())))
        count += 1

522 0
522 1
522 2
522 3
522 4
522 5
522 6
522 7
522 8
522 9
522 10
522 11
15 0
15 1
15 2
15 3
15 4
15 5
15 6
15 7
15 8
15 9
15 10
15 11
15 12
15 13
15 14
15 15
21 0
21 1
21 2
21 3
21 4


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

21 5
21 6
21 7
21 8
21 9
21 10
21 11
21 12
21 13
21 14
21 15
21 16
21 17
21 18
21 19
21 20
21 21
21 22
537 0
537 1
537 2
537 3
537 4
537 5
537 6
537 7
537 8
537 9
537 10
537 11


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

537 12
537 13
537 14
537 15
537 16
537 17
537 18
537 19
537 20
537 21
282 0
282 1
282 2
282 3
282 4
282 5
282 6
282 7
282 8
282 9
282 10
282 11
282 12
282 13
282 14
282 15
282 16
282 17
282 18
282 19
282 20
414 0


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

414 1
414 2
414 3
414 4
414 5
414 6
414 7
414 8
414 9
414 10
414 11
414 12
414 13
414 14
414 15
414 16
414 17
414 18
414 19
414 20
414 21
414 22
414 23
414 24
414 25
414 26
414 27
414 28
414 29
414 30


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

414 31
414 32
414 33
292 0
292 1
292 2
292 3
292 4
292 5
292 6
292 7
292 8
292 9
292 10
292 11
292 12
292 13
292 14
292 15
292 16
292 17
292 18
292 19
292 20
292 21
292 22
292 23
292 24
292 25
292 26
292 27
292 28
292

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

 29
177 0
177 1
177 2
177 3
177 4
177 5
177 6
177 7
177 8
177 9
177 10
177 11
177 12
177 13
177 14
177 15
177 16
177 17
177 18
177 19
443 0
443 1
443 2
443 3
443 4
443 5
443 6
443 7
443 8
443 9
443 10


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

443 11
443 12
443 13
443 14
443 15
443 16
443 17
443 18
443 19
443 20
443 21
443 22
318 0
318 1
318 2
318 3
318 4
318 5
318 6
318 7
318 8
318 9
318 10
318 11
318 12
318 13
318 14
318 15
318 16
318 17
318 18
318 19


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

318 20
448 0
448 1
448 2
448 3
448 4
448 5
448 6
448 7
448 8
448 9
448 10
448 11
448 12
448 13
448 14
448 15
448 16
448 17
448 18
448 19
448 20
448 21
448 22
193 0
193 1
193 2
193 3
193 4
193 5
193 6


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

193 7
193 8
193 9
193 10
193 11
193 12
193 13
193 14
193 15
193 16
193 17
193 18
68 0
68 1
68 2
68 3
68 4
68 5
68 6
68 7
68 8
68 9
68 10
68 11
68 12
68 13
68 14
68 15
68 16
68 17
68 18
68

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

 19
68 20
68 21
68 22
68 23
68 24
68 25
68 26
68 27
68 28
68 29
68 30
68 31
68 32
68 33
68 34
68 35
68 36
68 37
68 38
68 39
68 40
68 41
68 42
68 43
326 0
326 1
326 2
326 3
326 4
326 5
326 6


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

326 7
326 8
326 9
326 10
326 11
326 12
326 13
326 14
326 15
326 16
326 17
326 18
326 19
326 20
326 21
326 22
326 23
326 24
462 0
462 1
462 2
462 3
462 4
462 5
462 6
462 7
462 8
462 9
462 10
462 11
462 12


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

462 13
462 14
462 15
462 16
462 17
462 18
462 19
462 20
462 21
462 22
103 0
103 1
103 2
103 3
103 4
103 5
103 6
103 7
103 8
103 9
103 10
103 11
103 12
103 13
103 14
103 15
103 16
103 17
119 0
119 1
119 2
119 3
119

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

 4
119 5
119 6
119 7
119 8
119 9
119 10
119 11
119 12
119 13
119 14
119 15
119 16
249 0
249 1
249 2
249 3
249 4
249 5
249 6
249 7
249 8
249 9
249 10
249 11
249

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

 12
249 13
249 14
249 15
249 16
249 17
249 18
249 19
249 20
249 21
249 22
509 0
509 1
509 2
509 3
509 4
509 5
509 6
509 7
509 8
509 9
509 10
509 11
509 12
509 13
509 14
509 15
509 16
509 17
509 18
509 19
509 20
509 21
509 22
509 23
509 24
509 25
509 26


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

In [37]:
user_movie_pred2

{522: [2688,
  85056,
  2467,
  26052,
  31364,
  193609,
  4139,
  55820,
  53996,
  140850,
  1620,
  101076,
  6934,
  6617,
  70015],
 15: [609,
  805,
  40581,
  48744,
  193609,
  153386,
  32460,
  5582,
  4015,
  1616,
  5423,
  49649,
  103219,
  1591,
  379],
 21: [2688,
  131714,
  45442,
  805,
  40581,
  96935,
  48744,
  193609,
  70015,
  133867,
  106542,
  135887,
  8950,
  4407,
  59103],
 537: [135137,
  55553,
  1732,
  193609,
  117867,
  99532,
  117133,
  49649,
  4338,
  59103,
  81910,
  1271,
  379,
  6686,
  70015],
 282: [26464,
  86721,
  7714,
  7299,
  168418,
  48744,
  193609,
  89745,
  4338,
  59103,
  1909,
  8950,
  2583,
  81910,
  70015],
 414: [7299,
  40581,
  4966,
  3879,
  48744,
  193609,
  99532,
  88140,
  135536,
  3696,
  111732,
  81910,
  2583,
  131610,
  1245],
 292: [135137,
  27426,
  45442,
  1732,
  119655,
  3879,
  58025,
  81834,
  8782,
  89745,
  2322,
  103253,
  2582,
  1591,
  131610],
 177: [609,
  7299,
  40581,
  48744

In [38]:
for user in user_movie_pred2:
    match = set(user_movie_pred2[user]).intersection(set(ratings_25[ratings_25['userId'] == 15].movieId.tolist()))
    print(user)
    print(list(match), len(match))

522
[] 0
15
[] 0
21
[] 0
537
[] 0
282
[89745] 1
414
[] 0
292
[89745] 1
177
[] 0
443
[89745] 1
318
[] 0
448
[] 0
193
[48780] 1
68
[] 0
326
[] 0
462
[89745] 1
103
[] 0
119
[] 0
249
[] 0
509
[] 0


# Predicting ratings given by Users

In [39]:
user_rating = pd.DataFrame(ratings_25.groupby(['userId', 'movieId'])['rating'].unique())

In [40]:
user_rating = pd.DataFrame(user_rating['rating'].apply(lambda x: user_rating['rating'][0][0]))

In [41]:
movies_temp = movies.copy()

In [42]:
for i in range(9742):
    movies_temp['genres'].iloc[i] = (movies_temp['genres'].iloc[i]).split('|')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [43]:
movies_temp = movies_temp.explode('genres')

In [44]:
result = pd.merge(movies_temp, ratings_75, left_on = 'movieId', right_on = 'movieId')

In [45]:
usergenre_count = pd.DataFrame(result.groupby(['userId', 'genres'])['genres'].count())

In [46]:
usergenre_count = usergenre_count.rename(columns={'genres': 'count'})

In [47]:
usergenre_count

Unnamed: 0_level_0,Unnamed: 1_level_0,count
userId,genres,Unnamed: 2_level_1
1,Action,90
1,Adventure,85
1,Animation,29
1,Children,42
1,Comedy,83
...,...,...
609,Romance,5
609,Sci-Fi,5
609,Thriller,14
609,War,4


In [48]:
genres = []
for i in movies.index:
    genres.extend(movies['genres'][i].split('|'))
genres = list(set(genres))
genres.remove('(no genres listed)')
users = list(ratings_75['userId'].unique())

In [49]:
usergenre_matrix_75 = pd.DataFrame(columns = genres, index = users)

In [50]:
x = result.groupby(['userId', 'genres']).mean()

In [51]:
x = x.drop(columns = ['movieId', 'timestamp'])

In [52]:
for i in genres:
    for j in users:
        try:
            usergenre_matrix_75[i][j] = x.loc[j, i][0]
        except:
            usergenre_matrix_75[i][j] = 0.0

In [53]:
usergenre_matrix_75

Unnamed: 0,Horror,Thriller,Musical,War,Animation,Drama,Action,Film-Noir,Children,Adventure,Fantasy,Western,Comedy,Mystery,Documentary,Crime,IMAX,Sci-Fi,Romance
1,3.47059,4.14545,4.68182,4.5,4.68966,4.52941,4.32222,5,4.54762,4.38824,4.29787,4.28571,4.27711,4.16667,0,4.35556,0,4.225,4.30769
3,4.6875,4.14286,0.5,0.5,0.5,0.75,3.57143,0,0.5,2.72727,3.375,0,1,5,0,0.5,0,4.2,0.5
4,4.25,3.55263,4,3.57143,4,3.48333,3.32,4,3.8,3.65517,3.68421,3.8,3.50962,3.47826,4,3.81481,3,2.83333,3.37931
5,3,3.55556,4.4,3.33333,4.33333,3.8,3.11111,0,4.11111,3.25,4.14286,3,3.46667,4,0,3.83333,3.66667,2.5,3.09091
6,3.26316,3.54412,4.16667,3.58333,4.07143,3.61429,3.60938,2.5,3.61702,3.89362,3.53846,3.81818,3.37008,3.73333,0,3.28571,4.66667,3.47619,3.61429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,2.86667,2.92,3.07692,2.77778,2.94231,3.11864,3.22464,0,3.20755,3.26415,3.15217,3.33333,3.25843,3.4375,3,3.04545,3.67857,3.28846,3.33051
606,3.34615,3.52513,3.72727,3.79231,3.71429,3.78797,3.17881,3.8125,3.44898,3.5034,3.59794,3.41176,3.56532,3.79121,3.8,3.65414,3.0625,3.55696,3.74085
607,4.11429,4.11475,3.6,4.16667,3.33333,4.0122,3.72222,0,3.42105,3.46667,3.57143,4,3.32727,4.64706,0,3.81481,5,3.25,3.51724
608,3.31959,3.53668,2.75758,3.57895,3.11818,3.4375,3.33032,3.75,2.46023,3.22099,3,2.63636,2.73662,3.55072,3,3.61301,4,3.29641,2.88679


In [54]:
movies_temp = movies.copy()

In [55]:
for i in range(9742):
    movies_temp['genres'].iloc[i] = (movies_temp['genres'].iloc[i]).split('|')

In [56]:
result25 = pd.merge(movies_temp, ratings_25, left_on = 'movieId', right_on = 'movieId')

In [57]:
result25['predicted_rating'] = 0

In [58]:
result25 = result25.set_index('userId')

In [59]:
result25 = result25.ix[common]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


In [60]:
result25 = result25.reset_index()

In [61]:
for user in result25['userId'].unique():
    for movie in result25[result25['userId'] == user].movieId:
        temp = 0
        value = 0
        for genre in list(result25[result25['userId'] == user][result25['movieId'] == movie].genres)[0]:
            try:
                value += usergenre_matrix_75[genre][user]*usergenre_count.loc[user, genre]['count']
                temp += usergenre_count.loc[user, genre]['count']
            except:
                pass
        try:
            value /= temp
        except ZeroDivisionError:
            value = 0
        result25.loc[(result25['userId'] == user) & (result25['movieId'] == movie), 'predicted_rating'] = value

  """


In [62]:
result25

Unnamed: 0,userId,movieId,title,genres,rating,timestamp,predicted_rating
0,522,1200,Aliens (1986),"[Action, Adventure, Horror, Sci-Fi]",4.5,1449731613,3.875000
1,522,1214,Alien (1979),"[Horror, Sci-Fi]",5.0,1449731619,3.980392
2,522,55820,No Country for Old Men (2007),"[Crime, Drama]",4.5,1449724924,3.900000
3,522,97304,Argo (2012),"[Drama, Thriller]",2.5,1449724955,3.893382
4,522,106100,Dallas Buyers Club (2013),[Drama],0.5,1449724936,3.833333
...,...,...,...,...,...,...,...
1838,509,129229,Northmen - A Viking Saga (2014),"[Action, Adventure]",2.0,1435997905,3.300000
1839,509,130073,Cinderella (2015),"[Children, Drama, Fantasy, Romance]",4.0,1435997996,3.375776
1840,509,133419,Pitch Perfect 2 (2015),[Comedy],5.0,1435997941,3.159091
1841,509,136838,Kiss me Kismet (2006),"[Comedy, Romance]",2.5,1435998776,3.199029
