In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
ratings = pd.read_csv( '/content/rating.csv')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40


In [3]:
movies = pd.read_csv('/content/movie.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
g = ratings.groupby('userId')['rating'].count()
top_users = g.sort_values(ascending=False)[:15]

In [5]:
g = ratings.groupby('movieId')['rating'].count()
top_movies = g.sort_values(ascending=False)[:15]

In [6]:
top_r = ratings.join(top_users, rsuffix='_r', how='inner', on='userId')
top_r = top_r.join(top_movies, rsuffix='_r', how='inner', on='movieId')

In [7]:
pd.crosstab(top_r.userId, top_r.movieId, top_r.rating, aggfunc=np.sum)

movieId,1,50,110,150,260,296,318,356,457,480,527,589,593,1210,2571
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
156,5.0,5.0,5.0,5.0,,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
208,4.0,4.5,3.5,5.0,4.5,5.0,4.5,1.0,5.0,4.5,4.5,2.5,4.5,4.0,3.0
359,5.0,4.5,3.5,4.0,5.0,5.0,5.0,4.5,4.0,4.0,5.0,5.0,5.0,5.0,3.0
572,5.0,5.0,,5.0,,5.0,4.0,5.0,4.0,5.0,,3.5,4.5,,4.0
586,2.5,4.5,4.5,3.0,3.5,5.0,5.0,3.5,3.0,3.0,5.0,5.0,3.5,3.5,4.5
741,5.0,5.0,4.5,5.0,5.0,5.0,5.0,5.0,4.0,2.0,5.0,3.5,5.0,4.0,4.5
768,,4.0,4.0,3.0,4.5,5.0,4.5,4.0,4.0,3.0,,4.0,5.0,4.0,4.0
775,4.5,4.0,5.0,4.0,5.0,5.0,4.0,4.0,4.0,4.5,5.0,4.5,5.0,4.5,4.5
903,4.0,5.0,4.0,4.0,5.0,5.0,5.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,3.0
982,3.0,4.0,3.0,3.0,4.0,5.0,4.5,2.5,3.0,3.0,3.5,3.5,3.5,3.5,3.5


In [8]:
user_enc = LabelEncoder()
ratings['user'] = user_enc.fit_transform(ratings['userId'].values)
n_users = ratings['user'].nunique()

In [9]:
item_enc = LabelEncoder()
ratings['movie'] = item_enc.fit_transform(ratings['movieId'].values)
n_movies = ratings['movie'].nunique()

In [10]:
ratings['rating'] = ratings['rating'].values.astype(np.float32)
min_rating = min(ratings['rating'])
max_rating = max(ratings['rating'])
n_users, n_movies, min_rating, max_rating

(2184, 11122, 0.5, 5.0)

In [11]:
X = ratings[['user', 'movie']].values
y = ratings['rating'].values

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((287713, 2), (31969, 2), (287713,), (31969,))

In [13]:
n_factors = 50
X_train_array = [X_train[:, 0], X_train[:, 1]]
X_test_array = [X_test[:, 0], X_test[:, 1]]

In [14]:
from keras.models import Model
from keras.layers import Input, Reshape, Dot
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam
from keras.regularizers import l2

In [15]:
def RecommenderV1(n_users, n_movies, n_factors):
    user = Input(shape=(1,))
    u = Embedding(n_users, n_factors, embeddings_initializer='he_normal',embeddings_regularizer=l2(1e-6))(user)
    u = Reshape((n_factors,))(u)
    movie = Input(shape=(1,))
    m = Embedding(n_movies, n_factors, embeddings_initializer='he_normal',embeddings_regularizer=l2(1e-6))(movie)
    m = Reshape((n_factors,))(m)
    x = Dot(axes=1)([u, m])
    model = Model(inputs=[user, movie], outputs=x)
    opt = Adam(lr=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt)
    return model

In [16]:
model = RecommenderV1(n_users, n_movies, n_factors)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 1, 50)        109200      input_1[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1, 50)        556100      input_2[0][0]                    
______________________________________________________________________________________________

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [17]:
history = model.fit(x=X_train_array, y=y_train, batch_size=64, epochs=5,verbose=1, validation_data=(X_test_array, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [18]:
pred = model.predict(X_test_array)

In [19]:
pred

array([[3.149655 ],
       [5.1121593],
       [3.1296058],
       ...,
       [4.555512 ],
       [3.2309399],
       [2.0783477]], dtype=float32)

In [20]:
X_test_array

[array([ 207, 1175,  699, ..., 1301, 2094,  393]),
 array([10277,  1117,   572, ...,    57,  3424,   417])]

In [21]:
from keras.layers import Add, Activation, Lambda

In [35]:
class EmbeddingLayer:
    def __init__(self, n_items, n_factors):
        self.n_items = n_items
        self.n_factors = n_factors
    def __call__(self, x):
        x = Embedding(self.n_items, self.n_factors, embeddings_initializer='he_normal',embeddings_regularizer=l2(0.0003))(x)
        x = Reshape((self.n_factors,))(x)
        return x

In [36]:
def RecommenderV2(n_users, n_movies, n_factors, min_rating, max_rating):
    user = Input(shape=(1,))
    u = EmbeddingLayer(n_users, n_factors)(user)
    ub = EmbeddingLayer(n_users, 1)(user)
    
    movie = Input(shape=(1,))
    m = EmbeddingLayer(n_movies, n_factors)(movie)
    mb = EmbeddingLayer(n_movies, 1)(movie)
    x = Dot(axes=1)([u, m])
    x = Add()([x, ub, mb])
    x = Activation('softmax')(x)
    x = Lambda(lambda x: x * (max_rating - min_rating) + min_rating)(x)
    model = Model(inputs=[user, movie], outputs=x)
    opt = Adam(lr=0.0003)
    model.compile(loss='mean_squared_error', optimizer=opt, metrics=['accuracy'])
    return model

In [37]:
model = RecommenderV2(n_users, n_movies, n_factors, min_rating, max_rating)
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_8 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_10 (Embedding)        (None, 1, 50)        109200      input_7[0][0]                    
__________________________________________________________________________________________________
embedding_12 (Embedding)        (None, 1, 50)        556100      input_8[0][0]                    
____________________________________________________________________________________________

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [39]:
history = model.fit(x=X_train_array, y=y_train, batch_size=8, epochs=15,verbose=1, validation_data=(X_test_array, y_test))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
