In [1]:
%run env_setup.py
import os
import pandas as pd
import numpy as np
import keras
from lessdeep.datasets.grouplens import movielens, movielens_small

Using TensorFlow backend.


Load data

In [2]:
# Fetch both MovieLens variants via the project helpers.
# NOTE(review): presumably `movielens` is the full dataset and `movielens_small` the
# reduced sample; whether these calls re-download or reuse a cache is not visible here.
# `data_path` is not used by the cells shown in this notebook section.
data_path = movielens.download_data()
# The returned path is used as a directory containing 'ratings.csv' and 'movies.csv'.
sample_path = movielens_small.download_data()

In [3]:
# Dataset directory used by every read below; point this at `data_path` to switch datasets.
path = sample_path

In [4]:
# Load the ratings table from the selected dataset directory.
ratings_file = os.path.join(path, 'ratings.csv')
rating = pd.read_csv(ratings_file)

In [5]:
# Preview the first five rows (userId, movieId, rating, timestamp).
rating.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


Read the movie titles as well; they are used only for display purposes.

In [6]:
# Build a {movieId: title} lookup from movies.csv.
movie_table = pd.read_csv(os.path.join(path, 'movies.csv'))
title_by_movie_id = movie_table.set_index('movieId')['title']
movie_names = title_by_movie_id.to_dict()

In [7]:
# Distinct raw ids, in order of first appearance in the ratings table.
users = rating['userId'].unique()
movies = rating['movieId'].unique()

# Raw id -> contiguous 0-based index, suitable as an Embedding row number.
userid2index = {raw_id: idx for idx, raw_id in enumerate(users)}
movieid2index = {raw_id: idx for idx, raw_id in enumerate(movies)}

# Replace the raw ids with their contiguous indices. Every id is a key of its
# mapping by construction, so the dictionary lookup always succeeds.
rating['movieId'] = rating['movieId'].map(movieid2index)
rating['userId'] = rating['userId'].map(userid2index)

In [8]:
# Number of distinct users and movies; these become the Embedding input sizes.
n_users = rating['userId'].nunique()
n_movies = rating['movieId'].nunique()
(n_users, n_movies)

(671, 9066)

In [9]:
# Fix the seed so the random split is the same on every run.
np.random.seed(7)

# One uniform draw per rating row; a row goes to training when its draw is below 0.8.
train_select = np.random.rand(len(rating)) < 0.8
train_set = rating.loc[train_select]
val_set = rating.loc[~train_select]

len(train_set), len(val_set)

(79831, 20173)

## Creating models

In [10]:
# Width of the latent vectors: used as `output_dim` of both the user and movie embeddings.
n_factors = 50

In [11]:
# User branch: one integer user index per sample -> n_factors-dimensional latent vector.
user_in = keras.layers.Input(shape=(1,), dtype=rating['userId'].dtype, name='user_in')
user_embedding_layer = keras.layers.Embedding(
    input_dim=n_users,
    output_dim=n_factors,
    embeddings_regularizer=keras.regularizers.l2(1e-4),  # L2 penalty on the embedding weights
)
user_embedding = user_embedding_layer(user_in)

# Movie branch: same construction, sized by the number of distinct movies.
movie_in = keras.layers.Input(shape=(1,), dtype=rating['movieId'].dtype, name='movie_in')
movie_embedding_layer = keras.layers.Embedding(
    input_dim=n_movies,
    output_dim=n_factors,
    embeddings_regularizer=keras.regularizers.l2(1e-4),
)
movie_embedding = movie_embedding_layer(movie_in)

In [15]:
# The predicted rating is the dot product of the user and movie latent vectors.
# Each embedding is (None, 1, n_factors), so the dot is taken over axis 2 (the factor axis).
latent_dot = keras.layers.dot([user_embedding, movie_embedding], axes=(2, 2))

# Collapse the remaining singleton axes so the output lines up with the rating targets.
x = keras.layers.Flatten()(latent_dot)
model = keras.Model([user_in, movie_in], outputs=x)

# Regress directly on the rating value with mean squared error.
model.compile(optimizer=keras.optimizers.Adam(0.001), loss='mse')
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_in (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
movie_in (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1, 50)        33550       user_in[0][0]                    
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 1, 50)        453300      movie_in[0][0]                   
__________________________________________________________________________________________________
dot_2 (Dot

In [27]:
def train(epoches, batch_size=512):
    """Fit the notebook-level ``model`` on ``train_set``, validating on ``val_set``.

    Args:
        epoches: Forwarded to ``model.fit`` as ``epochs``.
        batch_size: Forwarded to ``model.fit`` as ``batch_size``.

    Relies on the globals ``model``, ``train_set`` and ``val_set``; returns nothing.
    """
    train_inputs = [train_set.userId, train_set.movieId]
    validation = ([val_set.userId, val_set.movieId], val_set.rating)
    model.fit(train_inputs, train_set.rating,
              batch_size=batch_size,
              epochs=epoches,
              validation_data=validation)

In [26]:
# One pass at the initial learning rate, with a larger batch than the default 512.
train(epoches=1, batch_size=1024)

Train on 79831 samples, validate on 20173 samples
Epoch 1/1


In [28]:
# Raise the learning rate for the following epochs.
# Update the optimizer's existing backend variable in place. Plain assignment
# (`model.optimizer.lr = 0.01`) only rebinds the Python attribute to a float; the
# training function built by the earlier `fit` call keeps using the old variable,
# so the new rate would silently be ignored.
keras.backend.set_value(model.optimizer.lr, 0.01)

In [29]:
# Three more passes with the default batch size.
train(epoches=3)

Train on 79831 samples, validate on 20173 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
