In [1]:
import numpy as np
import pandas as pd

from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Input, Embedding, Reshape, merge
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.regularizers import l2
from keras.optimizers import SGD, RMSprop, Adam

Using Theano backend.
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
# Directory holding the dataset files; the loading cell reads
# `ratings.dat` and `movies.dat` from here. Alternative dataset locations are
# kept commented out -- enable exactly one.
#path = "ml-20m/ml-20m/"
#path = "ml-20m/ml-10M100K/"
path = "ml-20m/ml-1m/"

In [3]:
ratings = pd.read_csv(path+'ratings.dat', sep='::')
movie_names = pd.read_csv(path+'movies.dat', sep='::')



  from ipykernel import kernelapp as app


In [4]:
usCol = '1'
mvCol = '1193'
rtCol = '5'
users = ratings[usCol].unique()
movies = ratings[mvCol].unique()


userid2idx = {o:i for i,o in enumerate(users)}
movieid2idx = {o:i for i,o in enumerate(movies)}


ratings[mvCol] = ratings[mvCol].apply(lambda x: movieid2idx[x])
ratings[usCol] = ratings[usCol].apply(lambda x: userid2idx[x])


user_min, user_max, movie_min, movie_max = (ratings[usCol].min(), 
    ratings[usCol].max(), ratings[mvCol].min(), ratings[mvCol].max())

n_users = ratings[usCol].nunique()
n_movies = ratings[mvCol].nunique()


In [5]:
# Number of latent factors: the output dimension passed to the user and movie
# Embedding layers in the model cells.
n_factors = 50


In [6]:
# Seed NumPy's global RNG so the random train/validation split below is the
# same on every run. `np.random.seed` must be *called*: assigning to it
# (`np.random.seed = 42`) merely rebinds the name and seeds nothing. If that
# assignment was ever executed in the current kernel, restart the kernel so
# the real function is restored.
np.random.seed(42)

# Random boolean mask: roughly 80% of the rows go to training, the remainder
# to validation.
msk = np.random.rand(len(ratings)) < 0.8
trn = ratings[msk]
val = ratings[~msk]



Dot product model

In [7]:
# Dot-product model: each user id and movie id is looked up in its own
# L2-regularised embedding table and the prediction is the dot product of the
# two embedded vectors.
user_in = Input(shape=(1,), dtype='int64', name='user_in')
movie_in = Input(shape=(1,), dtype='int64', name='movie_in')

u = Embedding(n_users, n_factors, input_length=1,
              W_regularizer=l2(1e-4))(user_in)
m = Embedding(n_movies, n_factors, input_length=1,
              W_regularizer=l2(1e-4))(movie_in)

# Flatten the merged output before handing it to the Model as the prediction.
x = Flatten()(merge([u, m], mode='dot'))

model = Model([user_in, movie_in], x)
model.compile(Adam(0.001), loss='mse')


In [8]:
# One epoch on the training split, scored against the validation split.
model.fit(
    [trn[usCol], trn[mvCol]],
    trn[rtCol],
    batch_size=64,
    nb_epoch=1,
    validation_data=([val[usCol], val[mvCol]], val[rtCol]),
)


Train on 800555 samples, validate on 199653 samples
Epoch 1/1

KeyboardInterrupt: 

Use new learning rate

In [9]:
# Update the optimizer's learning rate in place. Plain assignment
# (`model.optimizer.lr = ...`) would replace the backend variable with a Python
# float, and a training function that has already been built keeps using the
# old variable -- so the new value would never take effect.
# NOTE: 0.001 is the same rate that was passed to Adam at compile time; choose
# a different value here if the intent is to actually change it.
K.set_value(model.optimizer.lr, 0.001)


In [24]:
# Continue training the dot-product model for six more epochs.
model.fit(
    [trn[usCol], trn[mvCol]],
    trn[rtCol],
    batch_size=64,
    nb_epoch=6,
    validation_data=([val[usCol], val[mvCol]], val[rtCol]),
)

Train on 800126 samples, validate on 200082 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x7f099e2dbb10>

Adding bias to the model

In [10]:
def embedding_input(name, n_in, n_out, reg):
    """Create an integer id input and its L2-regularised embedding.

    Args:
        name: name given to the Input layer.
        n_in: number of rows in the embedding table (size of the id vocabulary).
        n_out: dimension of each embedding vector.
        reg: L2 penalty strength applied to the embedding weights.

    Returns:
        A tuple ``(input_tensor, embedded_tensor)``: the int64 Input of shape
        ``(1,)`` and the output of the Embedding layer applied to it.
    """
    id_input = Input(shape=(1,), dtype='int64', name=name)
    embedding = Embedding(n_in, n_out, input_length=1, W_regularizer=l2(reg))
    return id_input, embedding(id_input)

# Fresh inputs and embeddings for the bias model: both tables have
# `n_factors` columns and use an L2 penalty of 1e-4.
user_in, u = embedding_input('user_in', n_users, n_factors, 1e-4)
movie_in, m = embedding_input('movie_in', n_movies, n_factors, 1e-4)

def create_bias(inp, n_in):
    """Build a per-id scalar bias term.

    Args:
        inp: input tensor of integer ids to look up.
        n_in: number of rows in the bias table (size of the id vocabulary).

    Returns:
        The flattened output of a width-1 Embedding applied to ``inp``.
    """
    bias_table = Embedding(n_in, 1, input_length=1)
    return Flatten()(bias_table(inp))

# Per-user and per-movie bias terms.
ub = create_bias(user_in, n_users)
mb = create_bias(movie_in, n_movies)

# Prediction = dot(user vector, movie vector) + user bias + movie bias.
x = Flatten()(merge([u, m], mode='dot'))
for bias in (ub, mb):
    x = merge([x, bias], mode='sum')

model = Model([user_in, movie_in], x)
model.compile(Adam(0.001), loss='mse')


In [11]:
# Train the bias model for six epochs, validating on the held-out split.
model.fit(
    [trn[usCol], trn[mvCol]],
    trn[rtCol],
    batch_size=64,
    nb_epoch=6,
    validation_data=([val[usCol], val[mvCol]], val[rtCol]),
)

Train on 800555 samples, validate on 199653 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x7f0233304850>

For comparison, state-of-the-art results on the MovieLens datasets are listed at http://www.mymedialite.net/examples/datasets.html

Neural network model

In [12]:
# Neural-network model: rather than taking a dot product, concatenate the two
# embeddings and feed them through a small dense network with dropout.
user_in, u = embedding_input('user_in', n_users, n_factors, 1e-4)
movie_in, m = embedding_input('movie_in', n_movies, n_factors, 1e-4)

x = merge([u, m], mode='concat')
# Layers are created and applied in sequence; the final Dense(1) emits the
# predicted rating.
for layer in (Flatten(),
              Dropout(0.3),
              Dense(70, activation='relu'),
              Dropout(0.75),
              Dense(1)):
    x = layer(x)

nn = Model([user_in, movie_in], x)
nn.compile(Adam(0.001), loss='mse')


In [14]:
# Train the neural-network model for eight epochs, validating on the held-out
# split.
nn.fit(
    [trn[usCol], trn[mvCol]],
    trn[rtCol],
    batch_size=64,
    nb_epoch=8,
    validation_data=([val[usCol], val[mvCol]], val[rtCol]),
)

Train on 800555 samples, validate on 199653 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f0245f0abd0>