In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from keras.models import Model
from keras.layers import *
from keras.losses import *

Using TensorFlow backend.


In [2]:
# Directory prefix (with trailing slash) that is prepended to data file names.
FILE_PATH = (
    'data/'
)

In [3]:
# Read the raw ratings table from the data directory into a DataFrame.
original_dataset = pd.read_csv(
    filepath_or_buffer = FILE_PATH + 'ratings.csv'
)

In [4]:
# Peek at the first five rows to check the columns that were loaded.
original_dataset.head(n = 5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [5]:
# Keep only the ratings whose movieId is at most 1700; this bounds the largest
# movie index that later cells derive the item-embedding size from.
dataset = original_dataset.loc[original_dataset.movieId <= 1700.0]

In [6]:
# Integer index arrays that are later fed to the embedding layers.
# The builtin `int` is used as the dtype: the `np.int` alias (which was only
# ever another name for `int`) was deprecated in NumPy 1.20 and removed in
# NumPy 1.24, where `np.int` raises AttributeError.
user_idxs = np.array(dataset.userId, dtype = int)
movie_idxs = np.array(dataset.movieId, dtype = int)

# Regression targets: one rating per (user, movie) row of `dataset`.
ratings = np.array(dataset.rating)

In [7]:
# Embedding table sizes. The raw ids are used directly as embedding indices,
# so each table needs (largest id + 1) rows.
n_users = 1 + int(dataset.userId.max())
n_items = 1 + int(dataset.movieId.max())

# Width of each embedding vector (also reused as the hidden layer width).
n_factors = 50

# Shape of a single model input: one index per sample.
# NOTE(review): not referenced by any other cell visible here.
input_shape = (1,)

In [8]:
# Show the two embedding table sizes, one per line.
print(n_users, n_items, sep = '\n')

672
1700


In [9]:
class DeepCollaborativeFiltering(Model):
    """Neural collaborative-filtering model built with the Keras functional API.

    A user index and an item index are each looked up in their own embedding
    table. The two embedding vectors are concatenated and passed through
    dropout, one ReLU hidden layer of width ``n_factors``, dropout again, and
    a single linear output unit.
    """

    def __init__(self, n_users, n_items, n_factors, p_dropout = 0.2):
        """Wire up the layers and initialise the underlying ``Model``.

        Args:
            n_users: Number of rows in the user embedding table.
            n_items: Number of rows in the item embedding table.
            n_factors: Width of each embedding vector and of the hidden
                dense layer.
            p_dropout: Dropout rate applied after the concatenation and after
                the hidden layer.
        """
        # User branch: one index per sample -> (n_factors,) embedding vector.
        x1 = Input(shape = (1,))

        P = Embedding(n_users, n_factors, input_length = 1)(x1)
        P = Reshape((n_factors,))(P)

        # Item branch, same structure as the user branch.
        x2 = Input(shape = (1,))

        Q = Embedding(n_items, n_factors, input_length = 1)(x2)
        Q = Reshape((n_factors,))(Q)

        # Joint representation of the (user, item) pair.
        x = concatenate([P, Q])
        x = Dropout(p_dropout)(x)

        x = Dense(n_factors)(x)
        x = Activation('relu')(x)
        x = Dropout(p_dropout)(x)

        # Single linear unit: one unbounded prediction per pair.
        output = Dense(1)(x)

        # Only local variables are used above, so nothing is assigned on
        # `self` before the base Model has been initialised.
        super(DeepCollaborativeFiltering, self).__init__([x1, x2], output)

    def rate(self, user_idxs, item_idxs):
        """Predict ratings for one or more (user, item) index pairs.

        Args:
            user_idxs: A single user index (Python or NumPy integer) or a
                sequence / array of user indices.
            item_idxs: A single item index or a sequence / array of item
                indices, matching ``user_idxs``.

        Returns:
            The result of ``self.predict`` on the two index arrays.
        """
        # Normalise both inputs to arrays with at least one dimension.
        # A plain `type(x) == int` check misses NumPy integer scalars (for
        # example an element taken from an index array), which would then be
        # handed to `predict` as 0-d values; lists are converted here as well.
        user_idxs = np.atleast_1d(np.asarray(user_idxs))
        item_idxs = np.atleast_1d(np.asarray(item_idxs))

        return self.predict([user_idxs, item_idxs])

In [11]:
# Training hyper-parameters:
#   bs      - mini-batch size
#   val_per - fraction of the samples held out for validation
#   epochs  - number of passes over the training data
bs, val_per, epochs = 64, 0.25, 8

In [12]:
# Instantiate the network with the embedding sizes derived from the data;
# the dropout rate is left at the class default.
model = DeepCollaborativeFiltering(
    n_users = n_users,
    n_items = n_items,
    n_factors = n_factors,
)

In [13]:
# Configure training: Adam optimiser, mean squared logarithmic error loss.
model.compile(
    loss = mean_squared_logarithmic_error,
    optimizer = 'adam',
)

In [14]:
# Keras takes the `validation_split` fraction from the END of the arrays,
# before any shuffling. The ratings appear to be ordered by userId (see the
# head() output above), so an unshuffled split would validate mostly on users
# whose embeddings never receive a training update. Shuffle the rows once,
# with a fixed seed so the split is reproducible, before fitting.
shuffled_order = np.random.RandomState(0).permutation(len(ratings))

model.fit(
    x = [user_idxs[shuffled_order], movie_idxs[shuffled_order]],
    y = ratings[shuffled_order],
    batch_size = bs,
    epochs = epochs,
    validation_split = val_per,
)

Train on 30000 samples, validate on 10001 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x1d18f656198>