In [1]:
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from deepctr.models import DeepFM
from deepctr.inputs import SparseFeat, get_feature_names

In [2]:
ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv', encoding = "ISO-8859-1")

In [3]:
movie_ratings = pd.merge(ratings, movies, on='movieId')

In [4]:
movie_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,2,3.5,1112486027,Jumanji (1995),Adventure|Children|Fantasy
1,5,2,3.0,851527569,Jumanji (1995),Adventure|Children|Fantasy
2,13,2,3.0,849082742,Jumanji (1995),Adventure|Children|Fantasy
3,29,2,3.0,835562174,Jumanji (1995),Adventure|Children|Fantasy
4,34,2,3.0,846509384,Jumanji (1995),Adventure|Children|Fantasy


In [5]:
sparse_features = ['userId', 'movieId', 'timestamp', 'title', 'genres']
target = ['rating']

In [6]:
for feature in sparse_features:
    lbe = LabelEncoder()
    movie_ratings[feature] = lbe.fit_transform(movie_ratings[feature])

In [7]:
movie_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,0,1,3.5,340880,6746,440
1,4,1,3.0,39435,6746,440
2,12,1,3.0,35834,6746,440
3,28,1,3.0,11193,6746,440
4,33,1,3.0,31468,6746,440


In [8]:
fixlen_feature_columns = [SparseFeat(feature, movie_ratings[feature].nunique()) for feature in sparse_features]
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

In [9]:
train, test = train_test_split(movie_ratings, test_size = 0.2)
train_model_input = {name: train[name].values for name in feature_names}
test_model_input = {name:test[name].values for name in feature_names}

In [10]:
model = DeepFM(linear_feature_columns, dnn_feature_columns, task = 'regression')
model.compile('adam', 'mse', metrics=['mse'])
history = model.fit(train_model_input, train[target].values, batch_size=64, epochs = 10, verbose = True, validation_split=0.2)

Train on 671088 samples, validate on 167772 samples
Epoch 1/10


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
test_model_input

{'userId': array([2074, 2425,  767, ..., 6111,  815, 1683], dtype=int64),
 'movieId': array([ 3140,  1153,  2867, ..., 10927,   199,  4579], dtype=int64),
 'timestamp': array([434146, 287996, 209500, ..., 628220,   4644, 804933], dtype=int64),
 'title': array([ 4940,   762, 11575, ...,   334, 13025, 10982]),
 'genres': array([820, 309, 810, ..., 728,   1, 804])}

In [12]:
pred_ans = model.predict(test_model_input, batch_size = 64)

In [13]:
mse = round(mean_squared_error(test[target].values, pred_ans), 4)
rmse = mse ** 0.5
print('test RMSE', rmse)

test RMSE 0.8567963585356791
