In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [43]:
dataset = pd.read_csv("./data/the-movies-dataset/ratings_small.csv")# 100k small data set
# dataset = pd.read_csv('/home/jeongchanwoo/바탕화면/ratings.csv')

In [44]:
dataset.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,110,1.0,1425941529
1,1,147,4.5,1425942435
2,1,858,5.0,1425941523
3,1,1221,5.0,1425941546
4,1,1246,5.0,1425941556


In [45]:
len(dataset.userId.unique()), len(dataset.movieId.unique())

(270896, 45115)

In [46]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26024289 entries, 0 to 26024288
Data columns (total 4 columns):
userId       int64
movieId      int64
rating       float64
timestamp    int64
dtypes: float64(1), int64(3)
memory usage: 794.2 MB


In [47]:
### trainform data type int to object ( userId, movieId)
dataset.userId = dataset.userId.astype('category').cat.codes.values
dataset.movieId = dataset.movieId.astype('category').cat.codes.values

In [48]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26024289 entries, 0 to 26024288
Data columns (total 4 columns):
userId       int32
movieId      int32
rating       float64
timestamp    int64
dtypes: float64(1), int32(2), int64(1)
memory usage: 595.6 MB


In [49]:
dataset.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,0,108,1.0,1425941529
1,0,145,4.5,1425942435
2,0,843,5.0,1425941523
3,0,1195,5.0,1425941546
4,0,1218,5.0,1425941556


## Model creation

In [9]:
import tensorflow as tf

In [10]:
n_latent_factor_user = 8
n_latent_factor_moive = 10
n_latent_factor_mf = 3
n_users, n_movies = len(dataset.userId.unique()), len(dataset.movieId.unique())

In [11]:
dataset.movieId.unique()

array([  30,  833,  859, ...,  115, 3712, 4629])

In [12]:
# movie_feature_column  = tf.feature_column.categorical_column_with_vocabulary_list(key='terms', vocabulary_list=dataset.movieId.unique())
from tensorflow import keras

In [13]:
movie_input = keras.layers.Input(shape = [1], name = 'Movie')
movie_embedding_mlp = keras.layers.Embedding(n_movies +1 , n_latent_factor_moive, name = 'Movie_embedding_MLP')(movie_input)
movie_vec_mlp = keras.layers.Flatten(name = 'FlattenMovies_MLP')(movie_embedding_mlp)
movie_vec_mlp = keras.layers.Dropout(0.2)(movie_vec_mlp)

movie_embedding_mf = keras.layers.Embedding(n_movies+1 , n_latent_factor_mf, name = 'Movie_embedding_MF')(movie_input)
movie_vec_mf = keras.layers.Flatten(name='FlattenMovies_MF')(movie_embedding_mf)
movie_vec_mf = keras.layers.Dropout(0.2)(movie_vec_mf)

user_input = keras.layers.Input(shape=[1],name='User')
user_embedding_mlp = keras.layers.Embedding(n_users +1 , n_latent_factor_user, name = 'Users_embedding_MLP')(user_input)
user_vec_mlp = keras.layers.Flatten(name = 'FlattenUser_MLP')(user_embedding_mlp)
user_vec_mlp = keras.layers.Dropout(0.2)(user_vec_mlp)

user_embedding_mf = keras.layers.Embedding(n_users +1 , n_latent_factor_mf, name = 'Users_embedding_MF')(user_input)
user_vec_mf = keras.layers.Flatten(name = 'FlattenUser_MF')(user_embedding_mf)
user_vec_mf = keras.layers.Dropout(0.2)(user_vec_mf)

In [14]:
(movie_embedding_mf, movie_embedding_mlp, user_embedding_mf, user_embedding_mlp)

(<tf.Tensor 'Movie_embedding_MF/GatherV2:0' shape=(?, 1, 3) dtype=float32>,
 <tf.Tensor 'Movie_embedding_MLP/GatherV2:0' shape=(?, 1, 10) dtype=float32>,
 <tf.Tensor 'Users_embedding_MF/GatherV2:0' shape=(?, 1, 3) dtype=float32>,
 <tf.Tensor 'Users_embedding_MLP/GatherV2:0' shape=(?, 1, 8) dtype=float32>)

In [15]:
concat = keras.layers.concatenate([movie_vec_mlp, user_vec_mlp])
concat_dropout = keras.layers.Dropout(0.2)(concat)
dense_1 = keras.layers.Dense(200, name ='FullyConnected_1')(concat_dropout)
dense_batch_1 = keras.layers.BatchNormalization(name='Batch')(dense_1)
dropout_1 = keras.layers.Dropout(0.2, name ='Dropout_1')(dense_batch_1)
dense_2 = keras.layers.Dense(100, name = 'FullyConnected_2')(dropout_1)
dense_batch_2 = keras.layers.BatchNormalization(name = 'Batch_2')(dense_2)

dropout_2 = keras.layers.Dropout(0.2, name='Dropout_2')(dense_batch_2)
dense_3 = keras.layers.Dense(50, name = 'FullyConnected_3')(dropout_2)
dense_4 = keras.layers.Dense(20, name = 'FullyConnected_4', activation='relu')(dense_3)

In [16]:
(movie_vec_mlp.shape , user_vec_mlp.shape)

(TensorShape([Dimension(None), Dimension(10)]),
 TensorShape([Dimension(None), Dimension(8)]))

In [17]:
concat.shape

TensorShape([Dimension(None), Dimension(18)])

In [18]:
(user_vec_mf.shape, movie_vec_mf.shape)

(TensorShape([Dimension(None), Dimension(3)]),
 TensorShape([Dimension(None), Dimension(3)]))

In [19]:
movie_vec_mf

<tf.Tensor 'dropout_1/cond/Merge:0' shape=(?, 3) dtype=float32>

In [20]:
user_vec_mf

<tf.Tensor 'dropout_3/cond/Merge:0' shape=(?, 3) dtype=float32>

In [21]:
# movie_vec_mf = keras.backend.transpose(movie_vec_mf) # movie_vec_mf shape=(?, 3) to (3,?)

In [22]:
movie_vec_mf

<tf.Tensor 'dropout_1/cond/Merge:0' shape=(?, 3) dtype=float32>

In [23]:
pred_mf = keras.layers.multiply([movie_vec_mf, user_vec_mf], name = 'Elemet_wise_product')
pred_mlp = keras.layers.Dense(1, activation='relu', name = 'Activation_pred')(dense_4)

In [24]:
(pred_mf, pred_mlp)

(<tf.Tensor 'Elemet_wise_product/mul:0' shape=(?, 3) dtype=float32>,
 <tf.Tensor 'Activation_pred/Relu:0' shape=(?, 1) dtype=float32>)

In [25]:
combine_mlp_mf = keras.layers.concatenate([pred_mf,pred_mlp],name = 'Concat_MF_MLP')
result_combine = keras.layers.Dense(100, name='Combine_MF_MLP')(combine_mlp_mf)

deep_combine = keras.layers.Dense(100, name = 'FullyConnected_5')(result_combine)
result = keras.layers.Dense(1, name='Prediction')(deep_combine)

model = keras.Model([user_input, movie_input], result)
opt = keras.optimizers.Adam(lr = 0.01)
model.compile(optimizer = 'adam', loss = 'mean_absolute_error',metrics=['mae'])

In [26]:
from IPython.display import SVG
# from tensorflow.keras.utils import plot_model

In [27]:
# SVG(plot_model(model, show_shapes=False,to_file='test_model', show_layer_names=True, rankdir='TB')) 
# SVG(plot_model(model, show_shapes=False, show_layer_names=True, rankdir='TB').create(prog='dot',format='svg'))

In [28]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Movie (InputLayer)              (None, 1)            0                                            
__________________________________________________________________________________________________
User (InputLayer)               (None, 1)            0                                            
__________________________________________________________________________________________________
Movie_embedding_MLP (Embedding) (None, 1, 10)        90670       Movie[0][0]                      
__________________________________________________________________________________________________
Users_embedding_MLP (Embedding) (None, 1, 8)         5376        User[0][0]                       
__________________________________________________________________________________________________
FlattenMov

In [29]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

In [30]:
# cv = KFold(n_splits=10, shuffle=True, random_state=0)
# cross_val_score(model, 
#                 dataset.userId, dataset.movieId, scoring="neg_mean_absolute_error", cv=cv)


In [31]:
from sklearn.model_selection import train_test_split

In [32]:
train, test = train_test_split(dataset, test_size=0.2)

In [33]:
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta = 0.00001, patience=7, verbose=1, mode='min') 
### 0.00001 이상의 변화가 없을 때 10회만큼 epoch 더 부여 후 중지

In [34]:
model_epoch_train = model.fit([train.userId, train.movieId], train.rating, epochs=100, verbose=1, 
                           validation_split=0.1, callbacks = [early_stopping])

Train on 72002 samples, validate on 8001 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 00028: early stopping


In [35]:
from sklearn.metrics import mean_absolute_error


In [36]:
y_predict = np.round(model.predict([test.userId, test.movieId]), 0)

In [37]:
print(mean_absolute_error(test.rating, y_predict))
print(mean_absolute_error(test.rating,model.predict([test.userId, test.movieId])))

0.6900154992250388
0.6937431266344068


In [38]:
import h5py

In [39]:
# h5py.run_tests()

In [40]:
tf.keras.models.save_model(model=model, filepath='./data/model/neural_MF_1.h5')
# tf.keras.models.save_model(model=model, filepath='./data/model/neural_MF_1_big_file.h5')

In [50]:
model = keras.models.load_model('./data/model/neural_MF_1.h5',)

In [54]:
model

<tensorflow.python.keras._impl.keras.engine.training.Model at 0x7f2edeb26e80>

In [51]:
# y_predict_big = np.round(model.predict([dataset.userId,dataset.movieId]), 0)

# print(mean_absolute_error(dataset.rating, y_predict_big))
# print(mean_absolute_error(dataset.rating, model.predict([dataset.userId,dataset.movieId])))

InvalidArgumentError: indices[12,0] = 10177 is not in [0, 9067)
	 [[Node: Movie_embedding_MLP_1/GatherV2 = GatherV2[Taxis=DT_INT32, Tindices=DT_INT32, Tparams=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Movie_embedding_MLP_1/embeddings/read, Movie_embedding_MF_1/Cast, Movie_embedding_MF_1/GatherV2/axis)]]

Caused by op 'Movie_embedding_MLP_1/GatherV2', defined at:
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/asyncio/base_events.py", line 1434, in _run_once
    handle._run()
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-50-91a339ff27cb>", line 1, in <module>
    model = keras.models.load_model('./data/model/neural_MF_1.h5')
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/saving.py", line 241, in load_model
    model = model_from_config(model_config, custom_objects=custom_objects)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/saving.py", line 318, in model_from_config
    return deserialize(config, custom_objects=custom_objects)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/layers/serialization.py", line 63, in deserialize
    printable_module_name='layer')
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/utils/generic_utils.py", line 171, in deserialize_keras_object
    list(custom_objects.items())))
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/network.py", line 1067, in from_config
    process_node(layer, node_data)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/network.py", line 1025, in process_node
    layer(input_tensors[0], **kwargs)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/base_layer.py", line 314, in __call__
    output = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 717, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/layers/embeddings.py", line 158, in call
    out = array_ops.gather(self.embeddings, inputs)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 2736, in gather
    return gen_array_ops.gather_v2(params, indices, axis, name=name)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 3065, in gather_v2
    "GatherV2", params=params, indices=indices, axis=axis, name=name)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
    op_def=op_def)
  File "/home/jeongchanwoo/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): indices[12,0] = 10177 is not in [0, 9067)
	 [[Node: Movie_embedding_MLP_1/GatherV2 = GatherV2[Taxis=DT_INT32, Tindices=DT_INT32, Tparams=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Movie_embedding_MLP_1/embeddings/read, Movie_embedding_MF_1/Cast, Movie_embedding_MF_1/GatherV2/axis)]]
