# DeepFM - no context

In [1]:
# Required modules

import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

2023-04-20 09:27:53.318413: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-20 09:27:53.481218: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-20 09:27:54.154720: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-20 09:27:54.158429: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the data

use_cols = ['user_id:token', 'item_id:token', 'rating:float']
data = pd.read_csv('./dataset/tripadvisor/tripadvisor.inter', usecols=use_cols)
data.head()

Unnamed: 0,user_id:token,item_id:token,rating:float
0,5C28F393B23BB894523AE7126A7AE445,219668,5
1,3FA27F6E8AC712A82C69C4EDD8B912CC,223860,5
2,B99CFBB5411EDC8881D13B7A4B313ADA,75680,5
3,3FA27F6E8AC712A82C69C4EDD8B912CC,224783,5
4,7CEFF5C32BA1F3B186E7838C7D3FE25E,222984,5


In [3]:
# Encoding the user_id column

user_encoder = LabelEncoder()
data['user_id:token'] = user_encoder.fit_transform(data['user_id:token'].values)

# Encoding the item_id column
item_encoder = LabelEncoder()
data['item_id:token'] = item_encoder.fit_transform(data['item_id:token'].values)

In [4]:
# Renaming columns

data.columns = ['user_id', 'item_id', 'rating']

In [5]:
# Split the data into train and test sets

train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

In [6]:
# Define the number of unique users and movies

num_users = data['user_id'].nunique()
num_movies = data['item_id'].nunique()

# Define embedding size

embedding_size = 10

In [7]:
# Model definition

# Define the input shape
input_shape = (train_data.shape[1] - 1,)

# Define input layers
user_input = tf.keras.layers.Input(shape=(1,))
movie_input = tf.keras.layers.Input(shape=(1,))

# Define user embedding
user_embedding = tf.keras.layers.Embedding(num_users, embedding_size, input_length=1)(user_input)
user_embedding = tf.keras.layers.Flatten()(user_embedding)

# Define movie embedding
movie_embedding = tf.keras.layers.Embedding(num_movies, embedding_size, input_length=1)(movie_input)
movie_embedding = tf.keras.layers.Flatten()(movie_embedding)

# Concatenate user and movie embeddings
concat = tf.keras.layers.concatenate([user_embedding, movie_embedding])

# Define FM part
fm = tf.keras.layers.Dense(1, activation=None)(concat)

# Define DNN part
dnn = tf.keras.layers.Dense(64, activation='relu')(concat)
dnn = tf.keras.layers.Dense(32, activation='relu')(dnn)
dnn = tf.keras.layers.Dense(1, activation=None)(dnn)

# Concatenate FM and DNN parts
concat = tf.keras.layers.concatenate([fm, dnn])

# Define output layer
output = tf.keras.layers.Flatten()(concat)

# Define the model
model = tf.keras.models.Model(inputs=[user_input, movie_input], outputs=output)

In [8]:
# Compile the model

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='mse', metrics=['mae'])

In [9]:
# Fit the model

history = model.fit([train_data['user_id'], train_data['item_id']], 
                    train_data['rating'], 
                    validation_data=([test_data['user_id'], test_data['item_id']], test_data['rating']),
                    epochs=100, batch_size=64)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100


Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [10]:
# Evaluate the model

model.evaluate([test_data['user_id'], test_data['item_id']], test_data['rating'])



[1.4017395973205566, 0.9199087619781494]

In [11]:
# Plot the model

tf.keras.utils.plot_model(model, show_shapes=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


## Cross validation

In [None]:
# Cross validation

from sklearn.model_selection import KFold

kf = KFold(n_splits=10)

for train_index, valid_index in kf.split(train_data):
    train_set = train_data.iloc[train_index]
    valid_set = train_data.iloc[valid_index]
    
    model = tf.keras.models.Model(inputs=[user_input, movie_input], outputs=output)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='mse', metrics=['mae'])
    
    model.fit([train_set['user_id'], train_set['item_id']], 
              train_set['rating'], 
              validation_data=([valid_set['user_id'], valid_set['item_id']], valid_set['rating']),
              epochs=100, batch_size=16, verbose=0)
    print(model.evaluate([test_data['user_id'], test_data['item_id']], test_data['rating']))