In [1]:
from tensorflow.python.client import device_lib

# Sanity check: show every device TensorFlow can see in this runtime, so the
# recorded output documents whether a GPU was attached.
local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 18190478336125512248
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 14415560704
locality {
  bus_id: 1
  links {
  }
}
incarnation: 15473800359995433997
physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
xla_global_id: 416903419
]


In [None]:
from google.colab import drive
drive.mount('/content/drive')
%cd drive/MyDrive/Data
import os 
cwd = os.getcwd()

In [None]:
import pandas as pd

# Load the raw ratings table from the current working directory.
df = pd.read_csv('ratings.csv')

# Shift user ids down by one so they can be used directly as embedding row
# indices (assumes the ids in ratings.csv are 1-based -- TODO confirm).
df['userId'] = df['userId'] - 1

# Give every distinct movie id a dense index in [0, M).
# NOTE(review): indices follow set iteration order, which is arbitrary rather
# than sorted; keep this mapping unchanged when reusing saved checkpoints.
movie2idx = {movie_id: idx for idx, movie_id in enumerate(set(df['movieId'].values))}
count = len(movie2idx)  # number of distinct movie ids

# Attach the dense movie index and discard the timestamp column.
df = df.assign(movie_idx=df['movieId'].map(movie2idx)).drop(columns=['timestamp'])


In [None]:
from keras.models import Model, load_model
from keras.layers import Input, Embedding, Dot, Add, Flatten
from keras.callbacks import ModelCheckpoint
from keras.regularizers import l2
from keras.optimizers import SGD

# Dataset dimensions used to size the embedding tables.
N = df['userId'].max() + 1      # number of users
M = df['movie_idx'].max() + 1   # number of movies
mu = df['rating'].mean()        # global mean rating
print(len(df), N, M, mu)

# parameters
K = 5        # latent dimensionality of the user / movie embeddings
epochs = 20  # epoch index at which training stops
reg = 0.0    # L2 penalty applied to every embedding table

# keras model: matrix factorisation with additive user and movie bias terms.
# Shape comments use B for the batch size. Layers are created in a fixed order
# (user emb, movie emb, user bias, movie bias) -- keep it, since it determines
# the auto-generated layer names.
u = Input(shape=(1,))
m = Input(shape=(1,))
u_embedding = Embedding(input_dim=N, output_dim=K, embeddings_regularizer=l2(reg))(u)  # (B, 1, K)
m_embedding = Embedding(input_dim=M, output_dim=K, embeddings_regularizer=l2(reg))(m)  # (B, 1, K)

u_bias = Embedding(input_dim=N, output_dim=1, embeddings_regularizer=l2(reg))(u)  # (B, 1, 1)
m_bias = Embedding(input_dim=M, output_dim=1, embeddings_regularizer=l2(reg))(m)  # (B, 1, 1)

# Dot product of the two latent vectors, plus both bias terms.
x = Dot(axes=2)([u_embedding, m_embedding])  # (B, 1, 1)
x = Add()([x, u_bias, m_bias])               # (B, 1, 1)
x = Flatten()(x)                             # (B, 1)

# save callback: write the full model (not just weights) after every epoch,
# with the epoch number and training loss embedded in the file name.
filepath = f'model-{K}-' + '{epoch:02d}-{loss:.3f}.hdf5'
checkpoint = ModelCheckpoint(
  filepath=filepath,
  save_best_only=False,
  save_weights_only=False,
  verbose=1,
)

model = Model(inputs=[u, m], outputs=x)
sgd = SGD(learning_rate=0.08, momentum=0.9)
model.compile(loss='mse', optimizer=sgd, metrics=['mse'])
# Resume settings.
# Leave `resume_from` as None to train the freshly built model from epoch 0.
# To continue an interrupted run, point `resume_from` at a checkpoint file
# written by the ModelCheckpoint callback and set `resume_epoch` to the epoch
# number embedded in that file name, e.g.
#   resume_from = 'model-10-14-0.549.hdf5'; resume_epoch = 14
# Previously `initial_epoch=14` was hard-coded while the load was commented
# out, so a brand-new model was trained for only `epochs - 14` epochs and its
# checkpoints were numbered as if they continued an earlier run.
resume_from = None
resume_epoch = 0

if resume_from is not None:
    model = load_model(resume_from)
    initial_epoch = resume_epoch
else:
    initial_epoch = 0

# train
# Targets are centred on the global mean rating, so the model only has to
# learn deviations from `mu`; add `mu` back when turning outputs into ratings.
r = model.fit(
  x=[df.userId.values, df.movie_idx.values],
  y=df.rating.values - mu,
  epochs=epochs,
  initial_epoch=initial_epoch,
  batch_size=256,
  callbacks=[checkpoint]
)


In [None]:
import matplotlib.pyplot as plt

# Draw one figure per recorded training curve: first the optimisation loss,
# then the MSE metric. Each curve gets its own legend and is shown separately.
for history_key, legend_label in (('loss', "train loss"), ('mse', "train mse")):
    plt.plot(r.history[history_key], label=legend_label)
    plt.legend()
    plt.show()