In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import tensorflow as tf
from data_pre_processing.fetch_data import *
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:

X_train, Y_train = get_X_Y_train()
X_test = get_X_test()

# get number of total users:
n_train_users, _, n_train_movies = X_train.shape
n_test_users, _, n_train_movies = X_test.shape
n_users = n_train_users + n_test_users

all_user_ratings_for_all_train_movies = np.concatenate((X_train[:,1,:], X_test[:,1,:]), axis=0)
all_user_ratings_for_all_train_movies = tf.constant(\
                                                    all_user_ratings_for_all_train_movies, \
                                                    dtype=tf.float32, \
                                                    name="all_user_ratings_for_all_train_movies")

train_users_target_movie_ratings = tf.constant(Y_train,\
                                                dtype=tf.float32, \
                                                name="train_users_target_movie_ratings")

# hyperparamaters: 
n_dimensions = 10
target_loss_dominance = 0.8
regularization_coefficient = 0.01

n_epochs = 100000
learning_rate = 0.1

# Varibles:
train_movies_weights = tf.Variable(tf.random_uniform([n_train_movies, n_dimensions], -1.0, 1.0), name="train_movies_weights")
test_movie_weights = tf.Variable(tf.random_uniform([1, n_dimensions], -1.0, 1.0), name="test_movie_weights")
user_weights = tf.Variable(tf.random_uniform([n_users, n_dimensions], -1.0, 1.0), name="user_weights")


# calculating train loss
predictions = tf.matmul(user_weights, tf.transpose(train_movies_weights))
mask = ~tf.debugging.is_nan(all_user_ratings_for_all_train_movies)
non_nan_rated_movie_predictions = tf.boolean_mask(predictions, mask)
non_nan_rated_movie_ratings = tf.boolean_mask(all_user_ratings_for_all_train_movies, mask)
train_loss = tf.reduce_mean(tf.square(non_nan_rated_movie_predictions-non_nan_rated_movie_ratings), name="train_loss")


# calculating test loss
train_user_weights = user_weights[:n_train_users-1000,:]
target_predictions = tf.matmul(train_user_weights, tf.transpose(test_movie_weights))
error = train_users_target_movie_ratings[:-1000] - target_predictions
target_loss = tf.reduce_mean(tf.square(error), name="target_movie_loss")

regularization_penalty = regularization_coefficient*(tf.norm(train_movies_weights) + tf.norm(test_movie_weights) + tf.norm(user_weights))

alpha = tf.constant(target_loss_dominance, name="alpha")
loss = (1-alpha)*train_loss + alpha*(target_loss)



# rmse of validation set:
validation_train_user_weights = user_weights[n_train_users-1000:n_train_users,:]
validation_target_predictions = tf.matmul(validation_train_user_weights, tf.transpose(test_movie_weights))
validation_error = train_users_target_movie_ratings[-1000:] - validation_target_predictions
validation_rmse = tf.sqrt(tf.reduce_mean(tf.square(validation_error), name="validation_loss"))

optimizer = tf.train.AdamOptimizer()
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
#     saver.restore(sess, "./tmp/my_model_lambda=0.01_epoch1000.ckpt")
    print("START:")
    for epoch in range(n_epochs):
        sess.run(training_op)
        
        if epoch % 20 == 0:
            print("Epoch", epoch, "loss =", loss.eval(), "validation_loss =", validation_rmse.eval())
            save_path = saver.save(sess, "./tmp/my_model_n_dims={}_alpha={}_lambda={}_every_20.ckpt".format(n_dimensions,target_loss_dominance,regularization_coefficient))
        if epoch % 500 == 0:
            save_path = saver.save(sess, "./tmp/my_model_n_dims={}_alpha={}_lambda={}_epoch{}.ckpt".format(n_dimensions,target_loss_dominance,regularization_coefficient,epoch))

        save_path = saver.save(sess, "./tmp/my_model_n_dims={}_alpha={}_lambda={}_final.ckpt".format(n_dimensions,target_loss_dominance,regularization_coefficient))


START:
Epoch 0 loss = 15.276577 validation_loss = 3.8934982
Epoch 20 loss = 14.816497 validation_loss = 3.8741686
Epoch 40 loss = 14.394819 validation_loss = 3.8546612
Epoch 60 loss = 13.954227 validation_loss = 3.8279395
Epoch 80 loss = 13.453611 validation_loss = 3.7867305
Epoch 100 loss = 12.881457 validation_loss = 3.725577
Epoch 120 loss = 12.261228 validation_loss = 3.6401832
Epoch 140 loss = 11.57214 validation_loss = 3.5288022
Epoch 160 loss = 10.814808 validation_loss = 3.392137
Epoch 180 loss = 10.023203 validation_loss = 3.231965
Epoch 200 loss = 9.22607 validation_loss = 3.0523474
Epoch 220 loss = 8.421657 validation_loss = 2.8581748
Epoch 240 loss = 7.6225834 validation_loss = 2.655342
Epoch 260 loss = 6.856269 validation_loss = 2.4495983
Epoch 280 loss = 6.1402984 validation_loss = 2.2477582
Epoch 300 loss = 5.478821 validation_loss = 2.0562937
Epoch 320 loss = 4.8782587 validation_loss = 1.8821657
Epoch 340 loss = 4.346813 validation_loss = 1.7314159
Epoch 360 loss = 3.8

In [None]:
def get_model_params(file_path):
    tf.reset_default_graph()

    train_movies_weights = tf.get_variable(name="train_movies_weights", shape=[n_train_movies, n_dimensions])
    test_movie_weights = tf.get_variable(name="test_movie_weights", shape=[1, n_dimensions])
    user_weights = tf.get_variable(name="user_weights", shape=[n_users, n_dimensions])

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, file_path)
        return train_movies_weights.eval(), test_movie_weights.eval(), user_weights.eval()

In [None]:
a,b,c = get_model_params("./tmp/my_model_lambda=0.01_every_20.ckpt")

In [None]:
a.shape


In [None]:
b.shape

In [None]:
c.shape