In [13]:
# !pip install numpy
import numpy as np
import tensorflow as tf
from tensorflow import keras
from recsys_utils import *

In [14]:
# Load pre-computed collaborative-filtering parameters together with the
# dimensions they were trained for. Judging by the shapes printed in the next
# cell, X is (num_movies, num_features), W is (num_users, num_features) and
# b is (1, num_users).
X, W, b, num_movies, num_features, num_users = load_precalc_params_small()
# Y holds the ratings and R is the mask that multiplies the prediction error in
# the cost function; both are (num_movies, num_users).
# NOTE(review): R is presumably 1 where a rating exists and 0 otherwise - confirm in recsys_utils.
Y, R = load_ratings_small()

In [15]:
# Inspect the shapes of the loaded parameters next to the reported dimensions.
X.shape , W.shape , b.shape , num_movies , num_features , num_users

((4778, 10), (443, 10), (1, 443), 4778, 10, 443)

In [16]:
# Y and R must share the same shape because R is applied element-wise to the
# prediction error (X @ W.T + b - Y) in the cost function.
Y.shape , R.shape

((4778, 443), (4778, 443))

In [17]:
# movie_list is indexed by movie index (row of Y) to obtain the title used in
# the printouts below; df is not used by the cells visible in this notebook.
movie_list  , df = load_Movie_List_pd()

In [18]:
def cost_function_for_collabfiltering(X,W,B,Y,R,lambda_):
  """Regularised squared-error cost for collaborative filtering.

  Computes
      J = 1/2 * sum(((X @ W^T + B - Y) * R)^2)
          + lambda_/2 * (sum(X^2) + sum(W^2))

  Args:
    X: movie feature matrix; in this notebook (num_movies, num_features).
    W: user weight matrix; in this notebook (num_users, num_features).
    B: user bias, broadcast against X @ W^T; in this notebook (1, num_users).
    Y: rating matrix with the same shape as X @ W^T.
    R: mask with the same shape as Y; entries where R == 0 do not contribute
       to the squared-error term.
    lambda_: regularisation strength applied to X and W (the bias is not
       regularised).

  Returns:
    Scalar tensor holding the cost J.
  """
  # Use the bias passed in as B. The previous version ignored this argument
  # and silently read the notebook-level global `b` instead.
  masked_error = (tf.linalg.matmul(X, tf.transpose(W)) + B - Y)*R
  squared_error_term = 0.5 * tf.reduce_sum(masked_error**2)
  regularisation_term = (lambda_/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
  J = squared_error_term + regularisation_term
  return J

In [19]:
# Sanity check: evaluate the cost on the pre-computed parameters with lambda_ = 1.
cost_function_for_collabfiltering(X,W,b,Y,R,1)

<tf.Tensor: shape=(), dtype=float64, numpy=294610.3346320058>

In [20]:
# Ratings supplied by the new user, keyed by movie index (row of Y and
# position in movie_list). Movies that are not listed stay at 0 = "not rated".
new_user_ratings = {
    62: 1,
    2716: 5,
    3618: 1,
    1398: 5,
    3083: 5,
    2112: 3,
    2755: 4,
    4170: 4,
    378: 5,
    3773: 3,
    4415: 4,
    3009: 1,
    1088: 1,
    3556: 5,
}

# Column vector (num_movies, 1) so it can be prepended to Y as a new user column.
personalised_rating = np.zeros((num_movies,1))
for movie_idx, rating in new_user_ratings.items():
    personalised_rating[movie_idx] = rating

# Indices of the movies the new user rated, in ascending order.
my_rated = [i for i in range(num_movies) if personalised_rating[i, 0] > 0]
for i in my_rated:
    # Index the scalar explicitly: float() on a 1-element array is deprecated
    # since NumPy 1.25.
    print(f"User rated {float(personalised_rating[i, 0])} to movie : {movie_list[i]}")

User rated 1.0 to movie : Mission: Impossible II (2000)
User rated 5.0 to movie : Ocean's Eleven (2001)
User rated 1.0 to movie : Bourne Supremacy, The (2004)
User rated 5.0 to movie : Batman Begins (2005)
User rated 3.0 to movie : Dark Knight, The (2008)
User rated 5.0 to movie : Inception (2010)
User rated 4.0 to movie : Social Network, The (2010)
User rated 1.0 to movie : Moneyball (2011)
User rated 5.0 to movie : Dark Knight Rises, The (2012)
User rated 5.0 to movie : Wolf of Wall Street, The (2013)
User rated 1.0 to movie : Interstellar (2014)
User rated 3.0 to movie : John Wick (2014)
User rated 4.0 to movie : The Intern (2015)
User rated 4.0 to movie : Now You See Me 2 (2016)


In [21]:
# Prepend the new user as column 0 of both the rating matrix and the mask.
# NOTE: this cell is not idempotent - every re-run prepends another column,
# so run it exactly once after loading Y and R.
new_user_mask = (personalised_rating != 0).astype(int)
Y = np.concatenate((personalised_rating, Y), axis=1)
R = np.concatenate((new_user_mask, R), axis=1)

In [22]:
# Normalise the ratings before training. mean_y is added back to the model
# output when predictions are computed further down.
# NOTE(review): presumably a per-movie mean over rated entries only - confirm in recsys_utils.
normalised_y , mean_y = normalizeRatings(Y,R)

In [23]:
# Re-derive the dimensions from Y, which now includes the new user's column.
num_movies, num_users = Y.shape
# Number of latent features to learn per movie / per user.
num_features = 100
# Set Initial Parameters (W, X), use tf.Variable to track these variables
# The three draws below are kept in this order (W, X, b) after seeding so the
# initial values stay the same between runs.
tf.random.set_seed(1234) # for consistent results
W = tf.Variable(tf.random.normal((num_users,  num_features),dtype=tf.float64),  name='W')
X = tf.Variable(tf.random.normal((num_movies, num_features),dtype=tf.float64),  name='X')
b = tf.Variable(tf.random.normal((1,          num_users),   dtype=tf.float64),  name='b')

# Instantiate an optimizer.
# Adam with learning rate 0.1; it is applied to X, W and b in the training loop.
optimizer = keras.optimizers.Adam(learning_rate=1e-1)

In [24]:
iterations = 300   # number of optimisation steps
lambda_ = 1        # regularisation strength passed to the cost function
# `step` is used instead of `iter` so the builtin iter() is not shadowed.
for step in range(iterations):
    # Use TensorFlow's GradientTape
    # to record the operations used to compute the cost
    with tf.GradientTape() as tape:

        # Compute the cost (forward pass included in cost).
        # lambda_ is passed through instead of a hard-coded 1, so changing the
        # setting above actually takes effect.
        cost_value = cost_function_for_collabfiltering(X,W,b,normalised_y,R,lambda_)

    # Use the gradient tape to automatically retrieve
    # the gradients of the trainable variables with respect to the loss
    grads = tape.gradient( cost_value, [X,W,b] )

    # Run one optimizer (Adam) step by updating
    # the value of the variables to minimize the loss.
    optimizer.apply_gradients( zip(grads, [X,W,b]) )

    # Log periodically.
    if step % 20 == 0:
        print(f"Training loss at iteration {step}: {cost_value:0.1f}")

Training loss at iteration 0: 2321981.3
Training loss at iteration 20: 136170.8
Training loss at iteration 40: 51869.2
Training loss at iteration 60: 24604.4
Training loss at iteration 80: 13633.3
Training loss at iteration 100: 8488.8
Training loss at iteration 120: 5808.2
Training loss at iteration 140: 4312.1
Training loss at iteration 160: 3435.9
Training loss at iteration 180: 2902.8
Training loss at iteration 200: 2567.4
Training loss at iteration 220: 2349.6
Training loss at iteration 240: 2203.8
Training loss at iteration 260: 2103.4
Training loss at iteration 280: 2032.2


In [25]:
# Model output for every (movie, user) pair, still in normalised space.
movie_features = X.numpy()
user_weights = W.numpy()
predictions_normalised = movie_features @ user_weights.T + b.numpy()

# Undo the normalisation by adding the mean back.
prediction = predictions_normalised + mean_y

# Column 0 is the new user that was prepended to Y and R.
pred = prediction[:, 0]
print(pred)

# ix lists movie indices ordered from highest to lowest predicted rating,
# so ix[0] is the index of the top recommendation.
ix = tf.argsort(pred, direction='DESCENDING')


[2.72999641 2.71042097 1.43477092 ... 2.91302396 2.91301832 2.91297297]


In [26]:
# Number of top-ranked movies to inspect; movies the user already rated are
# skipped, so fewer than top_k lines may be printed.
top_k = 20

print("The Below given movie list is the most recommended according to the Collaborative filtering")
# Convert the ranked indices to plain Python ints once, so the membership test
# and the list/array indexing below do not rely on implicit tensor conversion.
top_indices = np.asarray(ix[:top_k]).tolist()
already_rated = set(my_rated)
for j in top_indices:
    if j in already_rated:
        continue
    print(f'Predicting rating {pred[j]:0.2f} for movie {movie_list[j]}')

The Below given movie list is the most recommended according to the Collaborative filtering
Predicting rating 4.51 for movie Odd Life of Timothy Green, The (2012)
Predicting rating 4.49 for movie My Sassy Girl (Yeopgijeogin geunyeo) (2001)
Predicting rating 4.47 for movie Satin Rouge (2002)
Predicting rating 4.46 for movie Dragons: Gift of the Night Fury (2011)
Predicting rating 4.45 for movie Raise Your Voice (2004)
Predicting rating 4.45 for movie English Vinglish (2012)
Predicting rating 4.44 for movie Colourful (Karafuru) (2010)
Predicting rating 4.43 for movie Tyler Perry's I Can Do Bad All by Myself (2009)
Predicting rating 4.43 for movie Deathgasm (2015)
Predicting rating 4.43 for movie Delirium (2014)
Predicting rating 4.43 for movie One I Love, The (2014)
Predicting rating 4.43 for movie Laggies (2014)
Predicting rating 4.43 for movie Kung Fu Panda: Secrets of the Masters (2011)
Predicting rating 4.43 for movie Particle Fever (2013)
Predicting rating 4.42 for movie Ex Drummer 

In [27]:
# Compare the new user's own ratings with the model's predictions for the
# same movies.
rated_indices = np.flatnonzero(personalised_rating[:, 0] > 0).tolist()
for i in rated_indices:
    # personalised_rating is a (num_movies, 1) column; pick the scalar so the
    # line reads "Original 1.0" rather than the array repr "Original [1.]".
    original_rating = float(personalised_rating[i, 0])
    print(f'Original {original_rating}, Predicted {pred[i]:0.2f} for {movie_list[i]}')

Original [1.], Predicted 1.17 for Mission: Impossible II (2000)
Original [5.], Predicted 4.87 for Ocean's Eleven (2001)
Original [1.], Predicted 1.29 for Bourne Supremacy, The (2004)
Original [5.], Predicted 4.77 for Batman Begins (2005)
Original [3.], Predicted 3.08 for Dark Knight, The (2008)
Original [5.], Predicted 4.89 for Inception (2010)
Original [4.], Predicted 3.92 for Social Network, The (2010)
Original [1.], Predicted 1.39 for Moneyball (2011)
Original [5.], Predicted 4.84 for Dark Knight Rises, The (2012)
Original [5.], Predicted 4.78 for Wolf of Wall Street, The (2013)
Original [1.], Predicted 1.27 for Interstellar (2014)
Original [3.], Predicted 3.11 for John Wick (2014)
Original [4.], Predicted 3.82 for The Intern (2015)
Original [4.], Predicted 3.80 for Now You See Me 2 (2016)
