COLLABORATIVE FILTERING BASED RECOMMENDER SYSTEM

In [1]:
#a recommender system can be viewed as an example of multivariate linear regression in which each sample (movie)
#has multiple labels; here those labels are the ratings given to the movie by the different users

In [1]:
import os

import numpy as np
import tensorflow as tf

In [2]:
# Folder that holds the small_movies_*.csv files. It is the only machine-specific value in
# the notebook: change this one line to run it somewhere else.
DATA_DIR = 'C:\\Users\\Lenovo\\OneDrive\\Desktop\\coursera_ml'

# All five files are comma-separated numeric tables.
X = np.loadtxt(os.path.join(DATA_DIR, 'small_movies_X.csv'), delimiter=',')
Y = np.loadtxt(os.path.join(DATA_DIR, 'small_movies_Y.csv'), delimiter=',')
W = np.loadtxt(os.path.join(DATA_DIR, 'small_movies_W.csv'), delimiter=',')
R = np.loadtxt(os.path.join(DATA_DIR, 'small_movies_R.csv'), delimiter=',')
B = np.loadtxt(os.path.join(DATA_DIR, 'small_movies_b.csv'), delimiter=',')

In [3]:
# X is laid out as (movies, features) and W as (users, features),
# so the two shapes provide every size used later on.
nm, n = X.shape
nu = len(W)

print('no of features: ', n)
print('no of movies: ', nm)
print('no of users: ', nu)

no of features:  10
no of movies:  4778
no of users:  443


In [4]:
# Show the shape of every loaded array, one per line.
for label, matrix in (
    ("R shape: :", R),
    ("X shape: ", X),
    ("Y shape: ", Y),
    ("W shape: ", W),
    ("B shape: ", B),
):
    print(label, matrix.shape)

R shape: : (4778, 443)
X shape:  (4778, 10)
Y shape:  (4778, 443)
W shape:  (443, 10)
B shape:  (443,)


In [5]:
#X: feature matrix of order (nm, n)
#Y: ratings matrix (labels matrix) of order (nm, nu)
#W: weight matrix of order (nu, n)
#B: bias vector, one entry per user, represented in matrix form of order (1, nu)
#R: indicator matrix of order (nm, nu); element (i, j) is 1 if user j has rated movie i

In [6]:
#only Y, R needed to start collaborative filtering algo

In [17]:
#cost func for collaborative filtering
def cofi_cost_func(X, Y, W, B, R, lambda_):
    """
    Regularised squared-error cost for collaborative filtering.

    Written with TensorFlow ops (not NumPy) so that it can be differentiated
    inside a tf.GradientTape in the custom training loop.

    Args:
      X (num_movies, num_features): matrix of item features
      Y (num_movies, num_users)   : matrix of user ratings of movies
      W (num_users, num_features) : matrix of user parameters
      B (1, num_users)            : vector of user biases
      R (num_movies, num_users)   : R(i, j) = 1 if the i-th movie was rated by the j-th user
      lambda_ (float)             : regularization parameter

    Returns:
      J : scalar cost (half the sum of squared errors over rated entries,
          plus lambda_/2 times the squared entries of W and X)
    """
    # predicted rating for every (movie, user) pair
    predictions = tf.linalg.matmul(X, tf.transpose(W)) + B

    # multiplying by R zeroes the error wherever no rating exists
    masked_error = (predictions - Y) * R
    fit_term = tf.reduce_sum(masked_error**2) / 2

    # L2 penalty on both the user parameters and the learned movie features
    penalty_term = (lambda_ / 2) * (tf.reduce_sum(W**2) + tf.reduce_sum(X**2))

    return fit_term + penalty_term

In [16]:
#custom training loop
def training_loop(X, Y, W, B, R, lambda_, iterations, optimizer):
    """
    Fit X, W and B for the recommender system by minimising cofi_cost_func.

    Args:
      X (num_movies, num_features): initial matrix of item features
      Y (num_movies, num_users)   : matrix of user ratings of movies
      W (num_users, num_features) : initial matrix of user parameters
      B (1, num_users)            : initial vector of user biases
      R (num_movies, num_users)   : R(i, j) = 1 if the i-th movie was rated by the j-th user
      lambda_ (float)             : regularization parameter
      iterations (int)            : number of epochs
      optimizer                   : optimizer object providing apply_gradients
                                    (the notebook passes tf.keras.optimizers.Adam)

    Returns:
      X, W, B: the same three objects that were passed in, after the updates
               applied by the optimizer
    """
    trainable = [X, W, B]

    # The cost sums over every rated entry, so each update sees the whole
    # dataset (batch gradient descent); one iteration is therefore one epoch.
    for step in range(iterations):
        # record the forward pass so the cost can be differentiated
        with tf.GradientTape() as tape:
            cost = cofi_cost_func(X, Y, W, B, R, lambda_)

        gradients = tape.gradient(cost, trainable)
        optimizer.apply_gradients(zip(gradients, trainable))

        # progress report on the 1st, 21st, 41st, ... epoch
        if step % 20 == 0:
            print(f"epoch: {step+1}, cost: {cost}")

    return X, W, B


In [7]:
#mean normalisation of Y   
def mean_norm(Y, R):
    """
    Mean-normalise the ratings matrix movie by movie.

    For every movie (row) the mean of its rated entries is subtracted from
    those rated entries; unrated entries are left at 0. The input arrays are
    not modified.

    Args:
        Y (ndarray (num_movies, num_users)): matrix of user ratings of movies
        R (ndarray (num_movies, num_users)): R(i, j) is non-zero (1) if the i-th
            movie was rated by the j-th user

    Returns:
        Y_norm (ndarray (num_movies, num_users)): mean-normalised ratings (new array)
        mean_vec (ndarray (1, num_movies)): per-movie mean rating as a row vector;
            a movie with no ratings gets mean 0
    """
    # work in float so integer rating matrices are accepted as well
    Y = np.asarray(Y, dtype=float)
    rated = np.asarray(R) != 0

    # sum and count only the entries flagged as rated
    totals = np.where(rated, Y, 0.0).sum(axis=1)
    counts = rated.sum(axis=1)

    # guard against 0/0 for movies nobody rated: their mean stays 0 instead of NaN
    means = np.divide(totals, counts, out=np.zeros_like(totals), where=counts > 0)

    # build a new array rather than updating Y in place, so the caller's data survives
    Y_norm = np.where(rated, Y - means[:, np.newaxis], 0.0)
    mean_vec = means.reshape(1, -1)  #(1, nm)

    return Y_norm, mean_vec

In [8]:
#the ratings by 443 users are already mentioned in Y, but adding our own ratings for certain movies

In [9]:
# movie row index -> rating given by the new user
new_user_ratings = {
    2700: 5,  # Toy Story
    2609: 2,  # Persuasion (2007)
    929: 5,   # Lord of the Rings: The Return of the King, The
    246: 5,   # Shrek (2001)
    2716: 3,  # Inception
    1150: 5,  # Incredibles, The (2004)
    382: 2,   # Amelie (Fabuleux destin d'Amélie Poulain, Le)
    366: 5,   # Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
    622: 5,   # Harry Potter and the Chamber of Secrets (2002)
    988: 3,   # Eternal Sunshine of the Spotless Mind (2004)
    2925: 1,  # Louis Theroux: Law & Disorder (2008)
    2937: 1,  # Nothing to Declare (Rien à déclarer)
    793: 5,   # Pirates of the Caribbean: The Curse of the Black Pearl (2003)
}

# one slot per movie; every slot not listed above keeps the value 0
my_ratings = np.zeros(Y.shape[0])
my_ratings[list(new_user_ratings.keys())] = list(new_user_ratings.values())


#some of the ratings given by this new user, the rest of the ratings are 0 as the user has not rated them yet

In [10]:
# The new user becomes column 0 of both the ratings matrix and the rated-indicator matrix.
new_user_rated = (my_ratings != 0).astype(int)
Y = np.column_stack((my_ratings, Y))
R = np.column_stack((new_user_rated, R))

In [11]:
print(Y.shape)
print(R.shape)   #new cols added
# Take the user count from the data instead of incrementing it: re-running this cell
# then leaves nu unchanged rather than adding 1 each time.
nu = Y.shape[1]   #to reflect the fact that a new user was added

(4778, 444)
(4778, 444)


In [12]:
#applying mean normalisation on Y
# A copy is passed so the raw ratings in Y are preserved even if mean_norm works in place;
# this also makes the cell safe to run more than once.
Y_norm, movies_mean = mean_norm(Y.copy(), R)

In [13]:
# print(Y_norm.shape)
# print(Y.shape)
# movies_mean is a row vector with one entry per movie
print(movies_mean.shape)

(1, 4778)


In [34]:
# X is learned together with W and B, so the number of features is a free choice;
# this example uses 100 features.
n = 100
# movies_mean holds one value per movie (its mean rating), not one per user
print("mean rating of each movie: ", movies_mean)

#creating initial values of X, W, B based on this new number of features
tf.random.set_seed(1234)  #to achieve consistency in the random nos
X_init = tf.Variable(tf.random.normal((nm, n), dtype=tf.float64), name='X')
W_init = tf.Variable(tf.random.normal((nu, n), dtype=tf.float64), name='W')
B_init = tf.Variable(tf.random.normal((1, nu), dtype=tf.float64), name='B')

#instantiating the Adam optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

mean values of movie ratings for each user:  [[3.4  3.25 2.   ... 3.5  3.5  3.5 ]]


In [35]:
# Named "iterations" rather than "iter" so Python's built-in iter() is not shadowed for the
# rest of the kernel session; "lambda_" replaces the easily misread single letter "l".
iterations = 200
lambda_ = 1
X, W, B = training_loop(X_init, Y_norm, W_init, B_init, R, lambda_, iterations, optimizer)


epoch: 1, cost: 2286455.4447972253
epoch: 21, cost: 133685.5210945303
epoch: 41, cost: 50539.461469094334
epoch: 61, cost: 23852.63504154022
epoch: 81, cost: 13176.219037940646
epoch: 101, cost: 8200.921999031263
epoch: 121, cost: 5614.828357320321
epoch: 141, cost: 4173.217081024721
epoch: 161, cost: 3331.4338394382667
epoch: 181, cost: 2822.234005454627


In [67]:
#now to predict the ratings with our trained recommender system
#making a prediction matrix of same order as Y

# X, W, B are converted to NumPy arrays right here, so this cell does not depend on a
# separate conversion step having been run beforehand (np.asarray is a no-op for
# arrays that are already NumPy).
# The per-movie means removed during normalisation are added back to every column.
p = np.asarray(X) @ np.asarray(W).T + np.asarray(B) + movies_mean.T
print(p)   #the predictions for all users

[[2.68546368 4.91422754 3.34840872 ... 3.12371376 4.03680164 3.50909084]
 [2.62247246 4.75526795 3.23976213 ... 2.85928252 3.52696279 3.51685461]
 [1.33144096 3.51508601 2.1145799  ... 1.80729587 2.13358998 2.06436881]
 ...
 [2.79787189 5.0182921  3.56858088 ... 3.32913104 3.96823924 3.6399879 ]
 [2.79829553 5.01835692 3.5695205  ... 3.32874733 3.9670432  3.63937424]
 [2.79831801 5.01835794 3.56952893 ... 3.3287716  3.96703927 3.63918386]]
