<a href="https://colab.research.google.com/github/Takeitiz/Machine-Learning/blob/main/Collaborative_Filtering_Recommender_Systems.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from numpy import loadtxt
import pandas as pd 
%matplotlib inline 
import tensorflow as tf 
from tensorflow import keras 

In [5]:
# Load the movie feature matrix X, the user parameter matrix W and the
# per-user bias vector b from CSV.
# The path is handed straight to np.loadtxt so NumPy opens *and closes* the
# file itself; the previous open(...) calls left three handles unclosed.
X = np.loadtxt('/content/drive/MyDrive/work1/data/small_movies_X.csv', delimiter=",")
W = np.loadtxt('/content/drive/MyDrive/work1/data/small_movies_W.csv', delimiter=",")
b = np.loadtxt('/content/drive/MyDrive/work1/data/small_movies_b.csv', delimiter=",")
# Keep the biases as a single row so they can be indexed as b[0, user].
b = b.reshape(1, -1)
# Rows of X are movies and rows of W are users; both share the feature axis.
num_movies, num_features = X.shape
num_users, _ = W.shape

In [7]:
# Load the ratings matrix Y and the matrix R from CSV.
# R is used elsewhere in this notebook as a 0/1 mask over Y (it is cast to
# bool to select entries of Y).
# np.loadtxt is given the path directly so the file is closed as soon as it
# has been read, instead of leaking a handle from a bare open(...).
Y = np.loadtxt('/content/drive/MyDrive/work1/data/small_movies_Y.csv', delimiter=",")
R = np.loadtxt('/content/drive/MyDrive/work1/data/small_movies_R.csv', delimiter=",")

In [8]:
# Sanity check: show the shape of every loaded array and the derived sizes.
print("Y", Y.shape, "R", R.shape)
for label, array in (("X", X), ("W", W), ("b", b)):
    print(label, array.shape)
for label, size in (
    ("num_features", num_features),
    ("num_movies", num_movies),
    ("num_users", num_users),
):
    print(label, size)

Y (4778, 443) R (4778, 443)
X (4778, 10)
W (443, 10)
b (1, 443)
num_features 10
num_movies 4778
num_users 443


In [10]:
# Average only over the entries of row 0 that R flags as rated.
rated_mask = R[0].astype(bool)
tsmean = Y[0][rated_mask].mean()
print(f"Average rating for movie 1 : {tsmean:0.3f} / 5" )

Average rating for movie 1 : 3.400 / 5


In [34]:
def cofi_cost_func(X, W, b, Y, R, lambda_):
  """Collaborative-filtering cost, computed with explicit loops.

  Args:
    X: array indexed as X[movie], one feature vector per movie.
    W: array indexed as W[user], one parameter vector per user.
    b: array indexed as b[0, user], one bias per user.
    Y: array indexed as Y[movie, user], the ratings.
    R: array indexed as R[movie, user]; each error is multiplied by this
      entry before squaring, so a 0 removes that pair from the cost.
    lambda_: regularization strength.

  Returns:
    Half the sum of squared masked errors plus
    (lambda_ / 2) * (sum(W**2) + sum(X**2)).
  """
  num_movies, num_users = Y.shape
  squared_error = 0
  for user in range(num_users):
    user_weights, user_bias = W[user], b[0, user]
    for movie in range(num_movies):
      prediction = np.dot(user_weights, X[movie]) + user_bias
      masked_error = R[movie, user] * (prediction - Y[movie, user])
      squared_error += np.square(masked_error)
  regularization = (lambda_/2) * (np.sum(np.square(W)) + np.sum(np.square(X)))
  return squared_error / 2 + regularization

In [35]:
# Evaluate the loop-based cost on a small corner of the data
# (first 5 movies, first 4 users, first 3 features) without regularization.
num_users_r, num_movies_r, num_features_r = 4, 5, 3

X_r = X[:num_movies_r, :num_features_r]
W_r = W[:num_users_r, :num_features_r]
b_r = b[:1, :num_users_r]  # row 0 only, kept 2-D
Y_r = Y[:num_movies_r, :num_users_r]
R_r = R[:num_movies_r, :num_users_r]

J = cofi_cost_func(X_r, W_r, b_r, Y_r, R_r, 0)
print(f"Cost: {J:0.2f}")

Cost: 13.67


In [36]:
def cofi_cost_func_v(X, W, b, Y, R, lambda_):
  """Collaborative-filtering cost written with TensorFlow matrix ops.

  Args:
    X: matrix multiplied on the left of transpose(W).
    W: matrix whose transpose is multiplied on the right of X.
    b: bias term added to X @ transpose(W).
    Y: target values subtracted from the predictions.
    R: elementwise multiplier applied to the errors before squaring.
    lambda_: regularization strength.

  Returns:
    0.5 * sum(((X @ W^T + b - Y) * R)**2)
    + (lambda_ / 2) * (sum(X**2) + sum(W**2)).
  """
  predictions = tf.linalg.matmul(X, tf.transpose(W)) + b
  masked_errors = (predictions - Y) * R
  data_term = 0.5 * tf.reduce_sum(masked_errors**2)
  regularization = (lambda_/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
  return data_term + regularization

In [37]:
# Read the movie list: the first row is the header and the first column
# becomes the index. Titles are then pulled out as a plain list.
movieList_df = pd.read_csv(
    '/content/drive/MyDrive/work1/data/small_movie_list.csv',
    header=0,
    index_col=0,
    delimiter=',',
    quotechar='"',
)
movieList = movieList_df["title"].tolist()

In [40]:
# Ratings for a new user: 0 means "not rated", any other value is the rating
# given to the movie at that index.
my_ratings = np.zeros(num_movies)
new_user_ratings = {
    2700: 5,
    2609: 2,
    929: 5,    # Lord of the Rings: The Return of the King, The
    246: 5,    # Shrek (2001)
    2716: 3,   # Inception
    1150: 5,   # Incredibles, The (2004)
    382: 2,    # Amelie (Fabuleux destin d'Amélie Poulain, Le)
    366: 5,    # Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
    622: 5,    # Harry Potter and the Chamber of Secrets (2002)
    988: 3,    # Eternal Sunshine of the Spotless Mind (2004)
    2925: 1,   # Louis Theroux: Law & Disorder (2008)
    2937: 1,   # Nothing to Declare (Rien à déclarer)
    793: 5,
}
for movie_index, rating in new_user_ratings.items():
    my_ratings[movie_index] = rating
# Indices of every movie the new user rated, in ascending order.
my_rated = np.flatnonzero(my_ratings > 0).tolist()


[246, 366, 382, 622, 793, 929, 988, 1150, 2609, 2700, 2716, 2925, 2937]

In [41]:
# Reload Y and R from disk so this cell can be re-run without stacking the
# new user's column more than once.
# np.loadtxt receives the path directly and closes the file itself; the
# previous bare open(...) calls left the handles unclosed.
Y = np.loadtxt('/content/drive/MyDrive/work1/data/small_movies_Y.csv', delimiter=",")
R = np.loadtxt('/content/drive/MyDrive/work1/data/small_movies_R.csv', delimiter=",")
# Prepend the new user's ratings as column 0 of Y, and the matching
# rated / not-rated indicator as column 0 of R.
Y = np.c_[my_ratings, Y]
R = np.c_[(my_ratings != 0).astype(int), R]

In [42]:
# Per-row mean over the entries selected by R, kept as a column vector.
# The 1e-12 in the denominator avoids a division by zero when a row of R
# sums to 0.
rating_totals = np.sum(Y * R, axis=1, keepdims=True)
rating_counts = np.sum(R, axis=1, keepdims=True)
Ymean = rating_totals / (rating_counts + 1e-12)
# Subtract the mean only where R is non-zero; other entries stay unchanged.
Ynorm = Y - Ymean * R

In [43]:
# Problem size is re-read from Y; the feature count is set to 100 here.
num_movies, num_users = Y.shape
num_features = 100
# Fix TensorFlow's global seed so the random initial values below are
# reproducible. The draws happen in the order W, X, b.
tf.random.set_seed(1234) # for consistent results
# Trainable parameters, initialised from a normal distribution in float64:
#   W: (num_users,  num_features)
#   X: (num_movies, num_features)
#   b: (1,          num_users)
W = tf.Variable(tf.random.normal((num_users,  num_features),dtype=tf.float64),  name='W')
X = tf.Variable(tf.random.normal((num_movies, num_features),dtype=tf.float64),  name='X')
b = tf.Variable(tf.random.normal((1,          num_users),   dtype=tf.float64),  name='b')
# Adam optimizer with a learning rate of 0.1.
optimizer = keras.optimizers.Adam(learning_rate=1e-1)

In [44]:
# Custom training loop: minimise the vectorized cost over X, W and b.
iterations = 200
lambda_ = 1
# The counter is named `step`, not `iter`, so the built-in iter() is not
# shadowed in the notebook's global namespace for later cells.
for step in range(iterations):
  # Record the cost computation so TensorFlow can differentiate it.
  with tf.GradientTape() as tape:
    cost_value = cofi_cost_func_v(X, W, b, Ynorm, R, lambda_)
  # Gradients of the cost with respect to each trainable variable.
  grads = tape.gradient(cost_value, [X,W,b])
  # One optimizer update, pairing each gradient with its variable.
  optimizer.apply_gradients(zip(grads, [X,W,b]))
  # Report the loss every 20 iterations.
  if step % 20 == 0:
    print(f"Training loss at iteration {step}: {cost_value:0.1f}")

Training loss at iteration 0: 2321191.3
Training loss at iteration 20: 136168.7
Training loss at iteration 40: 51863.3
Training loss at iteration 60: 24598.8
Training loss at iteration 80: 13630.4
Training loss at iteration 100: 8487.6
Training loss at iteration 120: 5807.7
Training loss at iteration 140: 4311.6
Training loss at iteration 160: 3435.2
Training loss at iteration 180: 2902.1


In [45]:
# Predictions from the learned parameters; adding Ymean back undoes the
# mean normalisation applied before training.
p = X.numpy() @ W.numpy().T + b.numpy()
pm = p + Ymean
# Column 0 is the user whose ratings were prepended to Y.
my_predictions = pm[:, 0]
# Movie indices ordered from highest to lowest predicted rating.
ix = tf.argsort(my_predictions, direction='DESCENDING')

# Go through the 17 highest predictions and print those not already rated.
for rank in range(17):
    j = ix[rank]
    if j in my_rated:
        continue
    print(f'Predicting rating {my_predictions[j]:0.2f} for movie {movieList[j]}')

print('\n\nOriginal vs Predicted ratings:\n')
# Compare the prediction with the given rating for every rated movie.
for i in np.flatnonzero(my_ratings > 0):
    print(f'Original {my_ratings[i]}, Predicted {my_predictions[i]:0.2f} for {movieList[i]}')

Predicting rating 4.49 for movie My Sassy Girl (Yeopgijeogin geunyeo) (2001)
Predicting rating 4.48 for movie Martin Lawrence Live: Runteldat (2002)
Predicting rating 4.48 for movie Memento (2000)
Predicting rating 4.47 for movie Delirium (2014)
Predicting rating 4.47 for movie Laggies (2014)
Predicting rating 4.47 for movie One I Love, The (2014)
Predicting rating 4.46 for movie Particle Fever (2013)
Predicting rating 4.45 for movie Eichmann (2007)
Predicting rating 4.45 for movie Battle Royale 2: Requiem (Batoru rowaiaru II: Chinkonka) (2003)
Predicting rating 4.45 for movie Into the Abyss (2011)


Original vs Predicted ratings:

Original 5.0, Predicted 4.90 for Shrek (2001)
Original 5.0, Predicted 4.84 for Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
Original 2.0, Predicted 2.13 for Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
Original 5.0, Predicted 4.88 for Harry Potter and the Chamber of Secrets (2002)
Original 5.0, Predic