In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive at /content/Drive; the CSV files read below live under
# /content/Drive/MyDrive/data/collaborative_filtering.
drive.mount('/content/Drive')

Mounted at /content/Drive


In [None]:
import numpy as np
import tensorflow as tf
import tensorflow.keras
import pandas as pd
from numpy import loadtxt

In [None]:
def load_dataset(data_dir='/content/Drive/MyDrive/data/collaborative_filtering'):
  """Load the X, W and b arrays from comma-separated files.

  Args:
    data_dir: Directory that contains ``small_movies_X.csv``,
      ``small_movies_W.csv`` and ``small_movies_b.csv``. Defaults to the
      Google Drive folder used by this notebook.

  Returns:
    Tuple ``(X, W, b)`` of NumPy arrays as parsed by ``loadtxt``; ``b`` is
    reshaped to a single row, i.e. shape ``(1, -1)``.

  Raises:
    OSError: If one of the files cannot be opened.
  """

  def read_csv(filename):
    # The context manager closes the handle even when parsing fails;
    # previously each handle was left open until garbage collection.
    with open(f'{data_dir}/{filename}', 'rb') as file:
      return loadtxt(file, delimiter=',')

  X = read_csv('small_movies_X.csv')
  W = read_csv('small_movies_W.csv')
  # Force b into a row vector so it broadcasts across rows when added to X @ W.T
  b = read_csv('small_movies_b.csv').reshape(1,-1)

  return (X,W,b)

In [None]:
# Read the X, W and b arrays from their CSV files on the mounted Drive.
X,W,b = load_dataset()

In [None]:
# Report the shape of each loaded array, one per line.
print('\n'.join(
    f'{caption} {arr.shape}'
    for caption, arr in (
        ('shape of X is ', X),
        ('shape of W is ', W),
        ('shape of b is ', b),
    )
))

shape of X is  (4778, 10)
shape of W is  (443, 10)
shape of b is  (1, 443)


In [None]:
# X has one row per movie and one column per feature; W has one row per user.
num_movies, num_features = X.shape
num_user = W.shape[0]

In [None]:
# Summarise the problem dimensions derived from X and W.
print('\n'.join(
    f'{caption} {count}'
    for caption, count in (
        ('number of movies', num_movies),
        ('number of features', num_features),
        ('number of user', num_user),
    )
))

number of movies 4778
number of features 10
number of user 443


In [None]:
def load_ratings(data_dir='/content/Drive/MyDrive/data/collaborative_filtering'):
    """Load the Y and R matrices from comma-separated files.

    Args:
        data_dir: Directory that contains ``small_movies_Y.csv`` and
            ``small_movies_R.csv``. Defaults to the Google Drive folder used
            by this notebook.

    Returns:
        Tuple ``(Y, R)`` of NumPy arrays as parsed by ``loadtxt``.

    Raises:
        OSError: If one of the files cannot be opened.
    """
    # Context managers close each handle as soon as it has been parsed;
    # previously both handles were left open until garbage collection.
    with open(f'{data_dir}/small_movies_Y.csv', 'rb') as file:
        Y = loadtxt(file,delimiter = ",")

    with open(f'{data_dir}/small_movies_R.csv', 'rb') as file:
        R = loadtxt(file,delimiter = ",")

    return(Y,R)

In [None]:
# Read the Y and R matrices from their CSV files on the mounted Drive.
Y,R = load_ratings()

In [None]:
# Report the shapes of the two rating matrices, one per line.
print('\n'.join(
    f'{caption} {arr.shape}'
    for caption, arr in (('shape of R', R), ('shape of Y', Y))
))

shape of R (4778, 443)
shape of Y (4778, 443)


In [None]:
def cost_cofi_func(X,Y,W,b,R,lamba):
  """Regularised squared-error cost for collaborative filtering.

  Args:
    X: Matrix multiplied by the transpose of ``W`` to form predictions.
    Y: Target values subtracted from the predictions.
    W: Matrix whose transpose is right-multiplied onto ``X``.
    b: Bias added to ``X @ W^T``.
    R: Mask multiplied element-wise into the error; entries where it is 0
      contribute nothing to the cost.
    lamba: Regularisation strength applied to the squared entries of ``X``
      and ``W`` (``b`` is not regularised).

  Returns:
    Scalar tensor: half the sum of squared masked errors plus
    ``lamba / 2`` times the sum of squares of ``X`` and ``W``.
  """
  predicted = tf.linalg.matmul(X, tf.transpose(W)) + b
  masked_error = (predicted - Y) * R

  squared_error = 0.5 * tf.reduce_sum(masked_error**2)
  penalty = (lamba/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
  return squared_error + penalty

In [None]:
# Movie metadata table; the first CSV column is used as the row index.
movieList = pd.read_csv(
    '/content/Drive/MyDrive/data/collaborative_filtering/small_movie_lists.csv',
    header=0,
    index_col=0,
    delimiter=',',
    quotechar='"',
)
# Titles as a plain Python list, in the same order as the rows of movieList.
moviesList_df = movieList['title'].tolist()

In [None]:
# Ratings entered for the new user, keyed by the movie's row index.
new_user_ratings = {
    2700: 5,
    2609: 2,
    929: 5,
    246: 5,
    2716: 3,
    1150: 5,
    382: 2,
    366: 5,
    622: 5,
    988: 3,
    2925: 1,
    2937: 1,
    793: 5,
}

# Dense rating vector over all movies; 0 means "not rated".
my_ratings = np.zeros(num_movies)
my_ratings[list(new_user_ratings)] = list(new_user_ratings.values())

# Row indices of the movies the new user has rated, in ascending order.
my_rated = [i for i, rating in enumerate(my_ratings) if rating > 0]

print('\nNew user ratings:\n')
for i in my_rated:
    print(f'Rated {my_ratings[i]} for  {moviesList_df[i]}')


New user ratings:

Rated 5.0 for  Shrek (2001)
Rated 5.0 for  Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
Rated 2.0 for  Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
Rated 5.0 for  Harry Potter and the Chamber of Secrets (2002)
Rated 5.0 for  Pirates of the Caribbean: The Curse of the Black Pearl (2003)
Rated 5.0 for  Lord of the Rings: The Return of the King, The (2003)
Rated 3.0 for  Eternal Sunshine of the Spotless Mind (2004)
Rated 5.0 for  Incredibles, The (2004)
Rated 2.0 for  Persuasion (2007)
Rated 5.0 for  Toy Story 3 (2010)
Rated 3.0 for  Inception (2010)
Rated 1.0 for  Louis Theroux: Law & Disorder (2008)
Rated 1.0 for  Nothing to Declare (Rien à déclarer) (2010)


In [None]:
# Prepend the new user as column 0 of the ratings matrix and of the indicator
# matrix (1 where a rating was given, 0 otherwise).
# NOTE: re-running this cell prepends another copy of the column each time.
Y = np.column_stack((my_ratings, Y))

R = np.column_stack(((my_ratings != 0).astype(int), R))

In [None]:
# Per-movie mean over rated entries only, kept as a column vector; the 1e-12
# avoids division by zero for rows where R sums to zero.
Y_mean = np.sum(Y * R, axis=1, keepdims=True) / (np.sum(R, axis=1, keepdims=True) + 1e-12)
# Subtract the mean only where a rating exists, so unrated entries stay unchanged.
Ynorm = Y - Y_mean * R

In [None]:
# Dimensions now include the new user that was prepended to Y.
num_movies, num_user = Y.shape

num_features = 100

# Seed TensorFlow's global generator so the random initial values below (and
# therefore the training run) are reproducible under Restart & Run All.
tf.random.set_seed(1234)

# Trainable parameters, drawn from a standard normal distribution:
# one row of W per user, one row of X per movie, one bias per user.
W = tf.Variable(tf.random.normal((num_user,num_features),dtype = tf.float64),name = 'W')
X = tf.Variable(tf.random.normal((num_movies,num_features),dtype = tf.float64),name = 'X')
b = tf.Variable(tf.random.normal((1,num_user),dtype = tf.float64),name = 'b')

In [None]:
# Adam optimizer with learning rate 0.1; the training loop uses it to update X, W and b.
optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-1)

In [None]:
iteration = 200   # number of gradient steps
lambda_ = 1       # regularisation strength passed to cost_cofi_func

for i in range(iteration):
  # Record the forward pass so the cost can be differentiated.
  # Uses the `tf` alias like the rest of the notebook; `tensorflow.GradientTape`
  # only resolved because `import tensorflow.keras` happens to bind `tensorflow`.
  with tf.GradientTape() as tape:
    cost = cost_cofi_func(X,Ynorm,W,b,R,lambda_)

  # Gradients of the cost with respect to the trainable variables,
  # followed by one optimizer step that updates them in place.
  grads = tape.gradient( cost, [X,W,b] )
  optimizer.apply_gradients( zip(grads, [X,W,b]) )

  # Log the loss every 20 iterations.
  if i % 20 == 0:
    print(f"Training loss at iteration {i}: {cost:0.1f}")

Training loss at iteration 0: 2289811.6
Training loss at iteration 20: 135137.8
Training loss at iteration 40: 51595.2
Training loss at iteration 60: 24428.7
Training loss at iteration 80: 13477.0
Training loss at iteration 100: 8353.3
Training loss at iteration 120: 5694.1
Training loss at iteration 140: 4216.3
Training loss at iteration 160: 3355.5
Training loss at iteration 180: 2835.7


In [None]:
# Predicted (mean-normalised) ratings for every movie/user pair.
p = X.numpy() @ W.numpy().T + b.numpy()

# Add each movie's mean rating back to undo the normalisation.
pm = p + Y_mean

# Column 0 is the new user that was prepended to Y.
my_predictions = pm[:, 0]

# Movie indices ordered from highest to lowest predicted rating.
ix = tf.argsort(my_predictions, direction='DESCENDING')


In [None]:
# Walk the 17 highest predictions and print those the new user has not rated.
for i in range(17):
  j = ix[i]
  if j in my_rated:
    continue
  print(f'my prediction {my_predictions[j]:0.2f} for {moviesList_df[j]}')

my prediction 4.43 for Colourful (Karafuru) (2010)
my prediction 4.35 for Kung Fu Panda: Secrets of the Masters (2011)
my prediction 4.34 for Particle Fever (2013)
my prediction 4.34 for The Girl with All the Gifts (2016)
my prediction 4.33 for Human (2015)
my prediction 4.33 for Into the Forest of Fireflies' Light (2011)
my prediction 4.33 for Seve (2014)
my prediction 4.33 for Max Manus (2008)
my prediction 4.33 for Bossa Nova (2000)
my prediction 4.33 for Tickling Giants (2017)


In [None]:
# Boolean mask: movies with more than 20 ratings.
# NOTE(review): `filter` shadows the Python builtin of the same name for the
# rest of the notebook; name kept because later cells may reference it.
filter = movieList['number of ratings'] > 20

# Attach the new user's predictions and put the columns in display order.
movieList = (
    movieList
    .assign(pred=my_predictions)
    .reindex(columns=['pred','mean rating','number of ratings','title'])
)

# Of the 300 highest predictions, keep the movies passing the mask and show
# them ordered by mean rating, highest first.
(
    movieList
    .loc[ix[:300]]
    .loc[filter]
    .sort_values("mean rating", ascending=False)
)

Unnamed: 0,pred,mean rating,number of ratings,title
2112,3.931116,4.238255,149,"Dark Knight, The (2008)"
676,3.968791,4.146667,75,City of God (Cidade de Deus) (2002)
2395,3.968816,4.136364,88,Inglourious Basterds (2009)
929,4.876224,4.118919,185,"Lord of the Rings: The Return of the King, The..."
2700,4.790935,4.109091,55,Toy Story 3 (2010)
393,4.081565,4.106061,198,"Lord of the Rings: The Fellowship of the Ring,..."
653,3.833626,4.021277,188,"Lord of the Rings: The Two Towers, The (2002)"
3083,3.953273,3.993421,76,"Dark Knight Rises, The (2012)"
2804,4.058692,3.989362,47,Harry Potter and the Deathly Hallows: Part 1 (...
1142,3.848549,3.986842,38,The Machinist (2004)
