In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

## Load the data

In [2]:
# Read the movie catalogue and the user ratings from CSV files located in the
# relative `Movies/` directory (resolved against the kernel's working directory).
movies = pd.read_csv('Movies/movies.csv')
rating = pd.read_csv('Movies/ratings.csv')

In [3]:
# Clean up movies: derive an integer release `year` from the last
# parenthesised token of each title, then keep only releases from 2020 onwards.
# Written as one chain of `.assign` / `.loc` steps so that no column is ever
# assigned on a filtered slice (which triggers pandas' SettingWithCopyWarning).
movies = (
    movies
    .assign(year=lambda d: d['title'].str.findall(r"\((.*?)\)").str[-1])
    # Titles without any "(...)" group yield NaN here; drop them.
    .dropna(subset=['year'])
    # Discard parenthesised tokens that are not purely numeric.
    .loc[lambda d: d['year'].str.isdigit()]
    .assign(year=lambda d: d['year'].astype('int32'))
    .loc[lambda d: d['year'] >= 2020]
)

In [4]:
movies

Unnamed: 0,movieId,title,genres,year
63950,208775,Stranger (2020),Mystery|Sci-Fi|Thriller,2020
64153,209311,Waves (2020),Drama|Romance,2020
64580,210455,Gretel & Hansel (2020),Fantasy|Horror|Thriller,2020
64586,210469,Like a Boss (2020),Comedy,2020
64628,210569,Birds of Prey (And the Fantabulous Emancipatio...,Action|Adventure|Crime,2020
...,...,...,...,...
86526,288951,A Taste of Whale (2022),Documentary,2022
86532,288967,State of Siege: Temple Attack (2021),Action|Drama,2021
86533,288971,Ouija Japan (2021),Action|Horror,2021
86535,288977,Skinford: Death Sentence (2023),Crime|Thriller,2023


In [5]:
rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,1225734739
1,1,110,4.0,1225865086
2,1,158,4.0,1225733503
3,1,260,4.5,1225735204
4,1,356,5.0,1225735119


In [6]:
# Attach to every rating row the total number of ratings made by that row's user.
rating['user_rating_time'] = rating.groupby('userId')['userId'].transform('size')

In [7]:
# Keep only the ratings of users who rated more than 800 movies in total.
rating = rating.loc[rating['user_rating_time'].gt(800)]

In [8]:
# Join each retained movie with the ratings it received.
# An inner join is used on purpose: a left join first creates NaN rows for
# unrated movies, which upcasts the integer `userId` column to float (labels
# such as 461.0 in the pivot), only for `dropna()` to remove those rows again.
combine = (
    pd.merge(movies, rating, on='movieId', how='inner')
    .drop(columns='timestamp')
    # Still discard rows with a missing value in any remaining column.
    .dropna()
)

In [9]:
# Record on every row how many ratings that row's movie has within `combine`.
combine['rating_times'] = combine.groupby('movieId')['movieId'].transform('size')

In [10]:
# Reshape to a movies x users rating matrix: one row per (movieId, title, genres),
# one column per userId, cells holding the rating.
# (movie, user) pairs without a rating are filled with 0, so 0 acts as the
# "not rated" marker for the cells that follow.
df = combine.pivot(index=['movieId','title','genres'], columns=['userId'], values='rating').fillna(0)

In [11]:
df.shape

(6282, 2167)

In [12]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,userId,461.0,487.0,527.0,677.0,897.0,1011.0,1057.0,1595.0,2056.0,2172.0,...,329503.0,329594.0,329646.0,329920.0,329954.0,330196.0,330235.0,330687.0,330842.0,330914.0
movieId,title,genres,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
209311,Waves (2020),Drama|Romance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0.0
210455,Gretel & Hansel (2020),Fantasy|Horror|Thriller,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
210469,Like a Boss (2020),Comedy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
210569,Birds of Prey (And the Fantabulous Emancipation of One Harley Quinn) (2020),Action|Adventure|Crime,0.0,0.0,0.0,0.0,3.5,0.0,3.5,1.5,0.0,3.0,...,0.0,0.0,0.0,1.5,0.0,1.5,3.0,0.0,2.5,0.0
210571,Fantasy Island (2020),Fantasy|Horror|Sci-Fi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.5,0.0,0.0,2.0,0.0,0.0,0.0


## Build the model

In [13]:
# Y: movies x users array of ratings taken from the pivot table (0 = no rating).
Y = df.values

# R: indicator array with 1 where Y holds a rating and 0 elsewhere.
# (The former np.zeros pre-allocation was dead code: it was overwritten at once.)
R = np.where(Y != 0, 1, 0)

#### Cost function - cofi_cost_func

In [64]:
def cofi_cost_func(X, W, b, Y, R, lambda_):
    """Regularised squared-error cost for collaborative filtering.

    Predictions are ``X @ W^T + b``. Their difference to ``Y`` is multiplied
    element-wise by ``R``, so only the entries selected by ``R`` contribute.
    The squared-error sum is not halved, while the penalty on ``X`` and ``W``
    is scaled by ``lambda_ / 2``; ``b`` is not regularised.

    Returns the cost as a scalar tensor.
    """
    predictions = tf.linalg.matmul(X, tf.transpose(W)) + b
    masked_error = (predictions - Y) * R
    data_term = tf.reduce_sum(masked_error ** 2)
    penalty = tf.reduce_sum(tf.square(X)) + tf.reduce_sum(tf.square(W))
    return data_term + lambda_ / 2 * penalty

#### Initialize new user ratings

In [65]:
# Flat lookup table of the movies, in the same row order as the rating matrix.
movielist = df.reset_index().loc[:, ['movieId', 'title', 'genres']]

In [66]:
movielist.head()

userId,movieId,title,genres
0,209311,Waves (2020),Drama|Romance
1,210455,Gretel & Hansel (2020),Fantasy|Horror|Thriller
2,210469,Like a Boss (2020),Comedy
3,210569,Birds of Prey (And the Fantabulous Emancipatio...,Action|Adventure|Crime
4,210571,Fantasy Island (2020),Fantasy|Horror|Sci-Fi


In [67]:
# Pick the movie ids the new user will rate: the first ten Romance titles
# (split into two groups of five) and Thriller titles 15-19.
list1 = movielist.loc[movielist['genres'].str.contains('Romance'), 'movieId'].iloc[:5]
list2 = movielist.loc[movielist['genres'].str.contains('Romance'), 'movieId'].iloc[5:10]
list3 = movielist.loc[movielist['genres'].str.contains('Thriller'), 'movieId'].iloc[15:20]

In [68]:
# Start the new user's profile with 0 (= "not rated") for every movie.
my_ratings = np.zeros(len(movielist))
movielist['my_ratings'] = my_ratings

In [69]:
movielist

userId,movieId,title,genres,my_ratings
0,209311,Waves (2020),Drama|Romance,0.0
1,210455,Gretel & Hansel (2020),Fantasy|Horror|Thriller,0.0
2,210469,Like a Boss (2020),Comedy,0.0
3,210569,Birds of Prey (And the Fantabulous Emancipatio...,Action|Adventure|Crime,0.0
4,210571,Fantasy Island (2020),Fantasy|Horror|Sci-Fi,0.0
...,...,...,...,...
6277,288945,Mr. Car and the Knights Templar (2023),Adventure,0.0
6278,288947,The Year I Started Masturbating (2022),Comedy|Drama|Romance,0.0
6279,288949,Eldorado: Everything the Nazis Hate (2023),Documentary,0.0
6280,288951,A Taste of Whale (2022),Documentary,0.0


In [70]:
# Give the new user's ratings: 5 for list1, 4.5 for list2, 1.5 for list3 and
# 0 (= not rated) for everything else.
# The column is assigned outright instead of accumulated with `+=`, so that
# re-running this cell cannot double the ratings, and a movie that appears in
# more than one list takes the first matching score rather than a sum that
# would fall outside the rating scale.
movielist['my_ratings'] = np.select(
    [
        movielist['movieId'].isin(list1),
        movielist['movieId'].isin(list2),
        movielist['movieId'].isin(list3),
    ],
    [5.0, 4.5, 1.5],
    default=0.0,
)

In [71]:
# Indicator column: 1 where the new user supplied a rating, 0 otherwise.
movielist['my_rated'] = movielist['my_ratings'].ne(0).astype(int)

In [72]:
movielist['my_ratings'].sum()

55.0

In [73]:
np.where(movielist['movieId'].isin(list1), 5, 0).shape

(6282,)

In [74]:
movielist.head()

userId,movieId,title,genres,my_ratings,my_rated
0,209311,Waves (2020),Drama|Romance,5.0,1
1,210455,Gretel & Hansel (2020),Fantasy|Horror|Thriller,0.0,0
2,210469,Like a Boss (2020),Comedy,0.0,0
3,210569,Birds of Prey (And the Fantabulous Emancipatio...,Action|Adventure|Crime,0.0,0
4,210571,Fantasy Island (2020),Fantasy|Horror|Sci-Fi,0.0,0


In [75]:
# Prepend the new user's ratings as column 0 of the rating matrix.
# Built from `df.values` rather than from `Y` itself so that re-running this
# cell does not keep prepending further copies of the column.
Y = np.c_[movielist['my_ratings'], df.values]

In [76]:
# Prepend the new user's rated-indicator as column 0 of the indicator matrix.
# The base indicator is recomputed from `df.values` rather than taken from `R`
# itself so that re-running this cell does not keep prepending further columns.
R = np.c_[movielist['my_rated'], np.where(df.values != 0, 1, 0)]

In [77]:
R.shape

(6282, 2169)

In [78]:
Y.shape

(6282, 2169)

#### Initialize X, W, b

In [79]:
# Problem size: nm rows and nu columns of Y, with nf latent features per row/column.
nm, nu = Y.shape
nf = 100

# Fix TensorFlow's global seed so the random initialisation is repeatable.
tf.random.set_seed(1234)

# Trainable parameters. `name` belongs to tf.Variable; previously it was passed
# to tf.random.normal, which only named the sampling op and left the variables
# with default names.
X = tf.Variable(tf.random.normal((nm, nf), dtype=tf.float64), name='X')
W = tf.Variable(tf.random.normal((nu, nf), dtype=tf.float64), name='W')

b = tf.Variable(tf.random.normal((1, nu), dtype=tf.float64), name='b')

In [80]:
# Adam optimizer with learning rate 0.1; the training loop uses it to update X, W and b.
optimizer = keras.optimizers.Adam(learning_rate=0.1)

#### Normalize Y

In [81]:
Y

array([[5., 5., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [82]:
def normalize_Y(Y, R):
    """Mean-centre every row of Y over its non-zero (rated) entries.

    Args:
        Y: 2-D array of ratings, one row per movie; 0 means "not rated".
        R: accepted for interface compatibility but not read; rated entries
            are detected as ``Y != 0``.

    Returns:
        (Ynorm, Ymean): ``Ynorm`` has the shape of ``Y`` with every non-zero
        entry replaced by its value minus the row mean (zeros stay 0);
        ``Ymean`` is a list holding one mean per row. A row without any
        non-zero entry gets mean 0.0 instead of NaN.
    """
    Y = np.asarray(Y, dtype=float)
    rated = Y != 0
    counts = rated.sum(axis=1)
    totals = np.where(rated, Y, 0.0).sum(axis=1)

    # Divide only where at least one rating exists, so empty rows keep mean 0
    # instead of producing NaN (and a RuntimeWarning) from an empty mean.
    row_means = np.zeros(Y.shape[0])
    has_ratings = counts > 0
    row_means[has_ratings] = totals[has_ratings] / counts[has_ratings]

    # Subtract the row mean from rated entries only; unrated entries stay 0.
    Ynorm = np.where(rated, Y - row_means[:, np.newaxis], 0.0)
    return Ynorm, list(row_means)

In [83]:
# Mean-centre the ratings per movie; Y_mean is kept so it can be added back to
# the model's predictions later.
Y_norm, Y_mean = normalize_Y(Y, R)

In [84]:
Y[0][Y[0] !=0].mean()

3.449438202247191

#### Set up the model

In [85]:
# Number of gradient steps and regularisation strength passed to the cost function.
iterations = 200
lambda_ = 1

# The loop variable is called `step`, not `iter`: at notebook scope `iter`
# would shadow the builtin for every cell executed afterwards.
for step in range(iterations):

    # Record the forward pass so the cost can be differentiated w.r.t. X, W and b.
    with tf.GradientTape() as tape:
        cost_value = cofi_cost_func(X, W, b, Y_norm, R, lambda_)
    grads = tape.gradient(cost_value, [X, W, b])
    optimizer.apply_gradients(zip(grads, [X, W, b]))

    # Report progress every 20 steps.
    if step % 20 == 0:
        print(f'iteration {step}, with cost as {cost_value}')

iteration 0, with cost as 12261665.188538719
iteration 20, with cost as 409095.6051552713
iteration 40, with cost as 175464.01660230095
iteration 60, with cost as 103590.55403767705
iteration 80, with cost as 68776.115581877
iteration 100, with cost as 48654.17828346169
iteration 120, with cost as 35933.698133978345
iteration 140, with cost as 27479.2770013685
iteration 160, with cost as 21670.669170613786
iteration 180, with cost as 17580.098035130195


## Recommendation

In [86]:
X.shape

TensorShape([6282, 100])

In [87]:
# Predicted (mean-centred) rating for every movie/user pair from the trained parameters.
p = tf.matmul(X, tf.transpose(W)) + b

In [90]:
# Add each movie's mean rating back (as a column vector broadcast across users).
pm = p + np.asarray(Y_mean)[:, np.newaxis]

In [91]:
pm

<tf.Tensor: shape=(6282, 2169), dtype=float64, numpy=
array([[ 5.00260014,  4.97979836,  4.41597672, ...,  3.9121775 ,
         4.20815149,  3.69996325],
       [ 2.61048525,  3.70801761,  1.49350735, ...,  1.57275101,
         3.56532114,  2.4068936 ],
       [ 3.41240453,  2.79082459,  2.73880704, ...,  1.54028943,
         3.46084388,  1.76017056],
       ...,
       [ 0.99081077,  0.91915782,  0.70060488, ..., -0.19171491,
         0.53921453,  1.47147892],
       [ 4.10009008,  4.21480401,  3.56198039, ...,  3.05834128,
         4.05065624,  3.7643661 ],
       [ 3.51184943,  3.6613184 ,  3.06888911, ...,  2.60267518,
         3.3927737 ,  3.18458089]])>

In [92]:
# Column 0 corresponds to the ratings column that was prepended to Y for the new user.
my_prediction = pm[:,0]

In [93]:
# Row indices of the movies ordered from highest to lowest predicted rating.
# NOTE(review): `ix` is not used by the visible cells that follow — confirm
# whether a ranked top-N listing was intended.
ix = tf.argsort(my_prediction, direction='DESCENDING')

In [94]:
ix

<tf.Tensor: shape=(6282,), dtype=int32, numpy=array([5241, 3785, 2581, ..., 4505,  951,  402], dtype=int32)>

In [95]:
# Store the new user's predicted ratings next to the movie metadata,
# converting the tensor to a NumPy array explicitly.
movielist['prediction'] = my_prediction.numpy()

In [96]:
movielist[movielist['my_rated'] != 0]

userId,movieId,title,genres,my_ratings,my_rated,prediction
0,209311,Waves (2020),Drama|Romance,5.0,1,5.0026
6,210575,The Photograph (2020),Drama|Romance,5.0,1,4.908673
8,210579,What About Love (2020),Drama|Romance,5.0,1,4.954389
38,211946,(UN)Ideal Man (2020),Comedy|Romance|Sci-Fi,5.0,1,4.931497
42,212048,Airplane Mode (2020),Comedy|Romance,5.0,1,4.976069
43,212052,Stargirl (2020),Comedy|Drama|Romance,4.5,1,4.427294
48,212104,Maid-in-Law (2020),Romance,4.5,1,4.487151
60,212395,The Thing About Harry (2020),Comedy|Romance,4.5,1,4.449364
74,212587,Spenser Confidential (2020),Crime|Drama|Mystery|Thriller,1.5,1,1.531345
76,212847,After Midnight (2020),Drama|Horror|Romance,4.5,1,4.444419


In [97]:
movielist

userId,movieId,title,genres,my_ratings,my_rated,prediction
0,209311,Waves (2020),Drama|Romance,5.0,1,5.002600
1,210455,Gretel & Hansel (2020),Fantasy|Horror|Thriller,0.0,0,2.610485
2,210469,Like a Boss (2020),Comedy,0.0,0,3.412405
3,210569,Birds of Prey (And the Fantabulous Emancipatio...,Action|Adventure|Crime,0.0,0,5.192566
4,210571,Fantasy Island (2020),Fantasy|Horror|Sci-Fi,0.0,0,3.992701
...,...,...,...,...,...,...
6277,288945,Mr. Car and the Knights Templar (2023),Adventure,0.0,0,1.327064
6278,288947,The Year I Started Masturbating (2022),Comedy|Drama|Romance,0.0,0,1.067977
6279,288949,Eldorado: Everything the Nazis Hate (2023),Documentary,0.0,0,0.990811
6280,288951,A Taste of Whale (2022),Documentary,0.0,0,4.100090


In [99]:
movielist[movielist['genres'].str.contains('Romance')][:20]

userId,movieId,title,genres,my_ratings,my_rated,prediction
0,209311,Waves (2020),Drama|Romance,5.0,1,5.0026
6,210575,The Photograph (2020),Drama|Romance,5.0,1,4.908673
8,210579,What About Love (2020),Drama|Romance,5.0,1,4.954389
38,211946,(UN)Ideal Man (2020),Comedy|Romance|Sci-Fi,5.0,1,4.931497
42,212048,Airplane Mode (2020),Comedy|Romance,5.0,1,4.976069
43,212052,Stargirl (2020),Comedy|Drama|Romance,4.5,1,4.427294
48,212104,Maid-in-Law (2020),Romance,4.5,1,4.487151
60,212395,The Thing About Harry (2020),Comedy|Romance,4.5,1,4.449364
76,212847,After Midnight (2020),Drama|Horror|Romance,4.5,1,4.444419
78,212869,To All the Boys: P.S. I Still Love You (2020),Comedy|Romance,4.5,1,4.470564


In [100]:
movielist[movielist['genres'].str.contains('Thriller')][:20]

userId,movieId,title,genres,my_ratings,my_rated,prediction
1,210455,Gretel & Hansel (2020),Fantasy|Horror|Thriller,0.0,0,2.610485
13,210855,The Rhythm Section (2020),Action|Mystery|Thriller,0.0,0,2.845835
14,210857,The Turning (2020),Horror|Thriller,0.0,0,1.833907
15,210859,Run (2020),Thriller,0.0,0,4.105092
18,210865,Bad Boys for Life (2020),Action|Crime|Thriller,0.0,0,2.705691
32,211726,Escape from Pretoria (2020),Thriller,0.0,0,3.414621
44,212066,The Heist of the Century (2020),Crime|Drama|Thriller,0.0,0,4.352418
46,212088,Horse Girl (2020),Drama|Thriller,0.0,0,3.722417
52,212190,The Assent (2020),Horror|Thriller,0.0,0,3.34121
61,212401,Inherit the Viper (2020),Crime|Drama|Thriller,0.0,0,3.529315


In [102]:
movielist[:20]

userId,movieId,title,genres,my_ratings,my_rated,prediction
0,209311,Waves (2020),Drama|Romance,5.0,1,5.0026
1,210455,Gretel & Hansel (2020),Fantasy|Horror|Thriller,0.0,0,2.610485
2,210469,Like a Boss (2020),Comedy,0.0,0,3.412405
3,210569,Birds of Prey (And the Fantabulous Emancipatio...,Action|Adventure|Crime,0.0,0,5.192566
4,210571,Fantasy Island (2020),Fantasy|Horror|Sci-Fi,0.0,0,3.992701
5,210573,The King's Man (2020),Action|Adventure|Comedy,0.0,0,1.342639
6,210575,The Photograph (2020),Drama|Romance,5.0,1,4.908673
7,210577,Sonic the Hedgehog (2020),Action|Adventure|Children|Comedy|Fantasy|Sci-Fi,0.0,0,3.975152
8,210579,What About Love (2020),Drama|Romance,5.0,1,4.954389
9,210847,Bloodshot (2020),Action|Drama|Fantasy|Sci-Fi,0.0,0,3.781319


In [None]:
user