In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tqdm import tqdm

In [2]:
# Raw ratings table; later cells read its 'movieId', 'userId' and 'rating' columns.
ratings_path = 'data/ratings.csv'
df = pd.read_csv(ratings_path)

In [3]:
# Axis labels for the ratings grid: movie ids in ascending order (rows) and
# user ids in their order of first appearance in df (columns).
movie_ids = sorted(df['movieId'].unique().tolist())
user_ids = df['userId'].unique().tolist()

# Frame that only carries those labels; it holds no ratings at this point.
Y_df = pd.DataFrame(index=movie_ids, columns=user_ids)

In [4]:
# Row / column labels the final grid must follow (taken from the existing Y_df).
target_rows, target_cols = Y_df.index, Y_df.columns

# Spread the long ratings table into a movie-by-user grid; (movie, user) pairs
# that have no rating come out as NaN.
pivot_df = df.pivot(columns='userId', index='movieId', values='rating')

# Align the grid to the target labels. No fill_value is passed, so any label
# missing from the pivot is filled with NaN (not 0).
Y_df = pivot_df.reindex(index=target_rows, columns=target_cols)

# NumPy array of the ratings grid, used by the numeric code that follows.
Y = Y_df.to_numpy()

print(Y)

[[4.  nan nan ... 2.5 3.  5. ]
 [nan nan nan ... 2.  nan nan]
 [4.  nan nan ... 2.  nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]


In [5]:
# Number of features given to each row of X and W when they are initialised.
nf = 20

# Y is the movie-by-user ratings grid, so nm counts its rows and nu its columns.
nm, nu = Y.shape

In [6]:
# Fix TensorFlow's global seed so the random initial values below are repeatable.
tf.random.set_seed(1234)

# Trainable parameters, drawn from a standard normal in the order X, W, b:
#   X: one nf-long feature row per row of Y
#   W: one nf-long weight row per column of Y
#   b: a single row holding one bias per column of Y
X = tf.Variable(tf.random.normal(shape=(nm, nf), dtype=tf.float64), name='X')
W = tf.Variable(tf.random.normal(shape=(nu, nf), dtype=tf.float64), name='W')
b = tf.Variable(tf.random.normal(shape=(1, nu), dtype=tf.float64), name='b')

# Peek at the first two bias values.
b[0, :2]

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([-0.08194051,  0.7000041 ])>

In [7]:
def calculate_cost(X, W, b, Y, lambda_):
    """Regularised squared-error cost that skips NaN entries.

    Forms the residual ``X . W^T + b - Y``, replaces every NaN residual with
    zero, and returns half of
    ``sum(residual ** 2) + lambda_ * (sum(W ** 2) + sum(X ** 2))``.
    The bias ``b`` is not part of the regularisation term.

    Args:
        X: 2-D tensor/variable; its rows are combined with the rows of ``W``.
        W: 2-D tensor/variable with the same number of columns as ``X``.
        b: bias, broadcast against ``X . W^T``.
        Y: targets with the shape of ``X . W^T``; NaN marks entries to ignore.
        lambda_: weight of the regularisation term.

    Returns:
        Scalar tensor holding the cost.

    Note:
        The NaN test is applied to the residual itself, so a NaN coming from
        the prediction side is zeroed exactly like a NaN coming from ``Y``.
    """
    # Penalty on the magnitude of the weights and features (b is excluded).
    penalty = lambda_ * (tf.reduce_sum(W ** 2) + tf.reduce_sum(X ** 2))

    # Residual for every entry; it is NaN wherever Y is NaN.
    residual = tf.tensordot(X, tf.transpose(W), axes=1) + b - Y

    # Zero the NaN residuals so they add nothing to the squared-error sum.
    nan_mask = tf.math.is_nan(residual)
    kept_residual = tf.where(nan_mask, tf.zeros_like(residual), residual)

    squared_error = tf.reduce_sum(kept_residual ** 2)

    return (squared_error + penalty) / 2

In [8]:
# Adam optimizer applied to X, W and b by the training loop; 0.1 is its step size.
learning_rate = 0.1
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [9]:
epochs = 1000   # number of full-batch gradient steps
lambda_ = 1.5   # regularisation weight passed to calculate_cost

# Y never changes during training. Convert the NumPy array to a tensor once here;
# passing the raw array would make TensorFlow re-convert it on every iteration.
Y_const = tf.constant(Y)

# Variables updated by the optimizer, in the same order as the gradients below.
trainable = [X, W, b]

for epoch in tqdm(range(epochs)):
    # Record the forward pass so the cost can be differentiated w.r.t. X, W and b.
    with tf.GradientTape() as tape:
        cost = calculate_cost(X, W, b, Y_const, lambda_)

    grads = tape.gradient(cost, trainable)
    optimizer.apply_gradients(zip(grads, trainable))

100%|██████████| 1000/1000 [03:24<00:00,  4.90it/s]


In [10]:
# Predicted value for every (row of X, row of W) pair, with the bias row b
# broadcast down the rows.
Y_pred = (
    tf.tensordot(X, tf.transpose(W), axes=1)
    + b
)

In [44]:
# Movie metadata keyed by movieId so rows can be looked up and aligned by id.
movie_df = pd.read_csv('data/movies.csv').set_index('movieId')
movie_df.head()

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy


In [46]:
# Look up a single movie by its movieId label (movie_df is indexed by movieId).
movie_df.loc[50]

title     Usual Suspects, The (1995)
genres        Crime|Mystery|Thriller
Name: 50, dtype: object

In [49]:
# Label the prediction matrix with the same row (movie id) and column (user id)
# labels as Y_df, then attach each movie's title; the titles are aligned on the
# movieId index shared with movie_df.
new_df = (
    pd.DataFrame(Y_pred, index=Y_df.index, columns=Y_df.columns)
    .assign(movie=movie_df['title'])
)
new_df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,602,603,604,605,606,607,608,609,610,movie
1,3.995354,3.54747,2.594645,5.465924,3.550314,4.243351,5.307731,4.254967,3.517068,3.647536,...,3.253163,3.455262,3.640803,3.677512,3.112444,3.749669,2.081514,3.450507,4.436178,Toy Story (1995)
2,3.260261,3.837275,1.856736,3.362137,3.269763,3.982213,4.112228,3.367141,3.055758,4.155225,...,3.203713,3.04221,3.559473,3.352554,2.609032,4.127159,2.35211,3.058363,3.812741,Jumanji (1995)
3,4.212994,3.458158,1.657049,2.50767,3.917288,4.686126,4.21383,2.829126,1.805872,3.58234,...,2.86853,2.865821,3.161257,2.670433,3.535391,5.030427,1.805791,3.592106,3.263654,Grumpier Old Men (1995)
4,3.725815,4.010878,1.192126,0.669249,3.204674,2.963493,2.453845,2.555882,2.340013,3.856097,...,3.982302,4.060831,3.546256,2.824201,3.419612,4.252477,3.162327,2.997582,2.973756,Waiting to Exhale (1995)
5,4.033223,4.145147,2.557883,3.84889,3.986843,4.664439,0.837241,3.014892,3.288822,2.292139,...,2.556054,1.195827,2.874201,3.267473,1.612232,2.252644,1.162059,2.969226,1.325385,Father of the Bride Part II (1995)


#### Predictions for user 1

In [58]:
# Column label 1 holds the predictions for user 1; show them next to the titles.
new_df.loc[:, [1, 'movie']]

Unnamed: 0,1,movie
1,3.995354,Toy Story (1995)
2,3.260261,Jumanji (1995)
3,4.212994,Grumpier Old Men (1995)
4,3.725815,Waiting to Exhale (1995)
5,4.033223,Father of the Bride Part II (1995)
...,...,...
193581,4.120088,Black Butler: Book of the Atlantic (2017)
193583,4.067515,No Game No Life: Zero (2017)
193585,4.067508,Flint (2017)
193587,4.067514,Bungo Stray Dogs: Dead Apple (2018)
