In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tqdm import tqdm

In [2]:
# Load the ratings table; later cells read its userId, movieId and rating columns.
df = pd.read_csv('data/ratings.csv')
# df.head()  # uncomment to preview the first rows

In [3]:
# Empty movie-by-user frame whose labels fix the layout of the ratings matrix:
# rows are the distinct movieIds in ascending order, columns are the distinct
# userIds in the order they first appear in df.
movie_ids = df['movieId'].unique().tolist()
movie_ids.sort()
user_ids = df['userId'].unique().tolist()

Y_df = pd.DataFrame(index=movie_ids, columns=user_ids)
# Y_df.head()  # uncomment to preview the (still empty) frame

In [4]:
# Remember the row/column labels chosen for Y_df before it is overwritten below.
movie_labels, user_labels = Y_df.index, Y_df.columns

# Spread the long-format ratings into a grid: one row per movieId, one column
# per userId, each cell holding that user's rating of that movie.
pivot_df = df.pivot(columns='userId', index='movieId', values='rating')

# Align the grid with the remembered labels. No fill value is supplied, so
# movie/user pairs without a rating stay NaN.
Y_df = pivot_df.reindex(index=movie_labels, columns=user_labels)

# Plain NumPy view of the ratings grid, used by the model cells.
Y = Y_df.to_numpy()

print(Y)

[[4.  nan nan ... 2.5 3.  5. ]
 [nan nan nan ... 2.  nan nan]
 [4.  nan nan ... 2.  nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]


In [5]:
# Width of the factor matrices X and W (number of learned features per row).
nf = 20
# Y has one row per movie and one column per user, so its shape gives both counts.
nm, nu = Y.shape

In [6]:
# Fix TensorFlow's global seed so the random initialisation below is repeatable.
tf.random.set_seed(1234)

# Trainable parameters, each drawn with tf.random.normal in float64
# (created in the order X, W, b):
#   X: (nm, nf) - one row of nf values per row of Y
#   W: (nu, nf) - one row of nf values per column of Y
#   b: (1, nu)  - one offset per column of Y
param_shapes = {'X': (nm, nf), 'W': (nu, nf), 'b': (1, nu)}
X, W, b = (
    tf.Variable(tf.random.normal(shape, dtype=tf.float64), name=label)
    for label, shape in param_shapes.items()
)

# Peek at the first two entries of b.
b[0, :2]

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([-0.08194051,  0.7000041 ])>

In [59]:
def calculate_cost(X, W, b, Y, lambda_):
    """Return the regularised squared-error cost of the factorisation.

    The prediction ``X @ transpose(W) + b`` is compared with ``Y``. Entries of
    the difference that are NaN are replaced by zero before squaring, so they
    add nothing to the error term.

    Args:
        X: Left factor of the matrix product.
        W: Right factor; it is transposed before the product.
        b: Offset added to the product (broadcast against it).
        Y: Target values subtracted from the prediction.
        lambda_: Weight applied to the sum of squared entries of W and X.

    Returns:
        Half of (sum of squared non-NaN residuals
        + lambda_ * (sum(W ** 2) + sum(X ** 2))).
    """
    residual = tf.matmul(X, tf.transpose(W)) + b - Y

    # Keep only residual entries that are not NaN; the others become 0 so they
    # drop out of the sum of squares.
    is_valid = tf.math.logical_not(tf.math.is_nan(residual))
    masked_residual = tf.where(is_valid, residual, tf.zeros_like(residual))
    squared_error = tf.reduce_sum(masked_residual ** 2)

    # Penalty on the factor matrices only; b is not penalised.
    penalty = lambda_ * (tf.reduce_sum(W ** 2) + tf.reduce_sum(X ** 2))

    return (squared_error + penalty) / 2

In [60]:
# Adam optimizer with a learning rate of 0.1; the training loop uses it to update X, W and b.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

In [61]:
# Training settings: number of gradient steps (each one uses all of Y) and the
# regularisation weight handed to calculate_cost.
epochs = 1000
lambda_ = 1.5

# The same three variables are differentiated and updated on every step.
params = [X, W, b]

for epoch in tqdm(range(epochs)):
    # Record the forward pass so the cost can be differentiated afterwards.
    with tf.GradientTape() as tape:
        cost = calculate_cost(X, W, b, Y, lambda_)

    # One Adam update using the gradients of the cost w.r.t. X, W and b.
    grads = tape.gradient(cost, params)
    optimizer.apply_gradients(zip(grads, params))

100%|██████████| 1000/1000 [04:43<00:00,  3.53it/s]


In [62]:
# Prediction for every (row, column) cell of Y: X times W-transposed, plus the
# per-column offset b broadcast across rows.
Y_pred = tf.linalg.matmul(X, tf.transpose(W)) + b

In [63]:
# Movie metadata indexed by movieId, so rows can be looked up by movieId label.
movie_df = pd.read_csv('data/movies.csv').set_index('movieId')
movie_df.head()

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy


In [64]:
# Spot-check a single row by its movieId label (the index set on movie_df).
movie_df.loc[50]

title     Usual Suspects, The (1995)
genres        Crime|Mystery|Thriller
Name: 50, dtype: object

In [65]:
# Label the predictions with the same row index and column labels as Y_df,
# then attach each movie's title, matched on the row index.
new_df = pd.DataFrame(Y_pred, index=Y_df.index, columns=Y_df.columns).assign(
    movie=movie_df['title']
)
new_df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,602,603,604,605,606,607,608,609,610,movie
1,3.898792,4.293927,1.069678,3.852456,4.23565,4.588624,4.395474,3.294609,3.294677,2.997244,...,3.667811,4.11534,3.546884,3.682458,2.909723,3.70511,2.742307,3.447208,4.729729,Toy Story (1995)
2,3.698902,3.178899,2.384709,2.536584,3.551601,3.691776,3.400218,3.646883,2.862759,3.26601,...,2.974245,2.620318,3.874792,3.092464,2.817795,3.948704,2.32787,3.004392,3.071694,Jumanji (1995)
3,4.017408,3.732904,0.881372,2.733678,4.0197,4.252423,4.054943,3.405606,2.727841,4.241159,...,3.091427,4.392492,3.332128,3.007448,2.683276,5.509225,2.141176,3.189107,2.494748,Grumpier Old Men (1995)
4,4.109976,4.065089,2.392151,1.829217,3.327249,2.975941,2.004267,2.977583,2.043166,4.109446,...,3.041332,2.861136,3.070875,2.361976,2.88909,3.620935,2.479897,3.053304,2.808923,Waiting to Exhale (1995)
5,4.864579,4.966102,3.046638,2.909662,3.279629,4.586119,0.095037,2.128492,3.94973,4.524457,...,2.763513,-0.148166,2.958124,2.627255,1.441532,1.269608,2.510063,2.752523,2.918142,Father of the Bride Part II (1995)


#### predictions for user 1

In [68]:
# Column label 1 holds the predictions for user 1; show it next to the movie titles.
new_df[[1, 'movie']]

Unnamed: 0,1,movie
1,3.898792,Toy Story (1995)
2,3.698902,Jumanji (1995)
3,4.017408,Grumpier Old Men (1995)
4,4.109976,Waiting to Exhale (1995)
5,4.864579,Father of the Bride Part II (1995)
...,...,...
193581,4.317037,Black Butler: Book of the Atlantic (2017)
193583,4.310839,No Game No Life: Zero (2017)
193585,4.310461,Flint (2017)
193587,4.310762,Bungo Stray Dogs: Dead Apple (2018)
