In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import keras

In [2]:
# Generate a synthetic dataset with known latent features

# True latent features for items (100 items × 3 features)
true_item_features = np.random.normal(0, 1, size=(100, 3))

# True latent features for users (30 users × 3 features)
true_user_prefs = np.random.normal(0, 1, size=(30, 3))

# Generate ratings using dot product + noise
ratings_complete = true_item_features @ true_user_prefs.T + np.random.normal(0, 0.5, (100, 30))

# Clip ratings to the range [1, 5] and round
ratings_complete = np.clip(np.round(ratings_complete), 1, 5)

# Introduce missing values (50% masked)
mask = np.random.rand(*ratings_complete.shape) < 0.5
ratings_synthetic = ratings_complete.copy()
ratings_synthetic[mask] = np.nan

# Create DataFrame
items = [f'Item_{i+1}' for i in range(100)]
users = [f'User_{j+1}' for j in range(30)]
df_synthetic = pd.DataFrame(ratings_synthetic, index=items, columns=users)

df_synthetic.head()


Unnamed: 0,User_1,User_2,User_3,User_4,User_5,User_6,User_7,User_8,User_9,User_10,...,User_21,User_22,User_23,User_24,User_25,User_26,User_27,User_28,User_29,User_30
Item_1,1.0,4.0,,,,,1.0,,,,...,,,,,,1.0,,1.0,,
Item_2,1.0,1.0,1.0,1.0,1.0,,,1.0,1.0,,...,,1.0,,,,,1.0,4.0,,1.0
Item_3,,2.0,1.0,,1.0,1.0,,1.0,1.0,1.0,...,1.0,1.0,,1.0,,1.0,,2.0,1.0,
Item_4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,...,1.0,,1.0,,,,1.0,,1.0,1.0
Item_5,,,,3.0,,2.0,,1.0,,,...,,,1.0,,,1.0,5.0,1.0,,


In [3]:
# Assume df_synthetic is your DataFrame
Y = df_synthetic.fillna(0).values     # Ratings matrix, missing entries as 0
R = ~df_synthetic.isna()              # Boolean mask where rating exists
R = R.astype(int).values              # Convert True/False to 1/0

In [4]:
# X (ndarray (num_movies,num_features)): matrix of item features
#       W (ndarray (num_users,num_features)) : matrix of user parameters
#       b (ndarray (1, num_users)            : vector of user parameters
#       Y (ndarray (num_movies,num_users)    : matrix of user ratings of movies
#       R (ndarray (num_movies,num_users)    : matrix, where R(i, j) = 1 if the i-th movies was rated by the j-th user

In [5]:
# Parameters
num_movies = 100
num_users = 30
num_features = 3

# Generate item features matrix X: (100 items × 3 features)
X = np.random.normal(0, 1, size=(num_movies, num_features))

# Generate user parameters matrix W: (30 users × 3 features)
W = np.random.normal(0, 1, size=(num_users, num_features))

# Generate user bias vector b: (1 × 30 users)
b = np.random.normal(0, 0.5, size=(1, num_users))

X[:5], W[:5], b[:, :5]  # Show first 5 rows/users for each

(array([[ 1.36404066, -1.6449417 , -1.35133446],
        [ 2.00682112, -0.13795994,  1.92882879],
        [-0.12926093, -0.11129731,  1.14268219],
        [-0.19731742, -1.05901335,  0.60047128],
        [-0.1740946 ,  0.32855294,  0.5002973 ]]),
 array([[ 0.9835061 ,  0.41745832,  0.84354872],
        [ 0.9368884 , -1.24933115, -0.81539031],
        [-0.86527799,  1.16387673, -1.37226111],
        [-2.20498288, -0.38352503,  0.14545706],
        [-0.13971188, -0.34962112, -0.85929102]]),
 array([[ 0.55302874,  0.14160739,  0.83580595, -0.64256201, -0.31695526]]))

In [6]:
nm,nu = Y.shape
_,n=X.shape

In [7]:
print("shape of X",X.shape)
print("shape of W",W.shape)
print("shape of b",b.shape)
print("shape of Y",Y.shape)
print("shape of R",R.shape)

shape of X (100, 3)
shape of W (30, 3)
shape of b (1, 30)
shape of Y (100, 30)
shape of R (100, 30)


In [8]:
# def cofi_cost_func(X, W, b, Y, R, lambda_):
#     """
#     Returns the cost for the collaborative-based filtering
#     Args:
#       X (ndarray (num_movies,num_features)): matrix of item features
#       W (ndarray (num_users,num_features)) : matrix of user parameters
#       b (ndarray (1, num_users)            : vector of user parameters
#       Y (ndarray (num_movies,num_users)    : matrix of user ratings of movies
#       R (ndarray (num_movies,num_users)    : matrix, where R(i, j) = 1 if the i-th movies was rated by the j-th user
#       lambda_ (float): regularization parameter
#     Returns:
#       J (float) : Cost
#     """
#     nm, nu = Y.shape
#     J = 0 
#     cwr=0.
#     fwb= (X@W.T)+b
#     cwr+= R*(fwb-Y)**2
#     cwr = np.sum(cwr)
#     cwr/=2
#     regc = 0.
#     n = X.shape[1]
#     for i in range(nm):
#         for k in range(n):
#             regc+= X[i,k]**2
#     for j in range(nu):
#         for k in range(n):
#             regc += W[j,k]**2
#     regc = regc*(lambda_/2)
#     J = cwr+regc
    
#     return J

In [9]:
def cofi_cost_func(X,W,b,Y,R,lambda_):
    j = (tf.linalg.matmul(X, tf.transpose(W)) + b - Y)*R
    J = 0.5 * tf.reduce_sum(j**2) + (lambda_/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
    return J

In [10]:
Y_mean = np.mean(Y)
Y_norm = Y - Y_mean

In [11]:
Y_norm[:2]

array([[ 0.36566667,  3.36566667, -0.63433333, -0.63433333, -0.63433333,
        -0.63433333,  0.36566667, -0.63433333, -0.63433333, -0.63433333,
        -0.63433333,  0.36566667,  1.36566667,  1.36566667, -0.63433333,
        -0.63433333,  0.36566667, -0.63433333, -0.63433333,  0.36566667,
        -0.63433333, -0.63433333, -0.63433333, -0.63433333, -0.63433333,
         0.36566667, -0.63433333,  0.36566667, -0.63433333, -0.63433333],
       [ 0.36566667,  0.36566667,  0.36566667,  0.36566667,  0.36566667,
        -0.63433333, -0.63433333,  0.36566667,  0.36566667, -0.63433333,
        -0.63433333, -0.63433333, -0.63433333, -0.63433333, -0.63433333,
         0.36566667, -0.63433333, -0.63433333,  0.36566667,  0.36566667,
        -0.63433333,  0.36566667, -0.63433333, -0.63433333, -0.63433333,
        -0.63433333,  0.36566667,  3.36566667, -0.63433333,  0.36566667]])

In [12]:
import tensorflow as tf

In [13]:
num_movies, num_users = Y.shape
num_features = 100

# Set Initial Parameters (W, X), use tf.Variable to track these variables
tf.random.set_seed(1234) # for consistent results
W = tf.Variable(tf.random.normal((num_users,  num_features),dtype=tf.float64),  name='W')
X = tf.Variable(tf.random.normal((num_movies, num_features),dtype=tf.float64),  name='X')
b = tf.Variable(tf.random.normal((1,          num_users),   dtype=tf.float64),  name='b')

# Instantiate an optimizer.
optimizer = keras.optimizers.Adam(learning_rate=1e-1)

In [14]:
iterations = 100
lambda_ = 1.3
for iter in range(iterations):
    with tf.GradientTape() as tape:
        cost_value = cofi_cost_func(X, W, b, Y_norm, R, lambda_)
    grads = tape.gradient( cost_value, [X,W,b] )
    optimizer.apply_gradients( zip(grads, [X,W,b]) )

    if iter % 2 == 0:
        print(f"Training loss at iteration {iter}: {cost_value:0.1f}")

Training loss at iteration 0: 86880.7
Training loss at iteration 2: 21051.0
Training loss at iteration 4: 10976.1
Training loss at iteration 6: 14357.1
Training loss at iteration 8: 14660.1
Training loss at iteration 10: 11905.9
Training loss at iteration 12: 8984.9
Training loss at iteration 14: 7141.3
Training loss at iteration 16: 6274.6
Training loss at iteration 18: 5858.1
Training loss at iteration 20: 5499.9
Training loss at iteration 22: 5082.8
Training loss at iteration 24: 4640.2
Training loss at iteration 26: 4219.4
Training loss at iteration 28: 3840.5
Training loss at iteration 30: 3504.8
Training loss at iteration 32: 3208.1
Training loss at iteration 34: 2947.0
Training loss at iteration 36: 2718.3
Training loss at iteration 38: 2518.2
Training loss at iteration 40: 2341.0
Training loss at iteration 42: 2181.2
Training loss at iteration 44: 2034.5
Training loss at iteration 46: 1898.7
Training loss at iteration 48: 1772.4
Training loss at iteration 50: 1655.0
Training lo

In [15]:
# The parameters X,W,b have been updated

In [16]:
Y_pred = np.matmul(X.numpy(), np.transpose(W.numpy())) + b.numpy()

In [17]:
# Now for a particular user, let's recommend some movies that have not been viewed by the user before.
# Say, for this example, User_11

In [18]:
user_ratings = Y_pred[:, 10]  # All items predicted for that user

In [19]:
rated_items = R[:, 10]  # 1 if rated, 0 if not
unrated_mask = (rated_items == 0) # Creates a boolean mask
user_ratings_unrated = user_ratings * unrated_mask

In [20]:
user_ratings_unrated

array([ 0.64523299,  0.93685497,  0.        ,  0.08622737,  0.59066869,
        0.        ,  0.38252891, -0.1946545 ,  0.09601293,  0.        ,
        0.        , -0.70355566,  0.        ,  0.03035576,  0.39300218,
        0.00179022,  0.15611033,  0.13834581,  0.47094418,  0.12830341,
        0.        ,  0.        ,  1.40758511,  1.34367052,  0.        ,
       -0.26570377,  0.        ,  0.16992478,  0.52972537,  0.        ,
        0.49955513,  0.85312439,  0.        ,  0.22568738, -0.06832369,
        0.        ,  0.        ,  0.16453662,  0.69118464,  0.        ,
        0.        ,  0.36965178,  0.        , -1.25046852,  0.        ,
        0.18092272,  0.82818564, -0.19215911,  0.        ,  0.44563966,
        0.        ,  0.        ,  0.38333193,  0.        , -0.63260302,
        0.        ,  0.        , -0.26659355,  0.        ,  0.97590114,
        0.        ,  0.        ,  0.        ,  0.07416   ,  0.99779805,
        0.19583165,  0.        ,  0.        ,  0.47827481,  0.  

In [21]:
items = np.array([f'Item_{i+1}' for i in range(Y.shape[0])])

In [22]:
top_k = 15

In [23]:
recommended_indices = np.argsort(user_ratings_unrated)[::-1][:top_k]
recommended_items = items[recommended_indices]

In [24]:
for i,item in enumerate(recommended_items):
    print("Choice",i+1,": ",item)

Choice 1 :  Item_23
Choice 2 :  Item_24
Choice 3 :  Item_65
Choice 4 :  Item_60
Choice 5 :  Item_2
Choice 6 :  Item_32
Choice 7 :  Item_47
Choice 8 :  Item_39
Choice 9 :  Item_1
Choice 10 :  Item_5
Choice 11 :  Item_95
Choice 12 :  Item_29
Choice 13 :  Item_85
Choice 14 :  Item_31
Choice 15 :  Item_77
