In [1]:
import numpy as np
import tensorflow as tf

### Auto Diff

In [2]:
w = tf.Variable(3.0)  # parameter being optimized, starting from 3.0
x = 1.0
y = 1.0
alpha = 0.01  # step size used in the update below

iterations = 30
# The loop index is unused; `_` avoids leaving a global named `iter`
# that would shadow the builtin `iter` in every later cell.
for _ in range(iterations):
    # Record the forward computation so the tape can differentiate costJ w.r.t. w.
    with tf.GradientTape() as tape:
        fwb = w * x
        costJ = (fwb - y) ** 2

    # Derivative of the cost with respect to w, computed from the recorded ops.
    [dJdw] = tape.gradient(costJ, [w])

    # Gradient-descent step: w <- w - alpha * dJdw (in-place update of the Variable).
    w.assign_add(-alpha * dJdw)

## Implementation

In [None]:
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

# iterations = 200
# for iter in range(iterations):
#     with tf.GradientTape() as tape:

#         cost_value = cofiCostFuncV(X, W, b, Ynorm, R, num_users, num_movies, lambda_)

#     grads = tape.gradient(cost_value, [X, W, b])

#     optimizer.apply_gradients(zip(grads, [X, W, b]))

### Finding related items

The features $x^{(i)}$ of item $i$ are quite hard to interpret individually, but collectively they do convey something about that movie. To find items related to item $i$, we look for an item $k$ whose features $x^{(k)}$ are similar to $x^{(i)}$, i.e. one with a small squared distance, calculated as
$$||x^{(k)}-x^{(i)}||^2$$

## Limitations

### Cold start problem
- How to rank new items that few users have rated?
- How to show something reasonable to new users who have rated few items?

### Use side information about items or users:
- Item: Genre, movie stars, studio,...
- User: Demographics (age, gender, location), expressed preferences, ...

In [3]:
# GRADED FUNCTION: cofi_cost_func
# UNQ_C1

def cofi_cost_func(X, W, b, Y, R, lambda_):
    """
    Returns the regularized cost for collaborative filtering.

    The cost is half the sum of squared prediction errors, weighted
    element-wise by R, plus (lambda_/2) times the sum of squares of all
    entries of X and W. Inputs with zero movies or zero users are accepted;
    the error term is then 0.

    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users))           : vector of user parameters
      Y (ndarray (num_movies,num_users))   : matrix of user ratings of movies
      R (ndarray (num_movies,num_users))   : matrix, where R(i, j) = 1 if the i-th movie was rated by the j-th user
      lambda_ (float): regularization parameter
    Returns:
      J (float) : Cost
    """
    ### START CODE HERE ###
    X, W, b, Y, R = (np.asarray(a) for a in (X, W, b, Y, R))

    # Prediction error for every (movie, user) pair at once; b broadcasts
    # across the movie axis.
    residual = np.matmul(X, W.T) + b - Y

    # R multiplies the squared error, so entries with R(i, j) = 0 add nothing.
    J = 0.5 * np.sum(R * residual**2)

    # L2 regularization on item features and user parameters (b is not penalized).
    J += (lambda_ / 2) * (np.sum(X**2) + np.sum(W**2))
    ### END CODE HERE ###

    return J

In [4]:
def cofi_cost_func_v(X, W, b, Y, R, lambda_):
    """
    Vectorized collaborative-filtering cost.

    Built from TensorFlow operations (matmul, transpose, reduce_sum) so it can
    be evaluated inside a custom training loop.
    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users))           : vector of user parameters
      Y (ndarray (num_movies,num_users))   : matrix of user ratings of movies
      R (ndarray (num_movies,num_users))   : matrix, where R(i, j) = 1 if the i-th movie was rated by the j-th user
      lambda_ (float): regularization parameter
    Returns:
      J : scalar cost, as produced by tf.reduce_sum
    """
    # Predicted rating for every (movie, user) pair, before the bias is added.
    scores = tf.linalg.matmul(X, tf.transpose(W))

    # Prediction error, multiplied element-wise by R before squaring.
    masked_error = (scores + b - Y) * R
    squared_error_sum = tf.reduce_sum(masked_error**2)

    # Sum of squares of all entries of X and W (b is not penalized).
    l2_penalty = tf.reduce_sum(X**2) + tf.reduce_sum(W**2)

    return 0.5 * squared_error_sum + (lambda_/2) * l2_penalty