In [1]:
import numpy as np
import tensorflow as tf

### Auto Diff

In [2]:
# Minimise J(w) = (w*x - y)**2 for a single training example with plain
# gradient descent, letting TensorFlow compute dJ/dw automatically.
w = tf.Variable(3.0)  # parameter being optimised
x = 1.0               # input feature
y = 1.0               # target value
alpha = 0.01          # learning rate

iterations = 30
# The index is named `step` rather than `iter`: binding `iter` at cell level
# would shadow the Python builtin for every later cell in the kernel.
for step in range(iterations):
    # Record the forward pass so TensorFlow can differentiate it afterwards.
    with tf.GradientTape() as tape:
        fwb = w * x
        costJ = (fwb - y) ** 2

    # Derivative of the cost with respect to w, from the recorded operations.
    [dJdw] = tape.gradient(costJ, [w])

    # Gradient-descent update, applied in place: w <- w - alpha * dJ/dw.
    w.assign_add(-alpha * dJdw)

## Implementation

In [None]:
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

# iterations = 200
# for iter in range(iterations):
#     with tf.GradientTape() as tape:

#         cost_val = cofiCostFuncV(X, W, b, Ynorm, R, num_users, num_movies, lambda_)
    
#     grads  = tape.gradient(cost_val, [X, W, b])

#     optimizer.apply_gradients(zip(grads, [X, W, b]))

### Finding related items

The features $x^{(i)}$ of item $i$ are quite hard to interpret individually, but collectively they do convey something about that movie. To find items related to item $i$, we look for an item $k$ whose feature vector $x^{(k)}$ is similar to $x^{(i)}$, i.e. one with a small squared distance, calculated as
$$||x^{(k)}-x^{(i)}||^2$$

## Limitations

### Cold start problem
- How to rank new items that few users have rated?
- How to show something reasonable to new users who have rated few items?

### Use side information about items or users:
- Item: Genre, movie stars, studio,...
- User: Demographics (age, gender, location), expressed preferences, ...

In [3]:
# GRADED FUNCTION: cofi_cost_func
# UNQ_C1

def cofi_cost_func(X, W, b, Y, R, lambda_):
    """
    Returns the regularized cost for collaborative filtering:

        J = 1/2 * sum_{i,j} R[i, j] * (X[i] . W[j] + b[0, j] - Y[i, j])**2
            + lambda_/2 * (sum(X**2) + sum(W**2))

    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users))           : vector of user parameters
      Y (ndarray (num_movies,num_users))   : matrix of user ratings of movies
      R (ndarray (num_movies,num_users))   : matrix, where R(i, j) = 1 if the i-th movies was rated by the j-th user
      lambda_ (float): regularization parameter
    Returns:
      J (float) : Cost
    """
    ### START CODE HERE ###
    # Work on plain arrays so the matrix expression below applies uniformly.
    X, W, b, Y, R = (np.asarray(a) for a in (X, W, b, Y, R))

    # Predictions for every (movie, user) pair at once; b (1, num_users)
    # broadcasts across the movie rows.
    residual = X @ W.T + b - Y

    # R weights the squared residual so unrated entries contribute nothing.
    data_cost = 0.5 * np.sum(R * residual ** 2)

    # L2 penalty on item features and user parameters (b is not regularized).
    reg_cost = (lambda_ / 2) * (np.sum(X ** 2) + np.sum(W ** 2))

    # Summing whole arrays yields a scalar directly, so no `J[0]` unwrapping
    # is needed and empty rating matrices no longer raise.
    J = data_cost + reg_cost
    ### END CODE HERE ###

    return J

In [4]:
def cofi_cost_func_v(X, W, b, Y, R, lambda_):
    """
    Vectorized collaborative filtering cost with L2 regularization.
    Built from TensorFlow operations so it can be used inside a custom
    training loop.
    Args:
      X (ndarray (num_movies,num_features)): matrix of item features
      W (ndarray (num_users,num_features)) : matrix of user parameters
      b (ndarray (1, num_users))           : vector of user parameters
      Y (ndarray (num_movies,num_users))   : matrix of user ratings of movies
      R (ndarray (num_movies,num_users))   : matrix, where R(i, j) = 1 if the i-th movies was rated by the j-th user
      lambda_ (float): regularization parameter
    Returns:
      J (float) : Cost
    """
    # Predicted rating for every (movie, user) pair.
    predictions = tf.linalg.matmul(X, tf.transpose(W)) + b

    # Multiplying by R zeroes the error wherever R is 0.
    masked_error = (predictions - Y) * R

    squared_error_term = 0.5 * tf.reduce_sum(masked_error**2)
    regularization_term = (lambda_/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
    return squared_error_term + regularization_term

# Content-Based Implementation

In [None]:
# # Load Data, set configuration variables
# item_train, user_train, y_train, item_features, user_features, item_vecs, movie_dict, user_to_genre = load_data()

# num_user_features = user_train.shape[1] - 3  # remove userid, rating count and ave rating during training
# num_item_features = item_train.shape[1] - 1  # remove movie id at train time
# uvs = 3  # user genre vector start
# ivs = 3  # item genre vector start
# u_s = 3  # start of columns to use in training, user
# i_s = 1  # start of columns to use in training, items
# print(f"Number of training vectors: {len(item_train)}")

In [None]:
# # scale training data
# item_train_unscaled = item_train
# user_train_unscaled = user_train
# y_train_unscaled    = y_train

# scalerItem = StandardScaler()
# scalerItem.fit(item_train)
# item_train = scalerItem.transform(item_train)

# scalerUser = StandardScaler()
# scalerUser.fit(user_train)
# user_train = scalerUser.transform(user_train)

# scalerTarget = MinMaxScaler((-1, 1))
# scalerTarget.fit(y_train.reshape(-1, 1))
# y_train = scalerTarget.transform(y_train.reshape(-1, 1))
# #ynorm_test = scalerTarget.transform(y_test.reshape(-1, 1))

# print(np.allclose(item_train_unscaled, scalerItem.inverse_transform(item_train)))
# print(np.allclose(user_train_unscaled, scalerUser.inverse_transform(user_train)))

In [None]:
# # GRADED_CELL
# # UNQ_C1

# num_outputs = 32
# tf.random.set_seed(1)
# user_NN = tf.keras.models.Sequential([
#     ### START CODE HERE ###     
#     tf.keras.layers.Dense(units= 256, activation= 'relu'),
#     tf.keras.layers.Dense(units = 128, activation = 'relu'),
#     tf.keras.layers.Dense(units = num_outputs, activation = 'linear')
#     ### END CODE HERE ###  
# ])

# item_NN = tf.keras.models.Sequential([
#     ### START CODE HERE ###     
#     tf.keras.layers.Dense(units= 256, activation = 'relu'),
#     tf.keras.layers.Dense(units = 128, activation = 'relu'),
#     tf.keras.layers.Dense(units = num_outputs, activation = 'linear')
#     ### END CODE HERE ###  
# ])

# # create the user input and point to the base network
# input_user = tf.keras.layers.Input(shape=(num_user_features))
# vu = user_NN(input_user)
# vu = tf.linalg.l2_normalize(vu, axis=1)

# # create the item input and point to the base network
# input_item = tf.keras.layers.Input(shape=(num_item_features))
# vm = item_NN(input_item)
# vm = tf.linalg.l2_normalize(vm, axis=1)

# # compute the dot product of the two vectors vu and vm
# output = tf.keras.layers.Dot(axes=1)([vu, vm])

# # specify the inputs and output of the model
# model = tf.keras.Model([input_user, input_item], output)

# model.summary()

In [None]:
# tf.random.set_seed(1)
# cost_fn = tf.keras.losses.MeanSquaredError()
# opt = tf.keras.optimizers.Adam(learning_rate=0.01)
# model.compile(optimizer=opt,
#               loss=cost_fn)

In [None]:
# tf.random.set_seed(1)
# model.fit([user_train[:, u_s:], item_train[:, i_s:]], y_train, epochs=30)

In [None]:
# model.evaluate([user_test[:, u_s:], item_test[:, i_s:]], y_test)

In [None]:
# new_user_id = 5000
# new_rating_ave = 0.0
# new_action = 0.0
# new_adventure = 5.0
# new_animation = 0.0
# new_childrens = 0.0
# new_comedy = 0.0
# new_crime = 0.0
# new_documentary = 0.0
# new_drama = 0.0
# new_fantasy = 5.0
# new_horror = 0.0
# new_mystery = 0.0
# new_romance = 0.0
# new_scifi = 0.0
# new_thriller = 0.0
# new_rating_count = 3

# user_vec = np.array([[new_user_id, new_rating_count, new_rating_ave,
#                       new_action, new_adventure, new_animation, new_childrens,
#                       new_comedy, new_crime, new_documentary,
#                       new_drama, new_fantasy, new_horror, new_mystery,
#                       new_romance, new_scifi, new_thriller]])

In [None]:
# # generate and replicate the user vector to match the number of movies in the data set.
# user_vecs = gen_user_vecs(user_vec,len(item_vecs))

# # scale our user and item vectors
# suser_vecs = scalerUser.transform(user_vecs)
# sitem_vecs = scalerItem.transform(item_vecs)

# # make a prediction
# y_p = model.predict([suser_vecs[:, u_s:], sitem_vecs[:, i_s:]])

# # unscale y prediction 
# y_pu = scalerTarget.inverse_transform(y_p)

# # sort the results, highest prediction first
# sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
# sorted_ypu   = y_pu[sorted_index]
# sorted_items = item_vecs[sorted_index]  #using unscaled vectors for display

# print_pred_movies(sorted_ypu, sorted_items, movie_dict, maxcount = 10)

<!-- uid = 2 
# form a set of user vectors. This is the same vector, transformed and repeated.
user_vecs, y_vecs = get_user_vecs(uid, user_train_unscaled, item_vecs, user_to_genre)

# scale our user and item vectors
suser_vecs = scalerUser.transform(user_vecs)
sitem_vecs = scalerItem.transform(item_vecs)

# make a prediction
y_p = model.predict([suser_vecs[:, u_s:], sitem_vecs[:, i_s:]])

# unscale y prediction 
y_pu = scalerTarget.inverse_transform(y_p)

# sort the results, highest prediction first
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_pu[sorted_index]
sorted_items = item_vecs[sorted_index]  #using unscaled vectors for display
sorted_user  = user_vecs[sorted_index]
sorted_y     = y_vecs[sorted_index]

#print sorted predictions for movies rated by the user
print_existing_user(sorted_ypu, sorted_y.reshape(-1,1), sorted_user, sorted_items, ivs, uvs, movie_dict, maxcount = 50) -->

In [1]:
# # GRADED_FUNCTION: sq_dist
# # UNQ_C2
# def sq_dist(a,b):
#     """
#     Returns the squared distance between two vectors
#     Args:
#       a (ndarray (n,)): vector with n features
#       b (ndarray (n,)): vector with n features
#     Returns:
#       d (float) : distance
#     """
#     ### START CODE HERE ###     
#     d = np.sum((a-b)**2)
#     ### END CODE HERE ###     
#     return d

In [2]:
# a1 = np.array([1.0, 2.0, 3.0]); b1 = np.array([1.0, 2.0, 3.0])
# a2 = np.array([1.1, 2.1, 3.1]); b2 = np.array([1.0, 2.0, 3.0])
# a3 = np.array([0, 1, 0]);       b3 = np.array([1, 0, 0])
# print(f"squared distance between a1 and b1: {sq_dist(a1, b1):0.3f}")
# print(f"squared distance between a2 and b2: {sq_dist(a2, b2):0.3f}")
# print(f"squared distance between a3 and b3: {sq_dist(a3, b3):0.3f}")

In [3]:
# input_item_m = tf.keras.layers.Input(shape=(num_item_features))    # input layer
# vm_m = item_NN(input_item_m)                                       # use the trained item_NN
# vm_m = tf.linalg.l2_normalize(vm_m, axis=1)                        # incorporate normalization as was done in the original model
# model_m = tf.keras.Model(input_item_m, vm_m)                                
# model_m.summary()

In [None]:
# scaled_item_vecs = scalerItem.transform(item_vecs)
# vms = model_m.predict(scaled_item_vecs[:,i_s:])
# print(f"size of all predicted movie feature vectors: {vms.shape}")

In [None]:
# count = 50  # number of movies to display
# dim = len(vms)
# dist = np.zeros((dim,dim))

# for i in range(dim):
#     for j in range(dim):
#         dist[i,j] = sq_dist(vms[i, :], vms[j, :])
        
# m_dist = ma.masked_array(dist, mask=np.identity(dist.shape[0]))  # mask the diagonal

# disp = [["movie1", "genres", "movie2", "genres"]]
# for i in range(count):
#     min_idx = np.argmin(m_dist[i])
#     movie1_id = int(item_vecs[i,0])
#     movie2_id = int(item_vecs[min_idx,0])
#     disp.append( [movie_dict[movie1_id]['title'], movie_dict[movie1_id]['genres'],
#                   movie_dict[movie2_id]['title'], movie_dict[movie2_id]['genres']]
#                )
# table = tabulate.tabulate(disp, tablefmt='html', headers="firstrow")
# table