# Content Based Filtering

In [1]:
import numpy as np
import tensorflow as tf

In [2]:
users = [ 'user1', 'user2', 'user3', 'user4' ]

movies = [ 'movie1', 'movie2', 'movie3', 'movie4', 'movie5', 'movie6']

features = [ 'action', 'drama', 'scifi', 'comedy', 'animated']

n_users = len(users)
n_movies = len(movies)
n_features = len(features)

## user-item matrix


sparse matrix for user and items rating matrix. 

shape $(n_{users}, n_{items})$

In [3]:
users, movies

(['user1', 'user2', 'user3', 'user4'],
 ['movie1', 'movie2', 'movie3', 'movie4', 'movie5', 'movie6'])

In [4]:
user_movies = tf.constant([
    [5, 6, 8, 0, 0, 1],
    [0, 0, 10, 10, 0, 5],
    [0, 9, 5, 5, 0, 0],
    [1, 5, 0, 0, 0, 5]
], dtype=tf.float32)

## item-feature martix

k-hot encoded matrix of item and feature matrix.

shape : $(n_{users}, n_{items})$

In [5]:
movies, features

(['movie1', 'movie2', 'movie3', 'movie4', 'movie5', 'movie6'],
 ['action', 'drama', 'scifi', 'comedy', 'animated'])

In [6]:
movie_features = tf.constant([
    [1, 0, 1, 0 ,0],
    [0, 1, 0, 1, 0],
    [0, 1, 0, 1, 1],
    [1, 1, 0, 0, 1],
    [0, 0, 1, 0, 1],
    [0, 0, 1, 1, 1]
], dtype=tf.float32)

## user-feature matrix

$(n_{users}, n_{items}) \times (n_{users}, n_{items})$

In [7]:
user_feaures = tf.matmul(user_movies, movie_features)
user_feaures

<tf.Tensor: shape=(4, 5), dtype=float32, numpy=
array([[ 5., 14.,  6., 15.,  9.],
       [10., 20.,  5., 15., 25.],
       [ 5., 19.,  0., 14., 10.],
       [ 1.,  5.,  6., 10.,  5.]], dtype=float32)>

normalizing the matrix

In [8]:
user_features_norm = user_feaures / tf.reduce_sum(user_feaures, axis=0, keepdims=True)
user_features_norm

<tf.Tensor: shape=(4, 5), dtype=float32, numpy=
array([[0.23809524, 0.2413793 , 0.3529412 , 0.2777778 , 0.18367347],
       [0.47619048, 0.3448276 , 0.29411766, 0.2777778 , 0.5102041 ],
       [0.23809524, 0.3275862 , 0.        , 0.25925925, 0.20408164],
       [0.04761905, 0.0862069 , 0.3529412 , 0.18518518, 0.10204082]],
      dtype=float32)>


## user feature preference

get top k values from the user-feature matrix.

tensorflow's function to get top k values.

In [9]:
tf.nn.top_k(user_features_norm, k=1)

TopKV2(values=<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[0.3529412],
       [0.5102041],
       [0.3275862],
       [0.3529412]], dtype=float32)>, indices=<tf.Tensor: shape=(4, 1), dtype=int32, numpy=
array([[2],
       [4],
       [1],
       [2]], dtype=int32)>)

In [10]:
top_values, top_feature_idx = tf.nn.top_k(user_features_norm, k=n_features)

In [11]:
top_values

<tf.Tensor: shape=(4, 5), dtype=float32, numpy=
array([[0.3529412 , 0.2777778 , 0.2413793 , 0.23809524, 0.18367347],
       [0.5102041 , 0.47619048, 0.3448276 , 0.29411766, 0.2777778 ],
       [0.3275862 , 0.25925925, 0.23809524, 0.20408164, 0.        ],
       [0.3529412 , 0.18518518, 0.10204082, 0.0862069 , 0.04761905]],
      dtype=float32)>

In [12]:
top_feature_idx

<tf.Tensor: shape=(4, 5), dtype=int32, numpy=
array([[2, 3, 1, 0, 4],
       [4, 0, 1, 2, 3],
       [1, 3, 0, 4, 2],
       [2, 3, 4, 1, 0]], dtype=int32)>

In [13]:
for i in range(n_users):
    print(users[i]," : ", [features[index] for index in top_feature_idx[i]])

user1  :  ['scifi', 'comedy', 'drama', 'action', 'animated']
user2  :  ['animated', 'action', 'drama', 'scifi', 'comedy']
user3  :  ['drama', 'comedy', 'action', 'animated', 'scifi']
user4  :  ['scifi', 'comedy', 'animated', 'drama', 'action']


## Get movie recommendations

In [14]:
user_ratings = tf.matmul(user_features_norm, tf.transpose(movie_features))

user_ratings

<tf.Tensor: shape=(4, 6), dtype=float32, numpy=
array([[0.59103644, 0.5191571 , 0.70283055, 0.66314805, 0.53661466,
        0.81439245],
       [0.77030814, 0.6226054 , 1.1328094 , 1.3312222 , 0.80432177,
        1.0820996 ],
       [0.23809524, 0.58684546, 0.7909271 , 0.7697631 , 0.20408164,
        0.46334088],
       [0.40056023, 0.27139208, 0.3734329 , 0.23586676, 0.454982  ,
        0.6401672 ]], dtype=float32)>

In [15]:
user_movies

<tf.Tensor: shape=(4, 6), dtype=float32, numpy=
array([[ 5.,  6.,  8.,  0.,  0.,  1.],
       [ 0.,  0., 10., 10.,  0.,  5.],
       [ 0.,  9.,  5.,  5.,  0.,  0.],
       [ 1.,  5.,  0.,  0.,  0.,  5.]], dtype=float32)>

In [16]:
unseen_movies = tf.equal(user_movies, tf.zeros_like(user_movies))
unseen_movies

<tf.Tensor: shape=(4, 6), dtype=bool, numpy=
array([[False, False, False,  True,  True, False],
       [ True,  True, False, False,  True, False],
       [ True, False, False, False,  True,  True],
       [False, False,  True,  True,  True, False]])>

In [17]:
ignore_matrix = tf.zeros_like(user_movies)
ignore_matrix

<tf.Tensor: shape=(4, 6), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]], dtype=float32)>

In [18]:
new_ratings = tf.where(condition=unseen_movies, x=user_ratings, y=ignore_matrix)

new_ratings

<tf.Tensor: shape=(4, 6), dtype=float32, numpy=
array([[0.        , 0.        , 0.        , 0.66314805, 0.53661466,
        0.        ],
       [0.77030814, 0.6226054 , 0.        , 0.        , 0.80432177,
        0.        ],
       [0.23809524, 0.        , 0.        , 0.        , 0.20408164,
        0.46334088],
       [0.        , 0.        , 0.3734329 , 0.23586676, 0.454982  ,
        0.        ]], dtype=float32)>

### 2 movie recommendations

In [19]:
top_movies_idx = tf.nn.top_k(user_ratings, k = 2)[1]

In [20]:
for i in range(n_users):
    print(users[i], [movies[index] for index in top_movies_idx[i]])

user1 ['movie6', 'movie3']
user2 ['movie4', 'movie3']
user3 ['movie3', 'movie4']
user4 ['movie6', 'movie5']
