# Collaborative Filtering

## Librarys

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import tensorflow as tf
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

random.seed(100000)

## Data - Movie ratings dataset

The data set is derived from the [MovieLens "ml-latest-small"](https://grouplens.org/datasets/movielens/latest/) dataset.   
[F. Maxwell Harper and Joseph A. Konstan. 2015. The MovieLens Datasets: History and Context. ACM Transactions on Interactive Intelligent Systems (TiiS) 5, 4: 19:1–19:19. <https://doi.org/10.1145/2827872>]

The original dataset has  9000 movies rated by 600 users. The dataset has been reduced in size to focus on movies from the years since 2000. This dataset consists of ratings on a scale of 0.5 to 5 in 0.5 step increments. The reduced dataset has $n_u = 443$ users, and $n_m= 4778$ movies. 

Below, you will load the movie dataset into the variables $Y$ and $R$.

The matrix $Y$ (a  $n_m \times n_u$ matrix) stores the ratings $y^{(i,j)}$. The matrix $R$ is an binary-valued indicator matrix, where $R(i,j) = 1$ if user $j$ gave a rating to movie $i$, and $R(i,j)=0$ otherwise. 

Throughout this part of the exercise, you will also be working with the
matrices, $\mathbf{X}$, $\mathbf{W}$ and $\mathbf{b}$: 

$$\mathbf{X} = 
\begin{bmatrix}
--- (\mathbf{x}^{(0)})^T --- \\
--- (\mathbf{x}^{(1)})^T --- \\
\vdots \\
--- (\mathbf{x}^{(n_m-1)})^T --- \\
\end{bmatrix} , \quad
\mathbf{W} = 
\begin{bmatrix}
--- (\mathbf{w}^{(0)})^T --- \\
--- (\mathbf{w}^{(1)})^T --- \\
\vdots \\
--- (\mathbf{w}^{(n_u-1)})^T --- \\
\end{bmatrix},\quad
\mathbf{ b} = 
\begin{bmatrix}
 b^{(0)}  \\
 b^{(1)} \\
\vdots \\
b^{(n_u-1)} \\
\end{bmatrix}\quad
$$ 

The $i$-th row of $\mathbf{X}$ corresponds to the
feature vector $x^{(i)}$ for the $i$-th movie, and the $j$-th row of
$\mathbf{W}$ corresponds to one parameter vector $\mathbf{w}^{(j)}$, for the
$j$-th user. Both $x^{(i)}$ and $\mathbf{w}^{(j)}$ are $n$-dimensional
vectors. For the purposes of this exercise, you will use $n=10$, and
therefore, $\mathbf{x}^{(i)}$ and $\mathbf{w}^{(j)}$ have 10 elements.
Correspondingly, $\mathbf{X}$ is a
$n_m \times 10$ matrix and $\mathbf{W}$ is a $n_u \times 10$ matrix.

We will start by loading the movie ratings dataset to understand the structure of the data.
We will load $Y$ and $R$ with the movie dataset.  
We'll also load $\mathbf{X}$, $\mathbf{W}$, and $\mathbf{b}$ with pre-computed values. These values will be learned later in the lab, but we'll use pre-computed values to develop the cost model.

In [2]:
#Load data
df_movies_original = pd.read_csv('data\movies\movies.csv')
df_ratings_original = pd.read_csv('data\movies\\ratings.csv')

In [3]:
df_movies = df_movies_original.copy()
df_ratings = df_ratings_original.copy()

years = [str(year) for year in range(2000, 2022)]

df_movies['year'] = df_movies['title'].str.extract(r'\((\d{4})\)')

mask = df_movies['year'].isin(years)
df_movies = df_movies[mask]

genres_one_hot_encoding = df_movies['genres'].str.get_dummies(sep='|')
df_movies = pd.concat([df_movies, genres_one_hot_encoding], axis=1)

df_movies.reset_index(inplace=True)
df_movies.drop(columns=['index', 'genres', '(no genres listed)'], inplace=True)

In [4]:
ratings_filtered_mask = df_ratings['movieId'].isin(df_movies['movieId'])
df_ratings = df_ratings[ratings_filtered_mask]
df_ratings.reset_index(inplace=True)
df_ratings.drop(columns=['index', 'timestamp'], inplace=True)

In [5]:
# Create new ids
new_movie_ids = df_movies.index
new_user_ids = range(df_ratings['userId'].nunique())

# Dictionarys to map old ids with new ones
id_mapping = dict(zip(df_movies['movieId'], new_movie_ids))
user_id_mapping = dict(zip(sorted(df_ratings['userId'].unique()), new_user_ids))

df_ratings['movieId'] = df_ratings['movieId'].map(id_mapping)
df_ratings['userId'] = df_ratings['userId'].map(user_id_mapping)
df_movies['movieId'] = df_movies['movieId'].map(id_mapping)

df_movies_with_title = df_movies.copy()

df_movies.drop(columns=['title', 'year'], inplace=True)

In [6]:
df_movies_with_title.head()

Unnamed: 0,movieId,title,year,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,0,"Yards, The (2000)",2000,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,1,Next Friday (2000),2000,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,Supernova (2000),2000,0,1,0,0,0,0,0,...,0,0,0,0,0,0,1,1,0,0
3,3,Down to You (2000),2000,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
4,4,Scream 3 (2000),2000,0,0,0,0,1,0,0,...,0,1,0,0,1,0,0,1,0,0


In [7]:
df_ratings.head()

Unnamed: 0,userId,movieId,rating
0,0,4,5.0
1,0,51,5.0
2,0,60,4.0
3,0,68,4.0
4,0,79,5.0


In [8]:
df_movies.head()

Unnamed: 0,movieId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0
3,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4,4,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0


In [9]:
X = df_movies.drop(columns='movieId')
num_movies = X.shape[0]
num_features = X.shape[1]
num_users = df_ratings['userId'].nunique()
R = np.zeros((num_movies, num_users))
Y = np.zeros((num_movies, num_users))

for index, row in df_ratings.iterrows():
    # Movie index and user index for the matrixes
    movie_index = int(row['movieId']) - 1  # Subtract 1 to adjust to base 0
    user_id = int(row['userId']) - 1  # Subtract 1 to adjust to base 0
    
    # Fill R with 1 if user rated the movie
    R[movie_index, user_id] = 1
    
    # Fill Y with corresponding rate
    Y[movie_index, user_id] = row['rating']

In [10]:
print("Y", Y.shape, "R", R.shape)
print("X", X.shape)
# print("W", W.shape)
# print("b", b.shape)
print("num_features", num_features)
print("num_movies",   num_movies)
print("num_users",    num_users)

Y (4780, 443) R (4780, 443)
X (4780, 19)
num_features 19
num_movies 4780
num_users 443


In [11]:
#  From the matrix, we can compute statistics like average rating.
tsmean =  np.mean(Y[0, R[0, :].astype(bool)])
print(f"Average rating for movie 1 : {tsmean:0.3f} / 5" )

Average rating for movie 1 : 3.250 / 5


## Recommender systems


The goal of a collaborative filtering recommender system is to generate two vectors: For each user, a 'parameter vector' that embodies the movie tastes of a user. For each movie, a feature vector of the same size which embodies some description of the movie. The dot product of the two vectors plus the bias term should produce an estimate of the rating the user might give to that movie.

### Collaborative filtering learning algorithm

Now, you will begin implementing the collaborative filtering learning
algorithm. You will start by implementing the objective function. 

The collaborative filtering algorithm in the setting of movie
recommendations considers a set of $n$-dimensional parameter vectors
$\mathbf{x}^{(0)},...,\mathbf{x}^{(n_m-1)}$, $\mathbf{w}^{(0)},...,\mathbf{w}^{(n_u-1)}$ and $b^{(0)},...,b^{(n_u-1)}$, where the
model predicts the rating for movie $i$ by user $j$ as
$y^{(i,j)} = \mathbf{w}^{(j)}\cdot \mathbf{x}^{(i)} + b^{(j)}$ . Given a dataset that consists of
a set of ratings produced by some users on some movies, you wish to
learn the parameter vectors $\mathbf{x}^{(0)},...,\mathbf{x}^{(n_m-1)},
\mathbf{w}^{(0)},...,\mathbf{w}^{(n_u-1)}$  and $b^{(0)},...,b^{(n_u-1)}$ that produce the best fit (minimizes
the squared error).

You will complete the code in cofiCostFunc to compute the cost
function for collaborative filtering. 

#### Collaborative filtering cost function

The collaborative filtering cost function is given by
$$J({\mathbf{x}^{(0)},...,\mathbf{x}^{(n_m-1)},\mathbf{w}^{(0)},b^{(0)},...,\mathbf{w}^{(n_u-1)},b^{(n_u-1)}})= \left[ \frac{1}{2}\sum_{(i,j):r(i,j)=1}(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+ \underbrace{\left[
\frac{\lambda}{2}
\sum_{j=0}^{n_u-1}\sum_{k=0}^{n-1}(\mathbf{w}^{(j)}_k)^2
+ \frac{\lambda}{2}\sum_{i=0}^{n_m-1}\sum_{k=0}^{n-1}(\mathbf{x}_k^{(i)})^2
\right]}_{regularization}
\tag{1}$$
The first summation in (1) is "for all $i$, $j$ where $r(i,j)$ equals $1$" and could be written:

$$
= \left[ \frac{1}{2}\sum_{j=0}^{n_u-1} \sum_{i=0}^{n_m-1}r(i,j)*(\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)} - y^{(i,j)})^2 \right]
+\text{regularization}
$$

You should now write cofiCostFunc (collaborative filtering cost function) to return this cost.

In [12]:
def cofi_cost_func(X, W, b, Y, R, lambda_):
    """
    Returns the cost for the content-based filtering
    Args:
        X (ndarray (num_movies,num_features)): matrix of item features
        W (ndarray (num_users,num_features)) : matrix of user parameters
        b (ndarray (1, num_users)            : vector of user parameters
        Y (ndarray (num_movies,num_users)    : matrix of user ratings of movies
        R (ndarray (num_movies,num_users)    : matrix, where R(i, j) = 1 if the i-th movies was rated by the j-th user
        lambda_ (float): regularization parameter
    Returns:
        J (float) : Cost
    """
    nm, nu = Y.shape
    J = 0
    
    for j in range(nu):
        for i in range(nm):
            f_wxb = (np.dot(W[j], X[i]) + b[0][j])
            J += R[i][j] * (f_wxb - Y[i][j])**2
            
            
#     w_sum = 0
#     for j in range(nu):
#         for k in range(len(W[j])):
#             w_sum += W[j][k]**2
#     w_sum *= lambda_

#     x_sum = 0
#     for j in range(nu):
#         print(X[j])
#         for k in range(len(X[j])):
#             print(X[j][k])
#             x_sum += X[j][k]**2
#     x_sum *= lambda_

#     J += w_sum + x_sum

    J *= 1/2
    J += (lambda_/2) * (np.sum(np.square(W)) + np.sum(np.square(X)))
    

    return J

**Vectorized Implementation**

It is important to create a vectorized implementation to compute $J$, since it will later be called many times during optimization.

In [13]:
def cofi_cost_func_v(X, W, b, Y, R, lambda_):
    """
    Returns the cost for the content-based filtering
    Vectorized for speed. Uses tensorflow operations to be compatible with custom training loop.
    Args:
        X (ndarray (num_movies,num_features)): matrix of item features
        W (ndarray (num_users,num_features)) : matrix of user parameters
        b (ndarray (1, num_users)            : vector of user parameters
        Y (ndarray (num_movies,num_users)    : matrix of user ratings of movies
        R (ndarray (num_movies,num_users)    : matrix, where R(i, j) = 1 if the i-th movies was rated by the j-th user
        lambda_ (float): regularization parameter
    Returns:
        J (float) : Cost
    """
    X = tf.convert_to_tensor(X, dtype=tf.float64)
    W = tf.convert_to_tensor(W, dtype=tf.float64)
    j = (tf.linalg.matmul(X, tf.transpose(W)) + b - Y)*R
    J = 0.5 * tf.reduce_sum(j**2) + (lambda_/2) * (tf.reduce_sum(X**2) + tf.reduce_sum(W**2))
    return J

In [14]:
def initialize_parameters(num_users, num_features):
    """
    Initializes the parameters W and b for collaborative filtering algorithm
    
    Args:
    num_users (int): Number of users
    num_features (int): Number of features
    
    Returns:
    W (ndarray): Initialized matrix of user parameters
    b (ndarray): Initialized vector of user parameters
    """
    # Initialize W with random values
    W = np.random.randn(num_users, num_features)
    
    # Initialize b with zeros
    b = np.zeros((1, num_users))
    
    return W, b

In [15]:
W, b = initialize_parameters(num_users, num_features)

# Evaluate cost function
J = cofi_cost_func(np.array(X), W, b, Y, R, 0)
print(f"Cost: {J:0.2f}")

# Evaluate cost function with regularization 
J = cofi_cost_func(np.array(X), W, b, Y, R, 1.5)
print(f"Cost (with regularization): {J:0.2f}")

Cost: 296706.59
Cost (with regularization): 311437.65


In [16]:
# Evaluate cost function
J = cofi_cost_func_v(np.array(X), W, b, Y, R, 0)
print(f"Cost: {J:0.2f}")

# Evaluate cost function with regularization 
J = cofi_cost_func_v(np.array(X), W, b, Y, R, 1.5)
print(f"Cost (with regularization): {J:0.2f}")

Cost: 296706.59
Cost (with regularization): 311437.65


#### Learning movie recommendations 

In [17]:
my_ratings = np.zeros(num_movies)          #  Initialize my ratings

# Check the file small_movie_list.csv for id of each movie in our dataset
# For example, Toy Story 3 (2010) has ID 2700, so to rate it "5", you can set
my_ratings[2700] = 5 

#Or suppose you did not enjoy Persuasion (2007), you can set
my_ratings[2609] = 2

# We have selected a few movies we liked / did not like and the ratings we
# gave are as follows:
my_ratings[929]  = 5   # Lord of the Rings: The Return of the King, The
my_ratings[246]  = 5   # Shrek (2001)
my_ratings[2716] = 3   # Inception
my_ratings[1150] = 5   # Incredibles, The (2004)
my_ratings[382]  = 2   # Amelie (Fabuleux destin d'Amélie Poulain, Le)
my_ratings[366]  = 5   # Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
my_ratings[622]  = 5   # Harry Potter and the Chamber of Secrets (2002)
my_ratings[988]  = 3   # Eternal Sunshine of the Spotless Mind (2004)
my_ratings[2925] = 1   # Louis Theroux: Law & Disorder (2008)
my_ratings[2937] = 1   # Nothing to Declare (Rien à déclarer)
my_ratings[793]  = 5   # Pirates of the Caribbean: The Curse of the Black Pearl (2003)
my_rated = [i for i in range(len(my_ratings)) if my_ratings[i] > 0]

print('\nNew user ratings:\n')
for i in range(len(my_ratings)):
    if my_ratings[i] > 0 :
        print(f'Rated {my_ratings[i]} for  {df_movies_with_title.loc[i,"title"]}')


New user ratings:

Rated 5.0 for  Shrek (2001)
Rated 5.0 for  Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
Rated 2.0 for  Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
Rated 5.0 for  Harry Potter and the Chamber of Secrets (2002)
Rated 5.0 for  Pirates of the Caribbean: The Curse of the Black Pearl (2003)
Rated 5.0 for  Lord of the Rings: The Return of the King, The (2003)
Rated 3.0 for  Eternal Sunshine of the Spotless Mind (2004)
Rated 5.0 for  Incredibles, The (2004)
Rated 2.0 for  Shutter Island (2010)
Rated 5.0 for  A-Team, The (2010)
Rated 3.0 for  Despicable Me (2010)
Rated 1.0 for  Kill the Irishman (2011)
Rated 1.0 for  Tree of Life, The (2011)


In [18]:
# Add new user ratings to Y 
Y = np.c_[my_ratings, Y]

# Add new user indicator matrix to R
R = np.c_[(my_ratings != 0).astype(int), R]

# Normalize the Dataset
Ymean = np.mean(Y, axis=1, keepdims=True)
Ynorm = Y - Ymean

# Multiplicar pelo indicador de presença de avaliação
Ynorm = Ynorm * R


Let's prepare to train the model. Initialize the parameters and select the Adam optimizer.

In [19]:
#  Useful Values
num_movies, num_users = Y.shape
num_features = 100

# Set Initial Parameters (W, X), use tf.Variable to track these variables
tf.random.set_seed(1234) # for consistent results
W = tf.Variable(tf.random.normal((num_users,  num_features),dtype=tf.float64),  name='W')
X = tf.Variable(tf.random.normal((num_movies, num_features),dtype=tf.float64),  name='X')
b = tf.Variable(tf.random.normal((1,          num_users),   dtype=tf.float64),  name='b')

# Instantiate an optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-1)

Let's now train the collaborative filtering model. This will learn the parameters $\mathbf{X}$, $\mathbf{W}$, and $\mathbf{b}$.

The operations involved in learning $w$, $b$, and $x$ simultaneously do not fall into the typical 'layers' offered in the TensorFlow neural network package.  Consequently, the flow used in neural_networks.ipynb: Model, Compile(), Fit(), Predict(), are not directly applicable. Instead, we can use a custom training loop.

Recalling the steps of gradient descent.
- repeat until convergence:
    - compute forward pass
    - compute the derivatives of the loss relative to parameters
    - update the parameters using the learning rate and the computed derivatives 
    
TensorFlow has the marvelous capability of calculating the derivatives for you. This is shown below. Within the `tf.GradientTape()` section, operations on Tensorflow Variables are tracked. When `tape.gradient()` is later called, it will return the gradient of the loss relative to the tracked variables. The gradients can then be applied to the parameters using an optimizer. 
This is a very brief introduction to a useful feature of TensorFlow and other machine learning frameworks. Further information can be found by investigating "custom training loops" within the framework of interest.
    


In [20]:
iterations = 200
lambda_ = 1
for iter in range(iterations):
    # Use TensorFlow’s GradientTape
    # to record the operations used to compute the cost 
    with tf.GradientTape() as tape:

        # Compute the cost (forward pass included in cost)
        cost_value = cofi_cost_func_v(X, W, b, Ynorm, R, lambda_)

    # Use the gradient tape to automatically retrieve
    # the gradients of the trainable variables with respect to the loss
    grads = tape.gradient( cost_value, [X,W,b] )

    # Run one step of gradient descent by updating
    # the value of the variables to minimize the loss.
    optimizer.apply_gradients( zip(grads, [X,W,b]) )

    # Log periodically.
    if iter % 20 == 0:
        print(f"Training loss at iteration {iter}: {cost_value:0.1f}")

Training loss at iteration 0: 2477728.9
Training loss at iteration 20: 144751.8
Training loss at iteration 40: 57918.2
Training loss at iteration 60: 28805.9
Training loss at iteration 80: 16693.5
Training loss at iteration 100: 10918.3
Training loss at iteration 120: 7858.3
Training loss at iteration 140: 6100.9
Training loss at iteration 160: 5022.6
Training loss at iteration 180: 4322.3


#### Recommendations
Now compute the ratings for all the movies and users and display the movies that are recommended. These are based on the movies and ratings entered as `my_ratings[]` above. To predict the rating of movie $i$ for user $j$, you compute $\mathbf{w}^{(j)} \cdot \mathbf{x}^{(i)} + b^{(j)}$. This can be computed for all ratings using matrix multiplication.

In [21]:
# Make a prediction using trained weights and biases
p = np.matmul(X.numpy(), np.transpose(W.numpy())) + b.numpy()

#restore the mean
pm = p + Ymean

my_predictions = pm[:,0]

# sort predictions
ix = tf.argsort(my_predictions, direction='DESCENDING')

movieList = list(df_movies_with_title['title'])

for i in range(17):
    j = ix[i]
    if j not in my_rated:
        print(f'Predicting rating {my_predictions[j]:0.2f} for movie {movieList[j]}')

print('\n\nOriginal vs Predicted ratings:\n')
for i in range(len(my_ratings)):
    if my_ratings[i] > 0:
        print(f'Original {my_ratings[i]}, Predicted {my_predictions[i]:0.2f} for {movieList[i]}')

Predicting rating 5.57 for movie Respiro (2002)
Predicting rating 5.51 for movie Meet the Parents (2000)
Predicting rating 5.50 for movie Miss Congeniality (2000)
Predicting rating 5.25 for movie Swimming Pool (2003)
Predicting rating 5.24 for movie Class, The (Klass) (2007)
Predicting rating 5.22 for movie Narc (2002)
Predicting rating 5.07 for movie Drag Me to Hell (2009)
Predicting rating 5.00 for movie Domestic Disturbance (2001)
Predicting rating 4.99 for movie Big Momma's House (2000)
Predicting rating 4.98 for movie That Awkward Moment (2014)
Predicting rating 4.92 for movie OH in Ohio, The (2006)
Predicting rating 4.91 for movie Intolerable Cruelty (2003)
Predicting rating 4.89 for movie Hollywood Ending (2002)
Predicting rating 4.87 for movie No Man's Land (2001)
Predicting rating 4.86 for movie Children of Huang Shi, The (2008)


Original vs Predicted ratings:

Original 5.0, Predicted 4.72 for Shrek (2001)
Original 5.0, Predicted 4.69 for Harry Potter and the Sorcerer's Stone

### Content-based filtering with a neural network

Before, in the collaborative filtering, we generated two vectors, a user vector and an item/movie vector whose dot product would predict a rating. The vectors were derived solely from the ratings.   

Content-based filtering also generates a user and movie feature vector but recognizes there may be other information available about the user and/or movie that may improve the prediction. The additional information is provided to a neural network which then generates the user and movie vector as shown below.

In [22]:
user_train = df_ratings
user_train = user_train.merge(df_movies_with_title.drop(columns='title'), on='movieId')

genre_columns = [col for col in user_train.columns if col not in ['movieId', 'userId', 'rating', 'year']]
user_train[genre_columns] = user_train[genre_columns].multiply(user_train['rating'], axis=0)

y_train = pd.DataFrame(user_train['rating'].copy())

user_train.drop(columns=['movieId', 'rating', 'year'], inplace=True)

user_train = user_train.groupby('userId').sum().reset_index()
user_train.sort_values(by='userId', inplace=True)
# user_train.drop(columns=['userId'], inplace=True)
user_train.head()

Unnamed: 0,userId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,0,14.0,10.0,0.0,0.0,9.0,4.0,0.0,5.0,0.0,0.0,5.0,0.0,0.0,5.0,0.0,5.0,9.0,0.0,0.0
1,1,43.5,12.5,0.0,0.0,24.0,35.0,13.0,58.5,0.0,0.0,3.0,15.0,0.0,8.0,0.0,15.5,37.0,4.5,3.5
2,2,0.5,0.5,0.0,0.5,0.5,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.0,0.0
3,3,7.0,7.0,0.0,4.0,35.0,16.0,0.0,89.0,8.0,0.0,4.0,0.0,1.0,2.0,18.0,0.0,18.0,13.0,0.0
4,4,114.0,105.0,32.0,32.5,84.5,54.5,0.0,114.0,52.5,3.5,5.5,19.0,22.0,26.0,38.0,67.0,86.0,15.0,0.0


In [23]:
item_train = df_movies_with_title
average_ratings = df_ratings.groupby('movieId')['rating'].mean().reset_index()
item_train = df_movies_with_title.merge(average_ratings, on='movieId', how='left')
item_train.rename(columns={'rating': 'ave rating'}, inplace=True)

item_train = item_train.drop(columns=['movieId', 'title'])
item_train.head()

Unnamed: 0,year,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,...,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,ave rating
0,2000,0,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,3.4
1,2000,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3.25
2,2000,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,1,0,0,2.0
3,2000,0,0,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,2.0
4,2000,0,0,0,0,1,0,0,0,0,...,1,0,0,1,0,0,1,0,0,2.672414


In [24]:
user_train.shape

(443, 20)

In [25]:
# scale training data
item_train_unscaled = item_train.copy()
user_train_unscaled = user_train.copy()
y_train_unscaled    = y_train.copy()

scalerItem = StandardScaler()
item_train = scalerItem.fit_transform(item_train)

scalerUser = StandardScaler()
user_train = scalerUser.fit_transform(user_train)

scalerTarget = MinMaxScaler((-1, 1))
y_train = scalerTarget.fit_transform(np.array(y_train).reshape(-1, 1))

In [26]:
item_train, item_test = train_test_split(item_train, train_size=0.80, shuffle=True, random_state=1)
user_train, user_test = train_test_split(user_train, train_size=0.80, shuffle=True, random_state=1)
y_train, y_test       = train_test_split(y_train,    train_size=0.80, shuffle=True, random_state=1)
print(f"movie/item training data shape: {item_train.shape}")
print(f"movie/item test data shape: {item_test.shape}")

movie/item training data shape: (3824, 21)
movie/item test data shape: (956, 21)


#### Implementation of Neural Network for content-based filtering

In [27]:
num_user_features = (user_train.shape[1],)  # Comma to transform into tupple
num_item_features = (item_train.shape[1],)
num_outputs = 32
tf.random.set_seed(1)

user_NN = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=256, activation='relu'),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dense(units=num_outputs)
])

item_NN = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=256, activation='relu'),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dense(units=num_outputs)
])

# create the user input and point to the base network
input_user = tf.keras.layers.Input(shape=num_user_features)
input_item = tf.keras.layers.Input(shape=num_item_features)

# Saída das redes neurais para usuários e itens
vu = user_NN(input_user)
vm = item_NN(input_item)

# Normalização L2 dos vetores de saída das redes neurais
vu_normalized = tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1))(vu)
vm_normalized = tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1))(vm)

# Produto escalar dos vetores normalizados
output = tf.keras.layers.Dot(axes=1)([vu_normalized, vm_normalized])

# Modelo final
model = tf.keras.Model([input_user, input_item], output)

model.summary()




In [28]:
tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt,
              loss=cost_fn)

In [29]:
tf.random.set_seed(1)
model.fit([user_train, item_train], y_train, epochs=30)

ValueError: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples.'x' sizes: 354, 3824
'y' sizes: 31402


In [None]:
model.evaluate([user_test[:, u_s:], item_test[:, i_s:]], y_test)

#### Predictions

In [None]:
new_user_id = 5000
new_rating_ave = 0.0
new_action = 0.0
new_adventure = 5.0
new_animation = 0.0
new_childrens = 0.0
new_comedy = 0.0
new_crime = 0.0
new_documentary = 0.0
new_drama = 0.0
new_fantasy = 5.0
new_horror = 0.0
new_mystery = 0.0
new_romance = 0.0
new_scifi = 0.0
new_thriller = 0.0
new_rating_count = 3

user_vec = np.array([[new_user_id, new_rating_count, new_rating_ave,
                      new_action, new_adventure, new_animation, new_childrens,
                      new_comedy, new_crime, new_documentary,
                      new_drama, new_fantasy, new_horror, new_mystery,
                      new_romance, new_scifi, new_thriller]])