# Product Recommendation

In this blog post, I will walk through how you can manually make a product recommendation system for movies.

## Loading in data:

In [157]:
# Importing libraries:
import scipy.io as sio
import numpy as np
import pandas as pd
from scipy.optimize import minimize, rosen, rosen_der

movie_info = sio.loadmat('../data/movies.mat')
user_info = sio.loadmat('../data/movieParams.mat')

ratings = movie_info['R']
y = movie_info['Y']
X = user_info['X']
theta = user_info['Theta']

In [158]:
def compute_cost(X_theta, y, rated, reg_coeff, num_features):
    # Get dimensions
    num_users = y.shape[1]
    num_movies = y.shape[0]
    
    # Reconstructing X:
    X = X_theta[0:num_movies*num_features]
    X = X.reshape((num_movies, num_features))
    
    # Reconstructing theta:
    theta = X_theta[num_movies*num_features:]
    theta = theta.reshape((num_users, num_features))
    
    # Calculating estimate:
    y_hat = np.dot(X, theta.T)
    
    # Calculating error:
    error = np.multiply((y_hat - y), rated)
    sq_error = error**2
    
    # Calculating cost:
    theta_regularization = (reg_coeff/2)*(np.sum(theta**2))
    X_regularization = (reg_coeff/2)*(np.sum(X**2))                
    J =  (1/2)*np.sum(sq_error) + theta_regularization + X_regularization
    
    # Calculating gradients:
    theta_gradient = np.dot(error.T,X) + reg_coeff*theta
    X_gradient = np.dot(error,theta) + reg_coeff*X 
    X_theta_gradient = np.append(np.ravel(X_gradient), np.ravel(theta_gradient))

    return(J, X_theta_gradient)

small_users = 4
small_movies = 5
small_features = 3

X_small = X[0:small_movies, 0:small_features]
theta_small = theta[0:small_users, 0:small_features]
ratings_small = ratings[0:small_movies, 0:small_users]
y_small = y[0:small_movies, 0:small_users]

X_theta_small = np.append(np.ravel(X_small), np.ravel(theta_small))

compute_cost(X_theta_small, y_small, ratings_small, 1.5, small_features)

(31.344056244274221,
 array([ -0.95596339,   6.97535514,  -0.10861109,   0.60308088,
          2.77421145,   0.25839822,   0.12985616,   4.0898522 ,
         -0.89247334,   0.29684395,   1.06300933,   0.66738144,
          0.60252677,   4.90185327,  -0.19747928, -10.13985478,
          2.10136256,  -6.76563628,  -2.29347024,   0.48244098,
         -2.99791422,  -0.64787484,  -0.71820673,   1.27006666,
          1.09289758,  -0.40784086,   0.49026541]))

In [141]:
movie_titles = []
f = open("../data/movie_ids.txt", encoding='ISO-8859-1')
for line in f:
    title_list = line.split(" ")[1:-1]
    movie_titles.append(" ".join(title_list))

In [175]:
# Getting data
ratings = movie_info['R']
y = movie_info['Y']
X = user_info['X']
theta = user_info['Theta']

# getting dimensions:
num_features = 10
num_movies = y.shape[0]
num_users = y.shape[1]

# Making new user:
new_user = np.zeros(num_movies)
rated = np.zeros(num_movies)

# Entering user preferences:
new_user[0] = 4  
new_user[6] = 3  
new_user[11] = 5  
new_user[53] = 4  
new_user[63] = 5  
new_user[65] = 3  
new_user[68] = 5  
new_user[97] = 2  
new_user[182] = 4  
new_user[225] = 5  
new_user[354] = 5

# Marking which movies the user has rated:
for i,r in enumerate(new_user):
    if r != 0:
        rated[i] = 1
ratings = np.vstack((ratings.T, rated)).T

for i, movie_title in enumerate(movie_titles):
    if rated[i] == 1:
        print(movie_title)

# adding user preferences to database
y = np.vstack((y.T, new_user)).T

# getting dimensions:
num_features = 10
num_movies = y.shape[0]
num_users = y.shape[1]

# making random x values - this corresponds to initializing user preferences
X = np.random.rand(num_users, num_features)

# making random theta values - this corresponds to initializing movie attributes
theta = np.random.rand(num_movies, num_features)

# Combining x and theta:
X_theta = np.append(np.ravel(X), np.ravel(theta))

# Normalizing y:
y_mean = np.mean(y,axis=1)
y_norm = y.T - y_mean.T
y_norm = y_norm.T

Toy Story
Twelve Monkeys
Usual Suspects, The
Outbreak
Shawshank Redemption, The
While You Were Sleeping
Forrest Gump
Silence of the Lambs, The
Alien
Die Hard 2
Sphere


## Training:

In [200]:
reg_coeff = 10

min_results = minimize(fun=compute_cost,
         x0=X_theta, 
         method='CG',
         jac=True,
         args=(y_norm, ratings, reg_coeff, num_features))

min_results

     fun: 68305.08381545279
     jac: array([  7.85827456e-07,  -2.42152836e-06,  -4.47880930e-07, ...,
        -3.83137121e-07,   7.35389474e-08,  -5.66233779e-07])
 message: 'Desired error not necessarily achieved due to precision loss.'
    nfev: 3382
     nit: 2214
    njev: 3380
  status: 2
 success: False
       x: array([ 0.38111754,  1.03017661,  0.28424593, ...,  0.40067109,
        0.37667344,  0.1950215 ])

## Prediction:

In [202]:
X_theta_pred = min_results['x']

# Reconstructing X:
X_pred = X_theta_pred[0:num_movies*num_features]
X_pred = X_pred.reshape((num_movies, num_features))

# Reconstructing theta:
theta_pred = X_theta_pred[num_movies*num_features:]
theta_pred = theta_pred.reshape((num_users, num_features))

# Predicting new_user:
predictions = np.dot(X_pred, theta_pred.T)
test = np.vstack((range(0,num_movies), predictions[:,-1].T)).T

new_user_df = pd.DataFrame(test)
new_user_df.columns = ["movie_id", "predicted_rating"]
new_user_df['predicted_rating'] = new_user_df['predicted_rating'] + y_mean
new_user_df = new_user_df.sort_values(by='predicted_rating', ascending=False)

movie = []

for i, movie_id in enumerate(new_user_df['movie_id']):
    movie.append(movie_titles[int(movie_id)])

new_user_df['movie'] = movie

new_user_df

Unnamed: 0,movie_id,predicted_rating,movie
49,49.0,4.461127,Star Wars
312,312.0,4.271429,Titanic
173,173.0,4.210526,Raiders of the Lost Ark
180,180.0,4.146209,Return of the Jedi
171,171.0,4.010637,"Empire Strikes Back, The"
21,21.0,3.913520,Braveheart
126,126.0,3.891142,"Godfather, The"
63,63.0,3.890768,"Shawshank Redemption, The"
95,95.0,3.790906,Terminator 2: Judgment Day
271,271.0,3.756946,Good Will Hunting


In [54]:
X.shape

(1683, 10)