# 2 Recommender Systems

In [1]:
import numpy as np
from scipy.io import loadmat
from utils import *

## 2.1 Movie ratings dataset

In [2]:
data = loadmat('../data/ex8_movies.mat')
Y = data['Y'] * -1 * -1 # uint8 -> int16
R = data['R'] * -1 * -1 # uint8 -> int16

print('Toy Story average rating: %.1f' % Y[0,R[0,:]].mean())

Toy Story average rating: 4.5


## 2.2 Collaborative filtering learning algorithm
code in utils.py

## 2.3 Learning movie recommendations

In [9]:
movie_names = get_movie_names()

my_ratings = np.zeros(len(movie_names))

# rate the folwing movies:
my_ratings[0] = 1 # Toy Story (1995)
my_ratings[97] = 5 # Twelve Monkeys (1995)
my_ratings[6] = 5 # Usual Suspects, The (1995)
my_ratings[11] = 5 # Outbreak (1995)
my_ratings[53] = 1 # Shawshank Redemption, The (1994)
my_ratings[63]= 1 # While You Were Sleeping (1995)
my_ratings[65]= 1 # Forrest Gump (1994)
my_ratings[68] = 5 # Silence of the Lambs, The (1991)
my_ratings[182] = 5 # Alien (1979)
my_ratings[225] = 5 # Die Hard 2 (1990)
my_ratings[354] = 5 # Sphere (1998)
my_ratings[20] = 1 # Muppet Treasure Island (1996)

print('New user ratings:')
for i in range(len(movie_names)):
    if my_ratings[i] > 0:
        print('Rated %d for %s' % (my_ratings[i], movie_names[i]))

New user ratings:
Rated 1 for Toy Story (1995)
Rated 5 for Twelve Monkeys (1995)
Rated 5 for Usual Suspects, The (1995)
Rated 1 for Muppet Treasure Island (1996)
Rated 1 for Outbreak (1995)
Rated 1 for Shawshank Redemption, The (1994)
Rated 1 for While You Were Sleeping (1995)
Rated 5 for Forrest Gump (1994)
Rated 5 for Silence of the Lambs, The (1991)
Rated 5 for Alien (1979)
Rated 5 for Die Hard 2 (1990)
Rated 5 for Sphere (1998)


In [10]:
# add a new user
newY = np.c_[Y, my_ratings]
newR = np.c_[R, (my_ratings>0)*1]

n_m, n_u = newY.shape # number of movies, number of users
n_f = 10 # number of features

# mean rating of each movie
meanY = np.zeros(n_m)
for i in range(n_m):
    meanY[i] = newY[i,newR[i,:]].mean()
# normelize data
newY = (newY.T - meanY).T

# train model
l=10
init_parameters = np.random.rand(n_m*n_f + n_u*n_f)
parameters = train(init_parameters, newY, newR, (n_m, n_f), (n_u,n_f), l)

X, Theta = unroll(parameters, (n_m, n_f), (n_u,n_f))

In [11]:
my_recommendetions = X.dot(Theta.T)[:,-1] + meanY
sort_idxs = np.argsort(my_recommendetions)[::-1]

num_top = 10
print('%d Top recommendations for you:' % num_top)
for i in sort_idxs[:num_top]:
    print('Predicting rating %.1f for movie %s' % (my_recommendetions[i], movie_names[i]))


10 Top recommendations for you:
Predicting rating 4.9 for movie Maya Lin: A Strong Clear Vision (1994)
Predicting rating 4.8 for movie Godfather, The (1972)
Predicting rating 4.6 for movie Horseman on the Roof, The (Hussard sur le toit, Le) (1995)
Predicting rating 4.6 for movie Fargo (1996)
Predicting rating 4.5 for movie Wallace & Gromit: The Best of Aardman Animation (1996)
Predicting rating 4.4 for movie Wrong Trousers, The (1993)
Predicting rating 4.4 for movie Star Wars (1977)
Predicting rating 4.4 for movie Manon of the Spring (Manon des sources) (1986)
Predicting rating 4.4 for movie Haunted World of Edward D. Wood Jr., The (1995)
Predicting rating 4.4 for movie Priest (1994)
