In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as scio
import scipy.optimize as opt

In [3]:
data = scio.loadmat("ex8_movies.mat")
Y = data['Y']
R = data['R']

In [27]:
def cofiCostFunction(params, Y, R, num_movies, num_users, num_features, Lambda):
    len1 = num_features * num_movies
    len2 = num_features * num_users
    X = params[0:len1].reshape(num_movies, num_features)
    Theta = params[len1:len1+len2].reshape(num_users, num_features)
    s1 = np.sum(np.power(np.dot(X, Theta.T) - Y, 2) * R) / 2
    s2 = np.sum(np.power(X, 2)) * Lambda / 2
    s3 = np.sum(np.power(Theta, 2)) * Lambda / 2
    J = s1 + s2 + s3
    return J

In [6]:
data2 = scio.loadmat("ex8_movieParams.mat")
X = data2['X']
Theta = data2['Theta']

In [11]:
num_users = 4
num_movies = 5
num_features = 3
tmpX = X[0:num_movies, 0:num_features]
tmpTheta = Theta[0:num_users, 0:num_features]
len1 = num_movies * num_features
len2 = num_users * num_features
params = np.vstack((tmpX.reshape(len1, 1), tmpTheta.reshape(len2, 1))).ravel()
tmpY = Y[0:num_movies, 0:num_users]
tmpR = R[0:num_movies, 0:num_users]

In [13]:
J = cofiCostFunction(params, tmpY, tmpR, num_movies, num_users, num_features,0)
J

22.224603725685675

In [28]:
def cofiGradient(params, Y, R, num_movies, num_users, num_features, Lambda):
    len1 = num_movies * num_features
    len2 = num_users * num_features
    X = params[0:len1].reshape(num_movies, num_features)
    Theta = params[len1:len1+len2].reshape(num_users, num_features)
    s1 = np.dot((np.dot(X, Theta.T) - Y) * R, Theta)
    gradX = s1 + Lambda * X
    s2 = np.dot(((np.dot(X, Theta.T) - Y) * R).T, X)
    gradTheta = s2 + Lambda * Theta
    grad = np.vstack((gradX.reshape(len1, 1), gradTheta.reshape(len2, 1))).ravel()
    return grad

In [18]:
def computeNumericalGradient(Theta, Y, R, num_movies, num_users, num_features, Lambda):
    e = 1e-4
    m = Theta.shape[0]
    perturb = np.zeros(m)
    numgrad = np.zeros(m)
    for i in range(m):
        perturb[i] = e
        loss1 = Theta - perturb
        loss2 = Theta + perturb
        numgrad[i] = (cofiCostFunction(loss2, Y, R, num_movies, num_users, num_features, Lambda) - cofiCostFunction(loss1, Y, R, num_movies, num_users, num_features, Lambda)) / (2 * e)
        perturb[i] = 0
    return numgrad

In [29]:
def checkCostFunction(Lambda = 0):
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)
    Y = np.dot(X_t, Theta_t.T)
    Y_t = Y <= 0.5
    Y = Y * Y_t
    R = Y != 0
    X = np.random.randn(4, 3)
    Theta = np.random.randn(5, 3)
    params = np.vstack((X.reshape(4*3, 1), Theta.reshape(5*3, 1))).ravel()
    numgrad = computeNumericalGradient(params, Y, R, 4, 5, 3, Lambda)
    grad = cofiGradient(params, Y, R, 4, 5, 3, Lambda)
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    return diff

In [31]:
checkCostFunction()

4.800769475804007e-13

In [32]:
J = cofiCostFunction(params, tmpY, tmpR, num_movies, num_users, num_features, 1.5)
J

31.344056244274217

In [33]:
checkCostFunction(1.5)

1.5238976955726712e-12

In [62]:
def loadMovieList():
    with open("movie_ids.txt", encoding='gb18030', errors='ignore') as f:
        movies = f.readlines()
    movieList = []
    for i in range(len(movies)):
        tmpList = movies[i].split(' ')
        name = ' '.join(tmpList[1:len(tmpList)]).strip()
        movieList.append(name)
    return movieList

In [65]:
movieList = loadMovieList()
m = len(movieList)

In [66]:
my_ratings = np.zeros(m)
my_ratings[0] = 4
my_ratings[97] = 2
my_ratings[6] = 3
my_ratings[11]= 5
my_ratings[53] = 4
my_ratings[63]= 5
my_ratings[65]= 3
my_ratings[68] = 5
my_ratings[182] = 4
my_ratings[225] = 5
my_ratings[354]= 5

In [68]:
print("New user ratings:")
for i in range(len(my_ratings)):
    if my_ratings[i] > 0:
        print("Rated %d for %s"%(my_ratings[i], movieList[i]))

New user ratings:
Rated 4 for Toy Story (1995)
Rated 3 for Twelve Monkeys (1995)
Rated 5 for Usual Suspects, The (1995)
Rated 4 for Outbreak (1995)
Rated 5 for Shawshank Redemption, The (1994)
Rated 3 for While You Were Sleeping (1995)
Rated 5 for Forrest Gump (1994)
Rated 2 for Silence of the Lambs, The (1991)
Rated 4 for Alien (1979)
Rated 5 for Die Hard 2 (1990)
Rated 5 for Sphere (1998)


In [77]:
def normalizeRatings(Y, R):
    m = Y.shape[0]
    Ymean = np.sum(Y, axis = 1) / np.sum(R, axis = 1)
    Ynorm = Y - Ymean.reshape(m, 1)
    Ynorm = Ynorm * R + 0
    return Ynorm, Ymean

In [85]:
Y = np.hstack((my_ratings.reshape(m, 1), Y))
R = np.hstack((my_ratings.reshape(m, 1) > 0, R))
Ynorm, Ymean = normalizeRatings(Y, R)

In [90]:
num_movies, num_users = Y.shape
num_features = 10
Lambda = 10
X = np.random.randn(num_movies, num_features)
Theta = np.random.randn(num_users, num_features)
len1 = num_movies * num_features
len2 = num_users * num_features

In [91]:
init_params = np.vstack((X.reshape(len1, 1), Theta.reshape(len2, 1))).reshape(-1)
params = opt.fmin_cg(f = cofiCostFunction, x0 = init_params, fprime = cofiGradient, args = (Y, R, num_movies, num_users, num_features, Lambda),
                    maxiter = 100, disp = False)

In [93]:
X = params[0:len1].reshape(num_movies, num_features)
Theta = params[len1:len1+len2].reshape(num_users, num_features)
pred = np.dot(X, Theta.T) + Ymean.reshape(m, 1)
new_ratings = pred[:,0]

In [103]:
idx = sorted(enumerate(new_ratings), key=lambda x:x[1], reverse = True)

In [110]:
print("Top recommendations for you:")
for i in range(10):
    print("Predicting rating %.1f for movie %s"%(new_ratings[idx[i][0]], movieList[idx[i][0]]))
print()
print("Original ratings provided:")
for i in range(len(my_ratings)):
    if my_ratings[i] > 0:
        print("Rated %d for %s"%(my_ratings[i], movieList[i]))

Top recommendations for you:
Predicting rating 8.5 for movie Star Wars (1977)
Predicting rating 8.4 for movie Titanic (1997)
Predicting rating 8.3 for movie Shawshank Redemption, The (1994)
Predicting rating 8.2 for movie Raiders of the Lost Ark (1981)
Predicting rating 8.2 for movie Schindler's List (1993)
Predicting rating 8.1 for movie Good Will Hunting (1997)
Predicting rating 8.1 for movie Usual Suspects, The (1995)
Predicting rating 8.0 for movie Braveheart (1995)
Predicting rating 8.0 for movie Empire Strikes Back, The (1980)
Predicting rating 8.0 for movie Godfather, The (1972)

Original ratings provided:
Rated 4 for Toy Story (1995)
Rated 3 for Twelve Monkeys (1995)
Rated 5 for Usual Suspects, The (1995)
Rated 4 for Outbreak (1995)
Rated 5 for Shawshank Redemption, The (1994)
Rated 3 for While You Were Sleeping (1995)
Rated 5 for Forrest Gump (1994)
Rated 2 for Silence of the Lambs, The (1991)
Rated 4 for Alien (1979)
Rated 5 for Die Hard 2 (1990)
Rated 5 for Sphere (1998)
