In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
import scipy.optimize as op

## 2 Recommender Systems
### 2.1 Movie ratings dataset
The matrix Y (a num movies×num users matrix) stores the ratings y(i,j) (from 1 to 5).
The matrix R is an binary-valued indicator matrix, where R(i,j) = 1 if user j gave a rating to movie i, and R(i,j) = 0 otherwise. 
### 2.2 Collaborative ﬁltering learning algorithm
Note that the parameters to the function (i.e., the values that you are trying to learn) are X and Theta. In order to use an oﬀ-the-shelf minimizer such as fmincg, the cost function has been set up to unroll the parameters into a single vector params. You had previously used the same vector unrolling method in the neural networks programming exercise.
- **2.2.1 Collaborative ﬁltering cost function**  
You should return this cost in the variable J. Note that you should be accumulating the cost for user j and movie i only if R(i,j) = 1. 

In [2]:
def cofiCostFunc(params, Y, R, num_users, num_movies, num_features, lam):
    X = params[:num_movies*num_features].reshape(num_movies, num_features)
    Theta = params[num_movies*num_features:].reshape(num_users, num_features)
    '''
    J = 0
    for i in range(num_movies):
        for j in range(num_users):
            if R[i, j]:
                J = J + 0.5*(np.dot(Theta[j,:], X[i,:]) - Y[i, j])**2
    '''
    J = 0.5 * np.sum(R * ((np.dot(X, Theta.T)-Y)**2))  # 与R的对应元素相乘可以去除掉未评分的cost值
    J = J + (lam/2)*np.sum(X**2) + (lam/2)*np.sum(Theta**2)
    return J

In [None]:
    '''
    for k in range(num_features):
        for i in range(num_movies):
            for j in range(num_users):
                if R[i, j]:
                    X_grad[i, k] = X_grad[i, k] + (np.dot(Theta[j,:], X[i,:]) - Y[i, j])*Theta[j, k] + lam*X[i,k]
                    Theta_grad[j, k] = Theta_grad[j, k] + (np.dot(Theta[j,:], X[i,:]) - Y[i, j])*X[i, k] + lam*Theta[j,k]
    '''

In [15]:
def cofiGradient(params, Y, R, num_users, num_movies, num_features, lam):
    X = params[:num_movies*num_features].reshape(num_movies, num_features)
    Theta = params[num_movies*num_features:].reshape(num_users, num_features)
    X_grad = np.zeros_like(X)
    Theta_grad = np.zeros_like(Theta)

    for i in range(num_movies):
        idx = np.where(R[i, :] == 1)
        theta_tmp = Theta[idx[0], :]
        y_tmp = Y[i, idx[0]]
        X_grad[i, :] = np.dot((np.dot(X[i,:], theta_tmp.T) - y_tmp), theta_tmp) + lam*X[i,:]
    for i in range(num_users):
        idx = np.where(R[:, i] == 1)
        x_tmp = X[idx[0], :]
        y_tmp = Y[idx[0], i]
        Theta_grad[i, :] = np.dot((np.dot(x_tmp, Theta[i,:].T) - y_tmp).T, x_tmp) + lam*Theta[i,:]
    
    grad = np.append(X_grad.ravel(), Theta_grad.ravel())
    return grad

In [16]:
def loadMovieList():
    movieList = {}
    with open("movie_ids.txt", 'r') as fo:
        for line in fo:
            line = line.strip()
            for i in range(len(line)):
                if line[i] == ' ':
                    break
            movieList[int(line[:i])] = line[i+1:]
    return movieList

In [70]:
#def main():
data = sio.loadmat("ex8_movies.mat")
Y, R = data['Y'], data["R"]
print(Y.shape, R.shape)
data2 = sio.loadmat("ex8_movieParams.mat")
X, Theta = data2['X'], data2['Theta']
print(X.shape, Theta.shape)
num_users = 4
num_movies = 5
num_features = 3
X = X[:num_movies, :num_features]
Theta = Theta[:num_users, :num_features]
Y = Y[:num_movies, :num_users]
R = R[:num_movies, :num_users]
print(Y.shape, R.shape)
print(X.shape, Theta.shape)
params = np.append(X.ravel(), Theta.ravel())
J = cofiCostFunc(params, Y, R, num_users, num_movies, num_features, 1.5)
print(J)

(1682, 943) (1682, 943)
(1682, 10) (943, 10)
(5, 4) (5, 4)
(5, 3) (4, 3)
31.344056244274217


### 2.3 Learning movie recommendations

In [5]:
# Part 6: Entering ratings for a new user
movieList = loadMovieList()
my_ratings = np.zeros(1682)
my_ratings[1] = 4
my_ratings[98] = 2
my_ratings[7] = 3
my_ratings[12] = 5
my_ratings[54] = 4
my_ratings[64] = 5
my_ratings[66] = 3
my_ratings[69] = 5
my_ratings[183] = 4
my_ratings[226] = 5
my_ratings[355] = 5
for i in range(my_ratings.size):
    if my_ratings[i] > 0:
        print("Rated {:.0f} for {}".format(my_ratings[i], movieList[i]))

Rated 4 for Toy Story (1995)
Rated 3 for Twelve Monkeys (1995)
Rated 5 for Usual Suspects, The (1995)
Rated 4 for Outbreak (1995)
Rated 5 for Shawshank Redemption, The (1994)
Rated 3 for While You Were Sleeping (1995)
Rated 5 for Forrest Gump (1994)
Rated 2 for Silence of the Lambs, The (1991)
Rated 4 for Alien (1979)
Rated 5 for Die Hard 2 (1990)
Rated 5 for Sphere (1998)


In [6]:
# Part 7: Learning Movie Ratings
data = sio.loadmat("ex8_movies.mat")
Y, R = data['Y'], data["R"]
print(Y.shape, R.shape)
Y = np.hstack((my_ratings.reshape(1682,1), Y))
R = np.hstack(((my_ratings > 0).reshape(1682,1),R))

(1682, 943) (1682, 943)


In [7]:
# Preprocess data by subtracting mean rating for every movie (every row)
def normalizeRatings(Y, R):
    m, n = Y.shape
    Ymean = np.zeros(m)
    Ynorm = np.zeros_like(Y)
    for i in range(m):
        idx = np.where(R[i,:] == 1)
        Ymean[i] = np.mean(Y[i,:][idx])
        Ynorm[i,idx] = Y[i,idx] - Ymean[i]
    return Ynorm, Ymean

In [8]:
Ynorm, Ymean = normalizeRatings(Y, R)
num_movies, num_users = Y.shape
num_features = 10
X = np.random.randn(num_movies, num_features)
Theta = np.random.randn(num_users, num_features)

In [18]:
lam = 10
params = np.append(X.ravel(), Theta.ravel())
result = op.minimize(fun = cofiCostFunc, x0 = params, \
        args = (Y, R, num_users, num_movies, num_features, lam), method = 'TNC', jac = cofiGradient)

In [19]:
# Part 8: Recommendation for you
Xres = result.x[:num_movies*num_features].reshape(num_movies, num_features)
Theta_res = result.x[num_movies*num_features:].reshape(num_users, num_features)
p = np.dot(Xres, Theta_res.T)
my_predictions = p[:, 1] + Ymean
idx = np.argsort(- my_predictions)  # 获得降序排列的索引值
for i in idx[:10]:
    print("Rated {:.1f} for {}".format(my_predictions[i], movieList[i]))

Rated 9.9 for Monty Python and the Holy Grail (1974)
Rated 9.9 for Spy Hard (1996)
Rated 9.8 for I.Q. (1994)
Rated 9.6 for Snow White and the Seven Dwarfs (1937)
Rated 9.4 for Horseman on the Roof, The (Hussard sur le toit, Le) (1995)
Rated 9.3 for Tin Cup (1996)
Rated 9.3 for In & Out (1997)
Rated 9.2 for Men in Black (1997)
Rated 9.2 for Some Like It Hot (1959)
Rated 9.2 for Seven (Se7en) (1995)
