In [1]:
from utils import *

# Load data

943 users, 1682 movies.

X is the features of movie.

$\theta$ is the parameters of user.

Y is a num_movies $\times$ num_users matrix, stores the ratings $y^{(i,j)}$ from 1 to 5.

R is a binary-valued indicator matrix, R(i,j)=1 if user j gave a rating to movie i, and R(i,j)=0 otherwise.

In [2]:
movies_mat = loadmat('data/ex8_movies.mat')
movies_mat.keys()

dict_keys(['__header__', '__version__', '__globals__', 'Y', 'R'])

In [3]:
Y, R = movies_mat['Y'], movies_mat['R']
Y.shape, R.shape

((1682, 943), (1682, 943))

In [4]:
param_mat = loadmat('data/ex8_movieParams.mat')
param_mat.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'Theta', 'num_users', 'num_movies', 'num_features'])

In [5]:
X, theta = param_mat['X'], param_mat['Theta']
X.shape, theta.shape

((1682, 10), (943, 10))

In [6]:
movie_idx = {}
with open('./data/movie_ids.txt', encoding='gbk') as f:
    for line in f:
        tokens = line.split(' ')
        tokens[-1] = tokens[-1][:-1]
        movie_idx[int(tokens[0]) - 1] = ' '.join(tokens[1:])

movie_idx[0], len(movie_idx)

('Toy Story (1995)', 1682)

# Cost Function

In [7]:
def serialize(X, theta):
    return np.concatenate((np.ravel(X), np.ravel(theta)))

def deserialize(param, n_movie, n_user, n_feature):
    return param[:n_movie * n_feature].reshape(n_movie, n_feature), \
            param[n_movie * n_feature:].reshape(n_user, n_feature)

\begin{align*}
J(x^{(1)},...,x^{(n_m)},\theta^{(1)},...,\theta^{(n_u)}) = \frac{1}{2}\sum_{(i,j):r(i,j)=1}\left((\theta^{(j)})^T x^{(i)} - y^{(i,j)}\right)^2
\end{align*}

In [8]:
def cost(param, Y, R, n_feature):
    '''
    X(movie, feature), (1682, 10)
    theta(user, feature), (943, 10)
    '''
    movie_size, user_size = Y.shape
    X, theta = deserialize(param, movie_size, user_size, n_feature)
    error = (X @ theta.T - Y) * R
    return (error ** 2).sum() / 2


\begin{align*}
J(x^{(1)},...,x^{(n_m)},\theta^{(1)},...,\theta^{(n_u)}) =& \frac{1}{2}\sum_{(i,j):r(i,j)=1}\left((\theta^{(j)})^T x^{(i)} - y^{(i,j)}\right)^2 \\
&+ \left(\frac{\lambda}{2}\sum_{j=1}^{n_u}\sum_{k=1}^n (\theta_k^{(j)})^2\right) \\
&+ \left(\frac{\lambda}{2}\sum_{i=1}^{n_m}\sum_{k=1}^n (x_k^{(i)})^2\right).
\end{align*}


In [9]:
def cost_reg(param, Y, R, n_feature, l = 1):
    movie_size, user_size = Y.shape
    X, theta = deserialize(param, movie_size, user_size, n_feature)
    return cost(param, Y, R, n_feature) + (l / 2) * np.sum(X ** 2) + (l / 2) * np.sum(theta ** 2)

In [10]:
param = serialize(X, theta)
cost(param, Y, R, 10), cost_reg(param, Y, R, 10, l=1)

(np.float64(27918.64012454421), np.float64(32520.682450229557))

# Gradient

\begin{align*}
\frac{\partial J}{\partial x_k^{(i)}} &= \sum_{j:r(i,j)=1} \left( (\theta^{(j)})^T x^{(i)} - y^{(i,j)} \right) \theta_k^{(j)} \\
\frac{\partial J}{\partial \theta_k^{(j)}} &= \sum_{i:r(i,j)=1} \left( (\theta^{(j)})^T x^{(i)} - y^{(i,j)} \right) x_k^{(i)}
\end{align*}

In [11]:
def gradient(param, Y, R, n_feature):
    movies_size, user_size = Y.shape
    X, theta = deserialize(param, movies_size, user_size, n_feature)

    error = (X @ theta.T - Y) * R
    X_grad = error @ theta
    theta_grad = error.T @ X
    return serialize(X_grad, theta_grad)

\begin{align*}
\frac{\partial J}{\partial x_k^{(i)}} &= \sum_{j:r(i,j)=1} \left( (\theta^{(j)})^T x^{(i)} - y^{(i,j)} \right) \theta_k^{(j)} + \lambda x_k^{(i)} \\
\frac{\partial J}{\partial \theta_k^{(j)}} &= \sum_{i:r(i,j)=1} \left( (\theta^{(j)})^T x^{(i)} - y^{(i,j)} \right) x_k^{(i)} + \lambda \theta_k^{(i)}
\end{align*}

In [12]:
def gradient_reg(param, Y, R, n_feature, l=1):
    return gradient(param, Y, R, n_feature) + l * param

# Add user
我们将使用练习中提供的评分。

In [13]:
ratings = np.zeros((1682, 1))

ratings[0] = 4
ratings[11] = 4
ratings[21] = 1
ratings[30] = 5
ratings[35] = 4
ratings[60] = 3
ratings[76] = 4
ratings[90] = 2
ratings[110] = 5
ratings[330] = 4

我们可以将自己的评级向量添加到现有数据集中以包含在模型中。

In [14]:
Y, R = movies_mat.get('Y'), movies_mat.get('R')
Y = np.append(Y, ratings, axis=1)
R = np.append(R, ratings != 0, axis=1)

Y.shape, R.shape, ratings.shape

((1682, 944), (1682, 944), (1682, 1))

我们不只是准备训练协同过滤模型。 我们只需要定义一些变量并对评级进行规一化。

In [24]:
movies_size, users_size = Y.shape
features = 10
learning_rate = 10

X = np.random.random(size=(movies_size, features))
theta = np.random.random(size=(users_size, features))
params = serialize(X, theta)

X.shape, theta.shape, params.shape

((1682, 10), (944, 10), (26260,))

In [25]:
Y_norm = Y - Y.mean()
Y.mean(), Y_norm.mean()

(np.float64(0.22233292690300085), np.float64(7.503665804193807e-17))

# Training

In [26]:
res = optimize.minimize(fun=cost_reg, x0=params, args=(Y_norm, R, features, learning_rate),
                  method='TNC', jac=gradient_reg)
res

 message: Converged (|f_n-f_(n-1)| ~= 0)
 success: True
  status: 1
     fun: 69381.53935273514
       x: [ 5.953e-01 -4.066e-01 ...  6.836e-01  4.152e-01]
     nit: 62
     jac: [ 5.590e-06  6.737e-06 ...  6.010e-07  4.320e-07]
    nfev: 1482

In [27]:
X_trained, theta_trained = deserialize(res.x, movies_size, users_size, features)
X_trained.shape, theta_trained.shape

((1682, 10), (944, 10))

In [28]:
prediction = X_trained @ theta_trained.T
user0_pred = prediction[:, 0] + Y.mean()
top_idx = np.argsort(user0_pred)[::-1] # descending order
top_idx.shape

(1682,)

In [29]:
user0_pred[top_idx][:10]

array([5.6963995 , 5.41540969, 5.38101216, 5.20402943, 5.16717062,
       5.13165185, 5.12067357, 5.05166792, 5.03479017, 5.02821375])

In [32]:
print('Top recommendations for you:')
movie_list = np.array(movie_idx)
for i in top_idx[:10]:
    print('Prediction rating {:.2f} for movie {}'.format(user0_pred[i], movie_idx[i]))
# for m in movie_list[top_idx][:10]:
#     print(m)

Top recommendations for you:
Prediction rating 5.70 for movie Star Wars (1977)
Prediction rating 5.42 for movie Wrong Trousers, The (1993)
Prediction rating 5.38 for movie Close Shave, A (1995)
Prediction rating 5.20 for movie Empire Strikes Back, The (1980)
Prediction rating 5.17 for movie Secrets & Lies (1996)
Prediction rating 5.13 for movie Fargo (1996)
Prediction rating 5.12 for movie Return of the Jedi (1983)
Prediction rating 5.05 for movie Blade Runner (1982)
Prediction rating 5.03 for movie Pulp Fiction (1994)
Prediction rating 5.03 for movie Lawrence of Arabia (1962)
