In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as ios
import pandas as pd

In [2]:
path_1 = 'ex8_movies.mat'
path_2 = 'ex8_movieParams.mat'
movies = ios.loadmat(path_1)
moviesParams = ios.loadmat(path_2)
print(movies.keys(),moviesParams.keys())
Y = movies['Y']
R = movies['R']
X = moviesParams['X']
Theta = moviesParams['Theta']
num_users = moviesParams['num_users']
num_features = moviesParams['num_features']
num_movies = moviesParams['num_movies']
print("Y.shape is",Y.shape)
print("R.shape is",R.shape)
print("X.shape is",X.shape)
print("theta.shape is",Theta.shape)
print("num_movies",num_movies)
print("num_users",num_users)
print("num_features",num_features)

dict_keys(['__header__', '__version__', '__globals__', 'Y', 'R']) dict_keys(['__header__', '__version__', '__globals__', 'X', 'Theta', 'num_users', 'num_movies', 'num_features'])
Y.shape is (1682, 943)
R.shape is (1682, 943)
X.shape is (1682, 10)
theta.shape is (943, 10)
num_movies [[1682]]
num_users [[943]]
num_features [[10]]


In [3]:
movie_list = []
with open('movie_ids.txt',encoding='ANSI') as file:
    for line in file:
        sentence = line.strip().split(' ')
        movie_list.append(' '.join(sentence[1:]))

In [4]:
def serialize(X,theta):
    return np.concatenate((X.ravel(),theta.ravel()))
def deserialize(param,n_users,n_movies,n_feature):
    return param[:n_movies*n_feature].reshape(n_movies,n_feature),param[n_movies*n_feature:].reshape(n_users,n_feature)

In [5]:
def cost_fun(param,Y,R,n_feature):
    n_movies,n_users = Y.shape
    X,theta = deserialize(param,n_users,n_movies,n_feature)
    # print(X.shape)
    # print(theta.shape)
    return 1/2*np.sum(np.power((X@(theta.T)-Y)*R,2))

In [6]:
def regularized_cost(param,Y,R,n_feature,lamda=1):
    reg = np.sum(param**2)*lamda/2
    return cost_fun(param,Y,R,n_feature) + reg

In [7]:
def gradient(param,Y,R,n_feature):
    n_movies,n_users = Y.shape
    X,theta = deserialize(param,n_users,n_movies,n_feature)
    grad_x = ((X@theta.T-Y)*R)@theta
    grad_theta = ((X@theta.T-Y)*R).T@X
    return serialize(grad_x,grad_theta)

In [8]:
def regularized_gradient(param,Y,R,n_feature,lamda=1):
    gra = gradient(param,Y,R,n_feature)
    return gra + lamda * param

In [9]:
ratings = np.zeros(1682)
ratings[0] = 4
ratings[6] = 3
ratings[11] = 5
ratings[53] = 4
ratings[63] = 5
ratings[65] = 3
ratings[68] = 5
ratings[97] = 2
ratings[182] = 4
ratings[225] = 5
ratings[354] = 5

In [10]:
Y = np.insert(Y,0,ratings,axis=1)
R = np.insert(R,0,ratings!=0,axis=1)
print("Y.shape is",Y.shape)
print("R.shape is",R.shape)

Y.shape is (1682, 944)
R.shape is (1682, 944)


In [11]:
X_train = np.random.standard_normal((num_movies[0,0],num_features[0,0]))
theta_train = np.random.standard_normal((num_users[0,0]+1,num_features[0,0]))
lamda=10


In [12]:
param = serialize(X_train,theta_train)
Y_norm = Y - Y.mean()

In [13]:
import scipy.optimize as opt

In [14]:
result = opt.minimize(fun=regularized_cost,x0=param,args=(Y,R,num_features[0,0],lamda),method='TNC',jac=regularized_gradient)
result

 message: Converged (|f_n-f_(n-1)| ~= 0)
 success: True
  status: 1
     fun: 71900.03071974813
       x: [ 4.410e-01  4.798e-01 ... -6.730e-01  4.327e-01]
     nit: 45
     jac: [ 1.737e-06  2.330e-06 ... -8.947e-07  1.065e-06]
    nfev: 1142

In [15]:
out_param = result.x
X_out,theta_out = deserialize(out_param,num_users[0,0]+1,num_movies[0,0],num_features[0,0])
prediction = X_out @ theta_out.T
userpre_me = prediction[:,0]+Y.mean()
top_index = np.argsort(userpre_me)[::-1]


In [16]:
movies_score = userpre_me[top_index][:10]
print(movies_score)

[4.42423566 4.41634854 4.20856053 4.16140482 4.13552866 4.13216082
 4.12794958 4.12771555 4.01144238 4.00434947]


In [17]:
movies_list = np.array(movie_list)
for i in top_index[:10]:
    print("The movies is {}".format(movies_list[i]))

The movies is Titanic (1997)
The movies is Star Wars (1977)
The movies is Raiders of the Lost Ark (1981)
The movies is Return of the Jedi (1983)
The movies is Shawshank Redemption, The (1994)
The movies is Good Will Hunting (1997)
The movies is Braveheart (1995)
The movies is Empire Strikes Back, The (1980)
The movies is Schindler's List (1993)
The movies is Godfather, The (1972)
