In [1]:
import numpy as np
import scipy.optimize as op

In [2]:
n_movies=10
n_users=5
n_features=3 # comedy,action,romance etc. applies to both movie and user

In [3]:
# movies x user rating matrix where ratings are from 1 to 10
#each column is a single users rating of all movies
# 0 value if the user has not rated the movie
ratings =np.array([
 [8, 4, 0, 0, 4],
 [0, 0, 8, 10, 4],
 [8, 10, 0, 0, 6],
 [10, 10, 8, 10, 10],
 [0, 0, 0, 0, 0],
 [2, 0, 4, 0, 6],
 [8, 6, 4, 0, 0],
 [0, 0, 6, 4, 0],
 [0, 6, 0, 4, 10],
 [0, 4, 6, 8, 8]
])

In [4]:
print ratings
print ratings.shape

[[ 8  4  0  0  4]
 [ 0  0  8 10  4]
 [ 8 10  0  0  6]
 [10 10  8 10 10]
 [ 0  0  0  0  0]
 [ 2  0  4  0  6]
 [ 8  6  4  0  0]
 [ 0  0  6  4  0]
 [ 0  6  0  4 10]
 [ 0  4  6  8  8]]
(10, 5)


In [5]:
did_rate=(ratings!=0)*1
print did_rate
print did_rate.shape

[[1 1 0 0 1]
 [0 0 1 1 1]
 [1 1 0 0 1]
 [1 1 1 1 1]
 [0 0 0 0 0]
 [1 0 1 0 1]
 [1 1 1 0 0]
 [0 0 1 1 0]
 [0 1 0 1 1]
 [0 1 1 1 1]]
(10, 5)


In [6]:
movies={
1:"Harold and Kumar Escape From Guantanamo Bay (2008)",
2:"Ted (2012)",
3:"Straight Outta Compton (2015)",
4:"A Very Harold and Kumar Christmas (2011)",
5:"Notorious (2009)",
6:"Get Rich Or Die Tryin' (2005)",
7:"Frozen (2013)",
8:"Tangled (2010)",
9:"Cinderella (2015)",
10:"Toy Story 3 (2010)"
}

In [7]:
sample_rating=np.zeros((10,1)) #column vector
sample_rating[0] = 8
sample_rating[4] = 7
sample_rating[7] = 3

In [8]:
ratings =np.append(sample_rating,ratings, axis=1) # axis=1 append as column. i.e first column here
did_rate = np.append(((sample_rating != 0) * 1), did_rate, axis = 1)

In [13]:
print ratings
print did_rate

[[  7.   8.   4.   0.   0.   4.]
 [  0.   0.   0.   8.  10.   4.]
 [  0.   8.  10.   0.   0.   6.]
 [  0.  10.  10.   8.  10.  10.]
 [  8.   0.   0.   0.   0.   0.]
 [  0.   2.   0.   4.   0.   6.]
 [  0.   8.   6.   4.   0.   0.]
 [  3.   0.   0.   6.   4.   0.]
 [  0.   0.   6.   0.   4.  10.]
 [  0.   0.   4.   6.   8.   8.]]
[[1 1 1 0 0 1]
 [0 0 0 1 1 1]
 [0 1 1 0 0 1]
 [0 1 1 1 1 1]
 [1 0 0 0 0 0]
 [0 1 0 1 0 1]
 [0 1 1 1 0 0]
 [1 0 0 1 1 0]
 [0 0 1 0 1 1]
 [0 0 1 1 1 1]]


In [21]:
def mean_normalize_ratings(ratings,did_rate):
    n_movies=ratings.shape[0]
    movie_means=np.zeros(shape=(n_movies,1))
    normalized_ratings=np.zeros(shape=ratings.shape)
    
    for i in range(n_movies):
        index=np.where(did_rate[i]==1)[0]
        movie_means[i] =np.mean(ratings[i, index])
        normalized_ratings[i, index] = ratings[i, index] - movie_means[i]
        
    return (normalized_ratings, movie_means)
    

In [22]:
normalized_ratings,movie_means=mean_normalize_ratings(ratings,did_rate)

In [23]:
print normalized_ratings
print movie_means

[[ 1.25        2.25       -1.75        0.          0.         -1.75      ]
 [ 0.          0.          0.          0.66666667  2.66666667 -3.33333333]
 [ 0.          0.          2.          0.          0.         -2.        ]
 [ 0.          0.4         0.4        -1.6         0.4         0.4       ]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.         -2.          0.          0.          0.          2.        ]
 [ 0.          2.          0.         -2.          0.          0.        ]
 [-1.33333333  0.          0.          1.66666667 -0.33333333  0.        ]
 [ 0.          0.         -0.66666667  0.         -2.66666667  3.33333333]
 [ 0.          0.         -2.5        -0.5         1.5         1.5       ]]
[[ 5.75      ]
 [ 7.33333333]
 [ 8.        ]
 [ 9.6       ]
 [ 8.        ]
 [ 4.        ]
 [ 6.        ]
 [ 4.33333333]
 [ 6.66666667]
 [ 6.5       ]]


In [44]:
def cost_function(X_and_theta, ratings, did_rate, num_users, num_movies, num_features, reg_param):
	# Retrieve the X and theta matrixes from X_and_theta, based on their dimensions (num_features, num_movies, num_movies)
	# --------------------------------------------------------------------------------------------------------------
	# Get the first 30 (10 * 3) rows in the 48 X 1 column vector
	first_30 = X_and_theta[:num_movies * num_features]
	# Reshape this column vector into a 10 X 3 matrix
	X = first_30.reshape((num_features, num_movies)).transpose()
	# Get the rest of the 18 the numbers, after the first 30
	last_18 = X_and_theta[num_movies * num_features:]
	# Reshape this column vector into a 6 X 3 matrix
	theta = last_18.reshape(num_features, num_users ).transpose()
	
	# we multiply by did_rate because we only want to consider observations for which a rating was given
	# we calculate the sum of squared errors here.  
	# in other words, we calculate the squared difference between our hypothesis (predictions) and ratings
	cost = sum( (X.dot( theta.T ) * did_rate - ratings) ** 2 ) / 2
	
	# we get the sum of the square of every element of X and theta
	regularization = (reg_param / 2) * (sum( theta**2 ) + sum(X**2))
	return cost + regularization

In [51]:
def compute_gradient(X_and_theta, ratings, did_rate, num_users, num_movies, num_features, reg_param):
	# Retrieve the X and theta matrixes from X_and_theta, based on their dimensions (num_features, num_movies, num_movies)
	# --------------------------------------------------------------------------------------------------------------
	# Get the first 30 (10 * 3) rows in the 48 X 1 column vector
	first_30 = X_and_theta[:num_movies * num_features]
	# Reshape this column vector into a 10 X 3 matrix
	X = first_30.reshape((num_features, num_movies)).transpose()
	# Get the rest of the 18 the numbers, after the first 30
	last_18 = X_and_theta[num_movies * num_features:]
	# Reshape this column vector into a 6 X 3 matrix
	theta = last_18.reshape(num_features, num_users ).transpose()
	
	# we multiply by did_rate because we only want to consider observations for which a rating was given
	difference = X.dot( theta.T ) * did_rate - ratings
	
	# we calculate the gradients (derivatives) of the cost with respect to X and theta
	X_grad = difference.dot( theta ) + reg_param * X
	theta_grad = difference.T.dot( X ) + reg_param * theta
	
	# wrap the gradients back into a column vector 
	return np.r_[X_grad.T.flatten(), theta_grad.T.flatten()]

In [57]:
num_movies, num_users = np.shape(normalized_ratings)
num_features = 3

# Initialize Parameters theta (user_prefs), X (movie_features)

movie_features = np.random.randn( num_movies, num_features )
user_prefs = np.random.randn( num_users, num_features )

In [63]:
initial_X_and_theta = np.r_[movie_features.T.flatten(), user_prefs.T.flatten()]

ValueError: operands could not be broadcast together with shapes (6,) (3,) 