### Implementing FunkSVD

In [34]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse
import svd_tests as t
%matplotlib inline

# Load the cleaned movie and review tables and drop the 'Unnamed: 0' column
# (presumably a saved index from an earlier CSV export -- TODO confirm).
movies = pd.read_csv('data/movies_clean.csv').drop(columns='Unnamed: 0')
reviews = pd.read_csv('data/reviews_clean.csv').drop(columns='Unnamed: 0')

# Build the user-by-item ratings matrix.
# The full matrix would come from the reviews table:
#user_items = reviews[['user_id', 'movie_id', 'rating', 'timestamp']]
#user_by_movie = user_items.groupby(['user_id', 'movie_id'])['rating'].max().unstack()
# Unstacking raised a memory error here, so the small ratings_mat below
# (taken from the Udacity help material) is used instead.
ratings_mat = np.matrix(
    [
        [10, 10, 10, 10],
        [10, 4, 9, 10],
        [8, 9, 10, 5],
        [9, 8, 10, 10],
        [10, 5, 9, 9],
        [6, 4, 10, 6],
        [9, 8, 10, 9],
        [10, 5, 9, 8],
        [7, 8, 10, 8],
        [9, 5, 9, 7],
        [9, 8, 10, 8],
        [9, 10, 10, 9],
        [10, 9, 10, 8],
        [5, 8, 5, 8],
        [10, 8, 10, 10],
        [9, 9, 10, 10],
        [9, 8, 8, 8],
        [10, 8, 1, 10],
        [5, 6, 10, 10],
        [8, 7, 10, 7],
    ],
    dtype=float,  # ratings are stored as floats so a missing value can later be np.nan
)

In [35]:
# Display the ratings matrix built in the previous cell.
ratings_mat

matrix([[10., 10., 10., 10.],
        [10.,  4.,  9., 10.],
        [ 8.,  9., 10.,  5.],
        [ 9.,  8., 10., 10.],
        [10.,  5.,  9.,  9.],
        [ 6.,  4., 10.,  6.],
        [ 9.,  8., 10.,  9.],
        [10.,  5.,  9.,  8.],
        [ 7.,  8., 10.,  8.],
        [ 9.,  5.,  9.,  7.],
        [ 9.,  8., 10.,  8.],
        [ 9., 10., 10.,  9.],
        [10.,  9., 10.,  8.],
        [ 5.,  8.,  5.,  8.],
        [10.,  8., 10., 10.],
        [ 9.,  9., 10., 10.],
        [ 9.,  8.,  8.,  8.],
        [10.,  8.,  1., 10.],
        [ 5.,  6., 10., 10.],
        [ 8.,  7., 10.,  7.]])

In [36]:
# Rows of the ratings matrix are users, columns are movies.
n_users, n_movies = ratings_mat.shape
# Number of entries that hold a rating, i.e. that are not NaN.
num_ratings = np.count_nonzero(np.logical_not(np.isnan(ratings_mat)))

In [37]:
# Count the entries of ratings_mat that are not NaN (the same expression assigned to num_ratings above).
np.count_nonzero(~np.isnan(ratings_mat))

80

In [38]:
# Number of latent features shared by the two factor matrices.
latent_features = 4
# Uniform [0, 1) starting values: users x features and features x movies.
user_mat = np.random.random_sample(size=(n_users, latent_features))
movie_mat = np.random.random_sample(size=(latent_features, n_movies))

In [39]:
# Display the randomly initialised user-by-latent-feature matrix.
user_mat

array([[0.41592958, 0.91074776, 0.13715951, 0.13950652],
       [0.68794809, 0.64322323, 0.2033438 , 0.63594236],
       [0.77000611, 0.47690021, 0.95629297, 0.03021415],
       [0.08769113, 0.67490199, 0.64902301, 0.90300225],
       [0.41984941, 0.48358001, 0.7839095 , 0.55335899],
       [0.42119524, 0.08275728, 0.81587947, 0.51768745],
       [0.13528771, 0.15952936, 0.51865242, 0.9615383 ],
       [0.92697871, 0.70026089, 0.21735514, 0.68289178],
       [0.54223823, 0.54986463, 0.78370039, 0.59175358],
       [0.04328393, 0.67360273, 0.46285024, 0.30630013],
       [0.61558797, 0.97402778, 0.64268428, 0.92342318],
       [0.66333601, 0.52046632, 0.74143139, 0.4124755 ],
       [0.84671791, 0.32212777, 0.9861748 , 0.53267404],
       [0.7849902 , 0.86818681, 0.72788609, 0.1664334 ],
       [0.35338501, 0.41978949, 0.55341719, 0.54127467],
       [0.49000192, 0.82916388, 0.20042757, 0.02068964],
       [0.30634658, 0.25843277, 0.10196618, 0.52149737],
       [0.88982691, 0.12526381,

In [40]:
def FunkSVD(ratings_mat, latent_features=4, learning_rate=0.0001, iters=100, seed=None):
    '''
    This function performs matrix factorization using a basic form of FunkSVD with no regularization.

    Every observed rating is visited once per iteration, and the matching user row and
    movie column are moved along the gradient of that rating's squared error.
    Entries that are NaN (or not strictly positive) are treated as missing and skipped,
    so they never contribute to the updates or to the reported error.

    INPUT:
    ratings_mat - (numpy array or matrix) users as rows, movies as columns, and ratings as values;
                  missing ratings are np.nan
    latent_features - (int) the number of latent features used
    learning_rate - (float) the learning rate
    iters - (int) the number of iterations
    seed - (int or None) seed for the random initialization; None (the default) draws from
           numpy's global random state, exactly as before this parameter existed

    OUTPUT:
    user_mat - (numpy array) a user by latent feature matrix
    movie_mat - (numpy array) a latent feature by movie matrix

    Side effect: prints a header and one "iteration / mean squared error" line per iteration.
    '''

    # Work on a plain float ndarray so np.matrix, ndarray and nested lists behave the same
    ratings = np.asarray(ratings_mat, dtype=float)
    n_users, n_movies = ratings.shape

    # The set of observed ratings never changes, so find it once instead of every iteration.
    # NaN > 0 is False, which is what makes missing entries drop out here.
    observed = [
        (i, j, ratings[i, j])
        for i in range(n_users)
        for j in range(n_movies)
        if ratings[i, j] > 0
    ]
    # Divide the error by the number of ratings that were actually trained on
    num_ratings = len(observed)

    # initialize the user and movie matrices with random values
    rng = np.random if seed is None else np.random.RandomState(seed)
    user_mat = rng.rand(n_users, latent_features)
    movie_mat = rng.rand(latent_features, n_movies)

    # keep track of iteration and MSE
    print("Optimizaiton Statistics")
    print("Iterations | Mean Squared Error ")

    step = 2 * learning_rate

    # for each iteration
    for iteration in range(iters):

        # restart the sum of squared errors for this pass
        sse_accum = 0.0

        # For each user-movie pair that has a rating
        for i, j, rating in observed:

            # compute the error as the actual minus the dot product of the user and movie latent features
            diff = rating - np.dot(user_mat[i, :], movie_mat[:, j])

            # Keep track of the sum of squared errors for the matrix
            sse_accum += diff ** 2

            # Update both factors from the values they had when diff was computed.
            # Without the copy, the movie update would see the already-updated user row.
            user_row = user_mat[i, :].copy()
            user_mat[i, :] += step * diff * movie_mat[:, j]
            movie_mat[:, j] += step * diff * user_row

        # print results (nan when there is nothing to average over)
        mse = sse_accum / num_ratings if num_ratings else float('nan')
        print("%d \t\t %f" % (iteration + 1, mse))

    return user_mat, movie_mat

Try out your function on the **user_movie_subset** dataset (stored in this notebook as `ratings_mat`).  First try 4 latent features, a learning rate of 0.005, and 10 iterations.  When you take the dot product of the resulting **U** and **V** matrices, how does the resulting **user_movie** matrix compare to the original subset of the data?

In [41]:
# Short run: 4 latent features, learning rate 0.005, 10 iterations.
user_mat, movie_mat = FunkSVD(ratings_mat, latent_features=4, learning_rate=0.005, iters=10)

Optimizaiton Statistics
Iterations | Mean Squared Error 
1 		 42.818102
2 		 14.437540
3 		 3.959312
4 		 2.826553
5 		 2.739173
6 		 2.707853
7 		 2.680992
8 		 2.653271
9 		 2.623413
10 		 2.590815


In [42]:
# Show the ratings rebuilt from the learned factors, followed by the original ratings.
print(user_mat @ movie_mat, ratings_mat, sep="\n")

[[ 9.93745477  8.94556966 10.5980805  10.2086123 ]
 [ 8.37466663  7.3099209   9.18813826  8.92126685]
 [ 8.14825596  7.28399148  7.99250202  7.61925215]
 [ 9.80321869  8.48507249  9.81785148  9.51747416]
 [ 8.18591893  6.91339637  9.37379589  8.69135024]
 [ 6.88655022  6.09874029  7.31962285  6.67501859]
 [ 9.18033324  8.26315056  9.45168111  9.28041608]
 [ 8.3768202   7.4752498   8.3704197   8.13601249]
 [ 8.3383807   7.10733475  9.03120888  8.64316241]
 [ 7.46551676  6.54514906  8.20440219  7.78093028]
 [ 9.1932336   7.88015524  9.21781424  8.71806748]
 [ 9.71611531  8.42400263 10.02209085  9.52344794]
 [ 9.23576837  8.27290073  9.6305589   9.26475832]
 [ 6.30516066  5.7867291   6.84157938  6.92048947]
 [ 9.97046136  8.45592424 10.21696914  9.66820708]
 [ 9.81293314  8.36099137 10.34917226  9.69778952]
 [ 8.3422177   7.45588828  8.37264896  8.43024885]
 [ 7.33981545  6.58632974  7.22167974  6.92577421]
 [ 8.18255976  7.03622064  9.05410717  8.46271225]
 [ 8.08831514  6.94659961  8.65

**The predicted ratings from the dot product are already starting to look a lot like the original data values even after only 10 iterations.  You can see some extreme low values that are not captured well yet.  The 5 in the second to last row in the first column is predicted as an 8, and the 4 in the second row and second column is predicted to be a 7.  Clearly the model is not done learning, but things are looking good.**

Let's try out the function again on the **user_movie_subset** dataset (stored in this notebook as `ratings_mat`).  This time we will again use 4 latent features and a learning rate of 0.005.  However, let's bump up the number of iterations to 250.  When you take the dot product of the resulting **U** and **V** matrices, how does the resulting **user_movie** matrix compare to the original subset of the data?  What do you notice about your error at the end of the 250 iterations?

In [43]:
# Longer run: same 4 latent features and learning rate 0.005, but 250 iterations.
user_mat, movie_mat = FunkSVD(ratings_mat, latent_features=4, learning_rate=0.005, iters=250)

Optimizaiton Statistics
Iterations | Mean Squared Error 
1 		 43.052148
2 		 14.296328
3 		 3.881796
4 		 2.778610
5 		 2.674769
6 		 2.633333
7 		 2.599688
8 		 2.565017
9 		 2.526897
10 		 2.484358
11 		 2.436725
12 		 2.383384
13 		 2.323772
14 		 2.257400
15 		 2.183896
16 		 2.103057
17 		 2.014896
18 		 1.919707
19 		 1.818111
20 		 1.711098
21 		 1.600042
22 		 1.486676
23 		 1.373028
24 		 1.261291
25 		 1.153658
26 		 1.052136
27 		 0.958361
28 		 0.873467
29 		 0.798016
30 		 0.732016
31 		 0.674996
32 		 0.626141
33 		 0.584422
34 		 0.548732
35 		 0.517982
36 		 0.491168
37 		 0.467412
38 		 0.445973
39 		 0.426242
40 		 0.407737
41 		 0.390081
42 		 0.372990
43 		 0.356254
44 		 0.339729
45 		 0.323320
46 		 0.306975
47 		 0.290679
48 		 0.274442
49 		 0.258300
50 		 0.242303
51 		 0.226516
52 		 0.211011
53 		 0.195865
54 		 0.181153
55 		 0.166950
56 		 0.153321
57 		 0.140325
58 		 0.128010
59 		 0.116410
60 		 0.105551
61 		 0.095442
62 		 0.086084
63 		 0.077466
64 		

In [44]:
# Show the ratings rebuilt from the learned factors, followed by the original ratings.
print(user_mat @ movie_mat, ratings_mat, sep="\n")

[[10.00001746 10.00000473 10.00000397  9.99998414]
 [10.0000117   4.000003    9.00000247  9.9999893 ]
 [ 7.99999924  8.99999956  9.99999956  5.00000056]
 [ 8.99999803  7.99999908  9.99999915 10.00000171]
 [ 9.99997812  4.9999938   8.99999473  9.00001974]
 [ 6.00000714  4.00000198 10.00000167  5.99999348]
 [ 9.00000632  8.00000162 10.00000134  8.99999421]
 [10.00000041  5.00000006  9.00000002  7.99999953]
 [ 6.99999797  7.99999926  9.99999934  8.00000178]
 [ 9.00001228  5.00000329  9.00000273  6.99998876]
 [ 8.99999323  7.99999792  9.9999982   8.00000605]
 [ 8.99998839  9.99999669  9.99999718  9.00001047]
 [10.00000935  9.00000256 10.00000215  7.99999147]
 [ 5.00000319  8.00000073  5.0000006   7.99999714]
 [ 9.99998091  7.99999469  9.99999551 10.00001725]
 [ 8.99998536  8.99999617  9.99999682 10.0000133 ]
 [ 9.00000729  8.00000229  8.00000198  7.99999341]
 [10.00000308  8.00000096  1.00000085  9.9999973 ]
 [ 5.00000533  6.00000163 10.00000141  9.99999521]
 [ 8.00000784  7.00000227 10.00

**In this case, we were able to completely reconstruct the user-movie matrix, reaching a mean squared error of essentially 0. I obtained 0 MSE on iteration 165.**

The last time we placed an **np.nan** value into this matrix the entire svd algorithm in python broke.  Let's see if that is still the case using your FunkSVD function.  In the below cell, I have placed a nan into the first cell of your numpy array.  

Use 4 latent features, a learning rate of 0.005, and 250 iterations.  Are you able to run your SVD without it breaking (something that was not true about the python built in)?  Do you get a prediction for the nan value?  What is your prediction for the missing value? Use the cells below to answer these questions.

In [45]:
# Blank out the first user's rating of the first movie to simulate a missing value.
# NOTE: this overwrites ratings_mat in place, so the original 10 is gone from the
# variable until the cell that defines ratings_mat is run again.
ratings_mat[0, 0] = np.nan
ratings_mat

matrix([[nan, 10., 10., 10.],
        [10.,  4.,  9., 10.],
        [ 8.,  9., 10.,  5.],
        [ 9.,  8., 10., 10.],
        [10.,  5.,  9.,  9.],
        [ 6.,  4., 10.,  6.],
        [ 9.,  8., 10.,  9.],
        [10.,  5.,  9.,  8.],
        [ 7.,  8., 10.,  8.],
        [ 9.,  5.,  9.,  7.],
        [ 9.,  8., 10.,  8.],
        [ 9., 10., 10.,  9.],
        [10.,  9., 10.,  8.],
        [ 5.,  8.,  5.,  8.],
        [10.,  8., 10., 10.],
        [ 9.,  9., 10., 10.],
        [ 9.,  8.,  8.,  8.],
        [10.,  8.,  1., 10.],
        [ 5.,  6., 10., 10.],
        [ 8.,  7., 10.,  7.]])

In [46]:
# Run FunkSVD on the matrix that now has a missing (NaN) value,
# with the same settings as the 250-iteration run above.
user_mat, movie_mat = FunkSVD(ratings_mat, latent_features=4, learning_rate=0.005, iters=250)

Optimizaiton Statistics
Iterations | Mean Squared Error 
1 		 40.934817
2 		 13.346218
3 		 3.969175
4 		 2.880902
5 		 2.746773
6 		 2.701432
7 		 2.673226
8 		 2.648985
9 		 2.625182
10 		 2.600632
11 		 2.574770
12 		 2.547210
13 		 2.517631
14 		 2.485735
15 		 2.451228
16 		 2.413811
17 		 2.373168
18 		 2.328953
19 		 2.280788
20 		 2.228253
21 		 2.170892
22 		 2.108218
23 		 2.039742
24 		 1.965009
25 		 1.883657
26 		 1.795497
27 		 1.700602
28 		 1.599404
29 		 1.492768
30 		 1.382036
31 		 1.268999
32 		 1.155795
33 		 1.044730
34 		 0.938037
35 		 0.837641
36 		 0.744955
37 		 0.660781
38 		 0.585307
39 		 0.518207
40 		 0.458800
41 		 0.406217
42 		 0.359553
43 		 0.317966
44 		 0.280744
45 		 0.247312
46 		 0.217231
47 		 0.190165
48 		 0.165861
49 		 0.144112
50 		 0.124741
51 		 0.107582
52 		 0.092472
53 		 0.079246
54 		 0.067737
55 		 0.057779
56 		 0.049206
57 		 0.041859
58 		 0.035587
59 		 0.030250
60 		 0.025719
61 		 0.021881
62 		 0.018633
63 		 0.015886
64 		

In [47]:
# Rebuild every rating from the learned factors; entry [0, 0] is the one that was set to NaN.
preds = user_mat @ movie_mat

# Each of the first two messages is followed by a blank line.
print("The predicted value for the missing rating is {}:".format(preds[0,0]), end="\n\n")
print("The actual value for the missing rating is {}:".format(ratings_mat[0,0]), end="\n\n")
print("That's right! You just predicted a rating for a user-movie pair that was never rated!")
print("But if you look in the original matrix, this was actually a value of 10. Not bad!")

The predicted value for the missing rating is 8.499732763178322:

The actual value for the missing rating is nan:

That's right! You just predicted a rating for a user-movie pair that was never rated!
But if you look in the original matrix, this was actually a value of 10. Not bad!
