In [1]:
import numpy as np

# Netflix Rating Recommendation Algo

### Aim:
To predict the rating a user would assign a movie they haven't seen in order to rank the movies and recommended the best ones. This is achieved by creating the following recommendation algorithim:
1. Basline predictior of average of user ratings for a given movie
2. Account for user and movie bias using least squares
3. Contruct neighbourhood for each movie containing extremely similiar and disimiliar movies and optimize prediction based on this.
 
Further optimizations on the algorithim can include:
- Create user neightbourhoods as well
- Factor in time od movie release
- Factor in preferences based on movie length, type, content, etc.

### Algorithm Description:
Assuming we have users $ {u_1, u_2 ... u_N} $ and movies $ {i_1, i_2 ... i_M} $ with each rating for movie $ i $ by user $ u $ decribed as $ r_{ui} $. We are provided a sparese matrix $ R $ of these ratings and need to predict all the $ r_{ui} $ values using this.

1. We will first assign a baseline prediction $ \overline{r} $ for each movie as the average of all ratings. 



2. We will then model the bias of each user and each movie using least sqaures to find the optimal values of $ b_u $ and $b_i$ to obatain the a better prediction for each user-movie pair: 
$$ \hat{r_{ui}} = \overline{r} + b_u^{*} + b_i^{*} $$


3. We will find a neightbourhood ($ L_i$) for each movie $i$ containing movies most similiar or disimiliar modelled by using the absolute cosine coefficent ($ c_{ij} $)as the metric and pick the top neightbour_hood size movies. 


4. For movie $i$ and user $u$ incorpotate the ratings of the user for all movies in the neightbourhood of movie $i : L_i$ as:
$$ \hat{r_{ui}}^N = \hat{r_{ui}} +  \frac{  \sum_{j\in L_i} d_{ij}*\tilde{r_{uj}} } { \sum_{j\in L_i}|d_{ij}|}  $$


Input Parameters:
- A matrix of users and their movie ratings:
    - Row : each row represents a user
    - Col : each column representa a movie
    - Data point : each data point is a rating
        - a positive number if assigned
        - 0 if not assigned
- Neightbour hood size        

Output Paraments:
- A matrix of users and their predicted movie ratings.
- The RMSE value of the prediction
    

In [4]:
def NetflixRatings(R, minRating, maxRating, neightbourhoodSize = 2):    

    R_valid = (R != 0)
    
    numUsers = R.shape[0]
    numMovies = R.shape[1]
    
    numRatings = 0
    sumRatings = 0
    
    missingRatings = 0
    for u in range(numUsers):
        for i in range(numMovies):
            R_ui = R[u][i]
            if (R_ui == 0):
                #print("No rating for user " + str(u) + " and movie " + str(i) + "\n") 
                missingRatings += 1
            else:
                numRatings+=1
                sumRatings += R_ui
    
    R_avg = sumRatings/numRatings
    #print("Average rating: " + str(r_avg) + "\n")
    
    A = np.zeros((numUsers*numMovies - missingRatings ,numUsers+numMovies))
    b = np.zeros((numUsers*numMovies - missingRatings,1))
    
    index = 0
    for u in range(numUsers):
        for i in range(numMovies):
            R_ui = R[u][i]
            
            if (R_ui != 0):
                # Select User
                A[index][u] = 1
                # Select Movie
                A[index][numUsers + i] = 1                
                # Select Rating
                b[index] = R_ui - R_avg
                #increment index    
                index += 1        
                    
    solutions = np.linalg.lstsq(A,b, rcond=None)    
    x = solutions[0]
    
    debugLS = True
    if (debugLS):
        print("A: ")
        print(A.shape)
        print(A)
        print()

        print("b: ")
        print(b.shape)
        print(b)
        print()

        print("x: ")
        print(x.shape)
        print(x)
        print()
        
    R_hat = np.zeros((numUsers,numMovies))
    
    for u in range(numUsers):
        for i in range(numMovies):
            R_ui_pred = R_avg + x[u] + x[numUsers+i]
            R_hat[u][i] = R_ui_pred 

            
    R_hat = np.clip(R_hat, minRating , maxRating)
    R_tilde = np.around(R - R_hat, 4)
    #R_tilde = R - R_hat
    
    for u in range(numUsers):
        for i in range(numMovies):
            if R_valid[u][i] == False:
                R_tilde[u][i] = 0
    
    debugBaseline = True
    if debugBaseline:
        print("Given Matrix : ")
        print(R)
        print()
        
        print("R_hat prediction: ")
        print(R_hat)
        print()
        
        print("R_tilde residuals: ")
        print(R_tilde)
        print()

    
    # Calculate cosine similiarity matrix
    
    L = np.zeros((numMovies,numMovies))
    
    for i in range(numMovies):
        for j in range(i+1):
            
            if i != j :
                
                valid_indx_i = R[:,i]>0
                valid_indx_j = R[:,j]>0
                valid_indx = np.logical_and(valid_indx_i,valid_indx_j)
       
                movie_i = R_tilde[valid_indx, i]
                movie_j = R_tilde[valid_indx, j]
                
                cosine_similiarity =  np.dot(movie_i, movie_j)/(np.linalg.norm(movie_i)*np.linalg.norm(movie_j))
                
                L[i][j] = cosine_similiarity
                L[j][i] = cosine_similiarity

    # Sort the values in the matrix
    L_ranking = np.zeros((numMovies,numMovies))
    for i in range(numMovies):        
        L_i = abs(L[i])
        ranking = np.argsort(np.argsort(L_i))
        L_ranking[i] = ranking
        
    debugCosine = True
    
    if debugCosine:
        print("Cosine Similiarities : ")
        print(L)
        print()

        print("Cosine rankings:")
        print(L_ranking)
        print()
        

    # Calculate final predictions
    R_neightbourhood = np.zeros((numUsers,numMovies))
    
    
    debugNeighLogic = False
    
    for u in range(numUsers):
        for i in range(numMovies):
            
            i_neighbourhood = 0
            i_normalization = 0
            
            for l in range(neightbourhoodSize):

                rank = numMovies - l - 1
                movie_j = np.where( L_ranking[i] == rank)[0][0]
                                
                L_ij = L[i][movie_j]
                R_tilde_uj = R_tilde[u][movie_j]
                
                i_neighbourhood += L_ij*R_tilde_uj
                i_normalization += abs(L_ij)
                
                
                if (u == 0) and debugNeighLogic:
                    
                    print("movie: " + str(i))
                    
                    print("rank : " + str(rank))
                    print("movie_j : " + str(movie_j))
                    
                    print("L_ij: " + str(L_ij))
                    print("R_tilde_uj: " + str(R_tilde_uj))
                    
                    print()
                
            R_neightbourhood[u][i] += i_neighbourhood/i_normalization
    
    
    debugNeigh = True
    
    if debugNeigh :
        
        print("R_neightbourhood:")
        print(R_neightbourhood)
        print()
    
    R_neightbourhood = np.clip(R_hat + R_neightbourhood, minRating, maxRating)
    
    mse = (np.square(R-R_neightbourhood)).mean(axis=None)
    
    return R_neightbourhood, mse
            

In [5]:
R = np.array( [ [1, 9, 9, 8, 3, 0],
                [2, 0, 0, 0, 4, 6],
                [0, 2, 3, 6, 3, 5],
                [5, 6, 5, 1, 0, 0]
              ])

NetflixRatings(R, 1, 10, 2)

A: 
(17, 10)
[[1. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 1. 0. 0.]]

b: 
(17, 1)
[[-3.58823529]
 [ 4.41176471]
 [ 4.41176471]
 [ 3.41176471]
 [-1.58823529]
 [-2.58823529]
 [-0.58823529]
 [ 1.41176471]
 [-2.58823529]
 [-1.58823529]
 [ 1.41176471]
 [-1.58823529]
 [ 0.41176471]
 [ 0.41176471]
 [ 1.41176471]
 [ 0.41176471]
 [-3.58823529]]

x: 
(10, 1)
[[ 1.66610644]
 [ 0.21372549]
 [-1.31008403]
 [-0.38151261]
 [-2.4210084 ]
 [ 1.0869281 ]
 [ 1.0869281 ]
 [ 0.42026144]
 [-1.44481793]
 [ 1.459

(array([[3.19411629, 8.21137255, 8.24297365, 8.0410538 , 3.15082381,
         7.54763571],
        [2.24536213, 5.71571349, 5.69991207, 5.27038156, 3.35714286,
         6.26190476],
        [1.        , 3.66810397, 3.65230255, 3.83400295, 3.68096709,
         6.57144524],
        [3.53495243, 5.29365079, 5.29365079, 3.07678392, 2.57307101,
         4.00001667]]), 8.248996715902518)