# Chapter 3.4 - Naive Bayes Collaborative Filtering

The code below is my attempt at coding a Recommender system using the Naive Bayes formula and its variation 
for Collaborative filtering. The code follows Chapter 3.4 of the Book.

Do note that this is SAMPLE code and is in no way optimized for PRODUCTION use. Feel free to use it for educational purposes.

In [2]:
import numpy as np
import pandas as pd
from math import sqrt
import operator

We're going to recreate the input data, the same way it was presented in the Book (page 85, Table 3.2)

In [3]:
# Ratings matrix: one row per user, one column per movie.
# 1 = like, -1 = dislike, NaN = the user has not rated that movie.
# NOTE: use `np.nan`; the `np.NAN` alias was removed in NumPy 2.0.
data = [
    [1, -1, 1, -1, 1, -1],
    [1, 1, np.nan, -1, -1, -1],
    [np.nan, 1, 1, -1, -1, np.nan],
    [-1, -1, -1, 1, 1, 1],
    [-1, np.nan, -1, 1, 1, 1],
]

# Users are labelled 1..5 (index) and movies 1..6 (columns).
df = pd.DataFrame(data, columns=range(1, 7), index=range(1, 6))
df

Unnamed: 0,1,2,3,4,5,6
1,1.0,-1.0,1.0,-1,1,-1.0
2,1.0,1.0,,-1,-1,-1.0
3,,1.0,1.0,-1,-1,
4,-1.0,-1.0,-1.0,1,1,1.0
5,-1.0,,-1.0,1,1,1.0


In [17]:
def get_probability_of_rating_for_given_movie(j, v):
    '''
    Returns probability of movie J being rated with specified rating V

    According to the book, P(r_uj = v) is called the prior probability and
    is estimated as the fraction of the users that have specified the
    rating v for the jth item (page 82 in the book). Only users who have
    actually rated movie J (non-NaN entries of the global ``df``) are
    counted in the denominator.

    Returns 0.0 when nobody has rated movie J, instead of raising
    ZeroDivisionError.
    '''

    # Number of users with any (non-NaN) rating for movie j.
    rated_count = int(df[j].count())
    if rated_count == 0:
        # No observations at all: there is no evidence for any rating value.
        return 0.0

    # Number of users who gave exactly rating v (NaN never compares equal).
    matching_count = int((df[j] == v).sum())

    return matching_count / rated_count

def get_probability_of_movie_rating_considering_others(j, j_val, k, k_val):
    '''
    Returns the conditional probability P(r_k = k_val | r_j = j_val)

    It is estimated from the global ``df`` as the fraction of users who
    rated movie K with K_val among the users who rated movie J with J_val
    and who have also rated movie K.

    Returns 0.0 when no user satisfies the condition (nobody rated movie J
    with J_val while also having rated movie K), instead of raising
    ZeroDivisionError.
    '''
    # Users to consider: those who rated movie J with J_val
    # and who have also rated movie K
    users_to_consider = (df[j] == j_val) & df[k].notna()

    considered_count = int(users_to_consider.sum())
    if considered_count == 0:
        # No supporting observations: treat the conditional as zero evidence.
        return 0.0

    # Of those users, how many rated movie K with exactly K_val
    same_rating_count = int((df.loc[users_to_consider, k] == k_val).sum())

    return same_rating_count / considered_count

def predict_probability_of_rating(u, j, v):
    '''
    Predict probability of user "u" rating movie "j" with rating "v"

    The result is the Naive Bayes score: the prior P(r_j = v) multiplied by
    the conditional probabilities P(r_uk = observed rating | r_uj = v) for
    every movie k that user "u" has rated. The score is not normalized
    across the possible values of "v".

    Progress and intermediate probabilities are printed to stdout.
    '''

    # Ratings actually given by this user (I_u in the book); NaN = not rated.
    # The row is looked up once and reused for every movie in the loop below.
    user_ratings = df.loc[u].dropna()

    print("Predicting probability of user {} rating movie {} with {} ...".format(u, j, v))

    # Prior: probability that this movie is rated with rating "v" at all
    p_j_v = get_probability_of_rating_for_given_movie(j, v)

    print("\tGeneral probability of movie {} being rated with {} is {}".format(j, v, p_j_v))

    # Multiply in the conditional probability of each rating the user has
    # given, under the hypothesis that movie "j" is rated "v"
    total_probability = 1

    for movie_idx, users_rating in user_ratings.items():
        conditional_proba = get_probability_of_movie_rating_considering_others(j, v, movie_idx, users_rating)

        print("\tConditional probability P(r{}{} = {}|r{}{} = {}) = {}".format(u, movie_idx, users_rating, u,j, v, conditional_proba))

        total_probability = total_probability * conditional_proba

    r_u_j = p_j_v * total_probability
    print("Probability of user {} rating movie {} with {} is {}".format(u, j, v, r_u_j))

    return r_u_j
    
def predict_probability(u, j):
    '''
    Predicts the user "u" rating of movie "j"

    Scores every possible rating with predict_probability_of_rating and
    returns the rating value (-1 or 1) with the highest score. On a tie the
    rating that appears first in the list of possible ratings is returned.
    '''

    possible_ratings = [-1, 1]

    # Score the ratings in list order so the printed log keeps its order.
    probabilities = [predict_probability_of_rating(u, j, v) for v in possible_ratings]

    # Map the position of the best score back to the rating itself;
    # returning the bare index would yield 0/1 instead of -1/1.
    best_index = probabilities.index(max(probabilities))
    return possible_ratings[best_index]

In [21]:
print("User 3 is most likely to rate movie 1 with {}".format(predict_probability(3, 1)))

Predicting probability of user 3 rating movie 1 with -1 ...
	General probability of movie 1 being rated with -1 is 0.5
	Conditional probability P(r32 = 1.0|r31 = -1) = 0.0
	Conditional probability P(r33 = 1.0|r31 = -1) = 0.0
	Conditional probability P(r34 = -1.0|r31 = -1) = 0.0
	Conditional probability P(r35 = -1.0|r31 = -1) = 0.0
Probability of user 3 rating movie 1 with -1 is 0.0
Predicting probability of user 3 rating movie 1 with 1 ...
	General probability of movie 1 being rated with 1 is 0.5
	Conditional probability P(r32 = 1.0|r31 = 1) = 0.5
	Conditional probability P(r33 = 1.0|r31 = 1) = 1.0
	Conditional probability P(r34 = -1.0|r31 = 1) = 1.0
	Conditional probability P(r35 = -1.0|r31 = 1) = 0.5
Probability of user 3 rating movie 1 with 1 is 0.125
User 3 is most likely to rate movie 1 with 1
