In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

In [6]:
class collaborativeFilteringRecommendationEngine:
    
    def dataRead(self):
        
        u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
        userDf = pd.read_csv(u.userFileLocation, sep = "|", encoding = "latin1", names = u_cols)

        r_cols = ['user_id', 'movie_id', 'ratings', 'timestamp']
        ratingsDf = pd.read_csv(u.dataFileLocation, sep = "\t", encoding = "latin1", names = r_cols)

        i_cols = ['movie id', 'movie title' ,'release date','video release date', 'IMDb URL', 'unknown', 'Action', 'Adventure',
        'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
        'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
        itemDf = pd.read_csv(u.itemFileLocation, sep = "|", encoding = "latin1", names = i_cols)
        
        return userDf, ratingsDf, itemDf
    
    def dataPrep(self, ratingsDf):

        n_users = ratingsDf.user_id.unique().shape[0]
        n_items = ratingsDf.movie_id.unique().shape[0]

        data_matrix = np.zeros((n_users,n_items))
        for line in ratingsDf.itertuples():
            data_matrix[line[1]-1,line[2]-1] = line[3]

        return data_matrix

    def similarity(self, data_matrix, metrics):

        user_similarity = pairwise_distances(data_matrix, metric = metrics)
        item_similarity = pairwise_distances(data_matrix.T, metric = metrics)

        return user_similarity, item_similarity

    def predict(self, ratings, similarity, type='user'):
        
        if type == 'user':
            
            mean_user_rating = ratings.mean(axis=1)
            #We use np.newaxis so that mean_user_rating has same format as ratings
            ratings_diff = (ratings - mean_user_rating[:, np.newaxis])
            pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis=1)]).T
        
        elif type == 'item':
        
            pred = ratings.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
        
        return pred

    def recommend(self, pred, itemDf):

        recommendations = []
        for item in pred:
            recommendations.append(item.tolist().index(max(item)))

        recommendationsDf = pd.DataFrame(data = recommendations, columns = ["movie id"])
        recommendationsDf['user_id'] = range(1, len(recommendationsDf) + 1)

        resultDf = pd.merge(recommendationsDf[["user_id", "movie id"]],
                     itemDf[["movie id", "movie title"]],
                     on='movie id', 
                     how='inner', sort = False)

        return resultDf

In [7]:
def main():
    
    cfre = collaborativeFilteringRecommendationEngine()
    
    userDf, ratingsDf, itemDf = cfre.dataRead()
    
    data_matrix = cfre.dataPrep(ratingsDf)
    
    user_similarity, item_similarity = cfre.similarity(data_matrix, "cosine")
    
    user_prediction = cfre.predict(data_matrix, user_similarity, type='user')
    item_prediction = cfre.predict(data_matrix, item_similarity, type='item')
    
    resultDf = cfre.recommend(item_prediction, itemDf)
    print(resultDf)
    
    return resultDf

In [8]:
if __name__ == "__main__":
    
    resultDf = main()   

     user_id  movie id                                        movie title
0          1      1386                      Tetsuo II: Body Hammer (1992)
1          6      1386                      Tetsuo II: Body Hammer (1992)
2         10      1386                      Tetsuo II: Body Hammer (1992)
3         12      1386                      Tetsuo II: Body Hammer (1992)
4         13      1386                      Tetsuo II: Body Hammer (1992)
5         14      1386                      Tetsuo II: Body Hammer (1992)
6         16      1386                      Tetsuo II: Body Hammer (1992)
7         18      1386                      Tetsuo II: Body Hammer (1992)
8         23      1386                      Tetsuo II: Body Hammer (1992)
9         24      1386                      Tetsuo II: Body Hammer (1992)
10        25      1386                      Tetsuo II: Body Hammer (1992)
11        37      1386                      Tetsuo II: Body Hammer (1992)
12        41      1386                