# Basic Recommendation System by Prasit C.

## Import library

In [1]:
import numpy as np
import pandas as pd

## Loading Test Data Set

In [2]:
ratings = pd.read_csv('ml-latest-small/ratings.csv', usecols=range(3))
movies = pd.read_csv('ml-latest-small/movies.csv', usecols=range(2))
data = pd.merge(movies, ratings)
data.head()

Unnamed: 0,movieId,title,userId,rating
0,1,Toy Story (1995),1,4.0
1,1,Toy Story (1995),5,4.0
2,1,Toy Story (1995),7,4.5
3,1,Toy Story (1995),15,2.5
4,1,Toy Story (1995),17,4.5


## Item Based Collaborative Filtering

In [3]:
class ItemBasedCollaborative:
    def __init__(self, data, col_name_user_id, col_name_item_name, col_name_rating, min_periods):
        self.data = data.copy()
        self.col_name_user_id = col_name_user_id
        self.col_name_item_name = col_name_item_name
        self.col_name_rating = col_name_rating
        self.ratings_matrix = None
        self.rating_corr_matrix = None
        self.min_periods = min_periods
        
    def fit(self):
        self.ratings_matrix = self.data.pivot_table(index=[self.col_name_user_id], 
                                               columns=[self.col_name_item_name], 
                                               values=self.col_name_rating)
        
        self.rating_corr_matrix = self.ratings_matrix.corr(method='pearson', min_periods = self.min_periods)

    def make_recommendation(self, item_rating_dict, top_n = None):
        if self.rating_corr_matrix is not None:
            all_candidates = pd.Series()

            for item in item_rating_dict:
                candidates = self.rating_corr_matrix[item].dropna()
                candidates = candidates.map(lambda x: x * item_rating_dict[item])
                all_candidates = all_candidates.append(candidates)

            # Remove duplicate
            all_candidates = all_candidates.groupby(all_candidates.index).sum()    
            all_candidates.sort_values(inplace=True, ascending=False)

            # Remove itself
            all_candidates = all_candidates.drop(item_rating_dict.keys(), errors='ignore')

            if top_n is None:
                return all_candidates
            else:
                return all_candidates[:top_n]
        else:
            raise('Please fit the data before make a recommendation.')
    
    def make_user_recommendation(self, user_id, top_n = None):
        if self.rating_corr_matrix is not None:
            item_rating_dict = dict(self.data[self.data[self.col_name_user_id ]==user_id]
                                    [[self.col_name_item_name,self.col_name_rating]].to_dict('split')['data'])
            if top_n is None:
                return self.make_recommendation(item_rating_dict)
            else:
                return self.make_recommendation(item_rating_dict)[:top_n]
        else:
            raise('Please fit the data before make a recommendation.')

### ItemBasedCollaborative Example

#### Example: Fit a model

In [4]:
ibc = ItemBasedCollaborative(data, 'userId', 'title', 'rating', 100)
ibc.fit()

#### Example: Finding similar movie

In [5]:
similar_movie = ibc.rating_corr_matrix['Matrix, The (1999)'].dropna()
similar_movie.sort_values(inplace=True, ascending=False)
similar_movie.head()

title
Matrix, The (1999)    1.000000
Die Hard (1988)       0.544466
Inception (2010)      0.514767
Braveheart (1995)     0.496045
Aliens (1986)         0.470865
Name: Matrix, The (1999), dtype: float64

#### Example: Make a recommendation using arbitrary user profile

In [6]:
dicts = {'Forrest Gump (1994)': 5.0, 'Matrix, The (1999)':5.0}
ibc.make_recommendation(dicts, 10)

Braveheart (1995)             4.565105
Inception (2010)              4.239366
Good Will Hunting (1997)      4.000757
Saving Private Ryan (1998)    3.990310
Die Hard (1988)               3.708853
Monsters, Inc. (2001)         3.676873
Finding Nemo (2003)           3.614290
Jurassic Park (1993)          3.590251
Truman Show, The (1998)       3.499706
Reservoir Dogs (1992)         3.148039
dtype: float64

#### Example: Make a user recommendation

In [7]:
user_id = 1
ibc.make_user_recommendation(user_id, 10)

Terminator 2: Judgment Day (1991)                            34.185307
Shawshank Redemption, The (1994)                             25.529866
Godfather, The (1972)                                        20.061134
Sixth Sense, The (1999)                                      15.341456
Lord of the Rings: The Two Towers, The (2002)                13.045573
Lord of the Rings: The Return of the King, The (2003)        12.615894
Aladdin (1992)                                               12.141136
Lord of the Rings: The Fellowship of the Ring, The (2001)    10.909863
True Lies (1994)                                             10.593481
Lion King, The (1994)                                        10.080328
dtype: float64

## User Based Collaborative Filtering

In [8]:
class UserBasedCollaborative:
    def __init__(self, data, col_name_user_id, col_name_item_name, col_name_rating, min_periods):
        self.data = data.copy()
        self.col_name_user_id = col_name_user_id
        self.col_name_item_name = col_name_item_name
        self.col_name_rating = col_name_rating
        self.min_periods = min_periods
        
    def fit(self):
        self.ratings_matrix = self.data.pivot_table(index=[self.col_name_item_name], 
                                               columns=[self.col_name_user_id], 
                                               values=self.col_name_rating)
        self.rating_corr_matrix = self.ratings_matrix.corr(method='pearson', min_periods=self.min_periods)
        
    def make_recommendation(self, user, top_n_user=10):
        
        candidate_user = self.rating_corr_matrix[user].dropna()
        candidate_user.sort_values(inplace=True, ascending=False)
        candidate_user = candidate_user.drop(user)[:top_n_user]
        
        self.candidate_user = candidate_user.copy()
        
        candidate_movie = set()
        seed_user_movie = set(self.data[self.data[self.col_name_user_id]==user][self.col_name_item_name])
        
        # Finding unseen item from top-n candidate user
        for similar_user in candidate_user.index:
            candidate_set = set(self.data[self.data[self.col_name_user_id]==similar_user][self.col_name_item_name])
            candidate_movie = candidate_movie.union((candidate_set.difference(seed_user_movie)))
        return list(candidate_movie)
    

### UserBasedCollaborative Example

#### Example: Fit a model

In [9]:
ubc = UserBasedCollaborative(data, 'userId', 'title', 'rating', 100)
ubc.fit()

#### Example: Make a recommendation

In [10]:
user_id = 6
ubc.make_recommendation(user_id, top_n_user = 2)

['Demolition Man (1993)',
 'Canadian Bacon (1995)',
 'Hard Target (1993)',
 'RoboCop 3 (1993)',
 'Johnny Mnemonic (1995)',
 'Searching for Bobby Fischer (1993)',
 'Clueless (1995)',
 'Phantom, The (1996)',
 'Airheads (1994)',
 'Beverly Hills Cop III (1994)',
 'Shadow, The (1994)',
 'Mortal Kombat (1995)',
 'Pocahontas (1995)',
 'Judge Dredd (1995)',
 'Jade (1995)',
 'Tales from the Crypt Presents: Demon Knight (1995)',
 'Toy Story (1995)',
 'Jerky Boys, The (1995)',
 'Drop Zone (1994)',
 'Free Willy 2: The Adventure Home (1995)',
 'Dragonheart (1996)',
 '101 Dalmatians (1996)',
 'Boomerang (1992)',
 'Jingle All the Way (1996)',
 'Nutty Professor, The (1996)',
 'Kazaam (1996)',
 'Star Trek: First Contact (1996)',
 'Mulholland Falls (1996)',
 'Operation Dumbo Drop (1995)',
 'For Love or Money (1993)',
 'Nightmare Before Christmas, The (1993)',
 'Virtuosity (1995)',
 'Space Jam (1996)',
 'Boxing Helena (1993)']

#### Example: Similar User

In [11]:
ubc.candidate_user

userId
181    0.433193
240    0.268207
Name: 6, dtype: float64