In [1]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
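# Download the dataset files. The next cell reads ratings.csv from the
# current working directory, so save it there before running the notebook.
# 1. https://s3-us-west-2.amazonaws.com/recommender-tutorial/ratings.csv
# 2. https://s3-us-west-2.amazonaws.com/recommender-tutorial/movies.csv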


In [15]:
# Read the ratings table from the CSV in the working directory and preview
# its first five rows.
ratings = pd.read_csv(filepath_or_buffer="ratings.csv")
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


Kolom dalam dataset:
Index(['userId', 'movieId', 'rating', 'timestamp'], dtype='object')


In [8]:
# Read the movie metadata table (movieId, title, genres) straight from the
# hosted CSV and preview its first five rows.
movies = pd.read_csv(
    filepath_or_buffer="https://s3-us-west-2.amazonaws.com/recommender-tutorial/movies.csv"
)
movies.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
# Headline statistics of the ratings table: row count, number of distinct
# movies and users, and the average number of ratings per user / per movie.
n_ratings = ratings.shape[0]
n_movies = len(pd.unique(ratings['movieId']))
n_users = len(pd.unique(ratings['userId']))

# A single print with a newline separator emits the same five lines.
print(
    f"Number of ratings: {n_ratings}",
    f"Number of unique movieId's: {n_movies}",
    f"Number of unique users: {n_users}",
    f"Average ratings per user: {round(n_ratings/n_users, 2)}",
    f"Average ratings per movie: {round(n_ratings/n_movies, 2)}",
    sep="\n",
)

Number of ratings: 100836
Number of unique movieId's: 9724
Number of unique users: 610
Average ratings per user: 165.3
Average ratings per movie: 10.37


In [9]:
# Count how many movies each user has rated; one row per userId with the
# count stored in an 'n_ratings' column.
user_freq = (
    ratings
    .groupby('userId')['movieId']
    .count()
    .rename('n_ratings')
    .reset_index()
)
user_freq.head()

Unnamed: 0,userId,n_ratings
0,1,232
1,2,29
2,3,39
3,4,216
4,5,44


In [10]:
class MovieRecommender:
    """Item-based movie recommender using k-nearest neighbours.

    Constructing an instance runs the whole pipeline: it looks up the first
    movie whose title contains ``movie_name`` (case-insensitive), builds a
    sparse movie x user rating matrix, finds the ``number_of_recommend``
    movies whose rating vectors are nearest to it, and prints their titles.

    Attributes set on success: ``movie_name``, ``number_of_recommend``,
    ``movies``, ``movie_titles``, ``ratings``, ``idd_`` (the matched movieId),
    ``movie_mapper``, ``user_inv_mapper`` and ``movie_inv_mapper``.
    """

    # Default data sources, used when no DataFrames are passed in.
    RATINGS_URL = "https://s3-us-west-2.amazonaws.com/recommender-tutorial/ratings.csv"
    MOVIES_URL = "https://s3-us-west-2.amazonaws.com/recommender-tutorial/movies.csv"

    def __init__(self, movie_name='Toy', number_of_recommend=10, ratings=None, movies=None):
        """Find and print movies similar to ``movie_name``.

        Args:
            movie_name: Text to search for in the movie titles
                (case-insensitive, literal substring match).
            number_of_recommend: How many similar movies to print.
            ratings: Optional, already loaded ratings DataFrame with
                ``userId``, ``movieId`` and ``rating`` columns. Downloaded
                from ``RATINGS_URL`` when omitted.
            movies: Optional, already loaded movies DataFrame with
                ``movieId`` and ``title`` columns. Downloaded from
                ``MOVIES_URL`` when omitted.

        Only lookup failures (no title matches, or the matched movie has no
        ratings) are reported as 'Movie not founded!'. Any other error, such
        as a failed download, propagates instead of being hidden.
        """
        print('\033[4m'+'\033[1m'+'Trying to find similar movies'+'\033[0m')
        self.movie_name = movie_name
        self.number_of_recommend = number_of_recommend
        try:
            ratings, movies, movie_titles, idd_ = self.get_instances(
                movie_name, ratings=ratings, movies=movies
            )
            self.movies = movies
            self.movie_titles = movie_titles
            self.ratings = ratings
            self.idd_ = idd_

            (matrix_crs, _user_mapper, self.movie_mapper,
             self.user_inv_mapper, self.movie_inv_mapper) = self.matrix(self.ratings)
            neighbour_ids = self.predict_(self.idd_, matrix_crs, k=number_of_recommend)
        except LookupError:
            # IndexError: no title matched; KeyError: matched movie is unrated.
            print('Movie not founded!')
            return

        self.recommend(neighbour_ids, movie_titles)
        print('\nDone')

    def matrix(self, df):
        """Build the sparse movie x user rating matrix and its id mappings.

        Args:
            df: DataFrame with ``userId``, ``movieId`` and ``rating`` columns.

        Returns:
            A tuple ``(matrix, user_mapper, movie_mapper, user_inv_mapper,
            movie_inv_mapper)`` where ``matrix`` is a ``csr_matrix`` with one
            row per distinct movie and one column per distinct user, the
            ``*_mapper`` dicts map ids to row/column indices and the
            ``*_inv_mapper`` dicts map indices back to ids.
        """
        # Sorted distinct ids; computed once and reused for every mapping.
        user_ids = np.unique(df["userId"])
        movie_ids = np.unique(df["movieId"])
        user_unique = len(user_ids)
        movie_unique = len(movie_ids)

        # Map ids to indices
        user_mapper = dict(zip(user_ids, range(user_unique)))
        movie_mapper = dict(zip(movie_ids, range(movie_unique)))

        # Map indices to ids
        user_inv_mapper = dict(enumerate(user_ids))
        movie_inv_mapper = dict(enumerate(movie_ids))

        user_index = [user_mapper[i] for i in df['userId']]
        movie_index = [movie_mapper[i] for i in df['movieId']]

        matrix = csr_matrix(
            (df["rating"], (movie_index, user_index)),
            shape=(movie_unique, user_unique),
        )

        return matrix, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper

    def predict_(self, movie_id, data, k, metric='cosine', show_distance=False):
        """Return the ids of the ``k`` movies nearest to ``movie_id``.

        Args:
            movie_id: Id of the query movie; must be a key of
                ``self.movie_mapper``.
            data: Movie x user matrix as produced by :meth:`matrix`.
            k: Number of neighbours to return.
            metric: Distance metric passed to ``NearestNeighbors``.
            show_distance: Forwarded to ``kneighbors`` as ``return_distance``.
                The distances are not part of the return value; only the
                neighbour ids are returned either way.

        Returns:
            A list of at most ``k`` movie ids, nearest first, never
            containing ``movie_id`` itself.

        Raises:
            KeyError: If ``movie_id`` is not in ``self.movie_mapper``.
        """
        movie_ind = self.movie_mapper[movie_id]
        movie_vec = data[movie_ind].reshape(1, -1)

        # Ask for one extra neighbour because the query movie is normally
        # returned as its own nearest neighbour; never request more rows
        # than the matrix holds.
        n_neighbors = min(k + 1, data.shape[0])
        kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
        kNN.fit(data)

        result = kNN.kneighbors(movie_vec, return_distance=show_distance)
        # With return_distance=True the result is a (distances, indices) pair.
        indices = result[1] if show_distance else result

        # Drop the query movie by index rather than by position: with tied
        # distances it is not guaranteed to be the first neighbour.
        neighbour_ids = [
            self.movie_inv_mapper[int(ind)]
            for ind in indices.ravel()
            if int(ind) != movie_ind
        ]
        return neighbour_ids[:k]

    def recommend(self, similar_ids, movie_titles):
        """Print the titles of the recommended movies.

        Args:
            similar_ids: Iterable of movie ids to print.
            movie_titles: Dict mapping movie id to title.

        Output uses ANSI escape codes (bold, bright green).
        """
        print('\033[1m'f"\nRecommendation for movie : {self.movie_name}\n")
        for i in similar_ids:
            print('\033[1m'+'\033[92m'+movie_titles[i])

    def get_instances(self, movie_name, ratings=None, movies=None):
        """Load the data and resolve ``movie_name`` to a movie id.

        Args:
            movie_name: Text to search for in the titles. The match is a
                case-insensitive, literal substring match, so characters
                such as ``(``, ``?`` or ``*`` are matched as-is.
            ratings: Optional ratings DataFrame; read from ``RATINGS_URL``
                when ``None``.
            movies: Optional movies DataFrame with ``movieId`` and ``title``
                columns; read from ``MOVIES_URL`` when ``None``.

        Returns:
            ``(ratings, movies, movie_titles, idd_)`` where ``movie_titles``
            maps movieId to title and ``idd_`` is the id (``int``) of the
            first movie whose title contains ``movie_name``.

        Raises:
            IndexError: If no title contains ``movie_name``.
        """
        if ratings is None:
            ratings = pd.read_csv(self.RATINGS_URL)
        if movies is None:
            movies = pd.read_csv(self.MOVIES_URL)

        movie_titles = dict(zip(movies['movieId'], movies['title']))

        # Search the id -> title mapping, in insertion order.
        titles = pd.Series(movie_titles, dtype=object)
        query = str(movie_name).lower()
        # regex=False: treat the query as plain text; na=False: a missing
        # title never matches.
        is_match = titles.str.lower().str.contains(query, regex=False, na=False)
        matches = titles[is_match]
        if matches.empty:
            raise IndexError(f"no movie title contains {movie_name!r}")

        idd_ = int(matches.index[0])
        return ratings, movies, movie_titles, idd_

In [11]:
# Print the 7 movies rated most similarly to the first title containing "Spider-man".
MovieRecommender(movie_name='Spider-man', number_of_recommend=7)

[4m[1mTrying to find similar movies[0m
[1m
Recommendation for movie : Spider-man

[1m[92mSpider-Man 2 (2004)
[1m[92mStar Wars: Episode II - Attack of the Clones (2002)
[1m[92mMinority Report (2002)
[1m[92mX2: X-Men United (2003)
[1m[92mX-Men (2000)
[1m[92mPirates of the Caribbean: The Curse of the Black Pearl (2003)
[1m[92mLord of the Rings: The Fellowship of the Ring, The (2001)

Done


<__main__.MovieRecommender at 0x1689ca1d100>

In [12]:
# Print the 5 movies rated most similarly to the first title containing "avatar".
MovieRecommender(movie_name='avatar', number_of_recommend=5)

[4m[1mTrying to find similar movies[0m
[1m
Recommendation for movie : avatar

[1m[92mUp (2009)
[1m[92mIron Man (2008)
[1m[92mDistrict 9 (2009)
[1m[92mWALL·E (2008)
[1m[92mHangover, The (2009)

Done


<__main__.MovieRecommender at 0x168ab102ed0>

In [13]:
# Print the 12 movies rated most similarly to the first title containing "Forrest Gump".
MovieRecommender(movie_name='Forrest Gump', number_of_recommend=12)

[4m[1mTrying to find similar movies[0m
[1m
Recommendation for movie : Forrest Gump

[1m[92mShawshank Redemption, The (1994)
[1m[92mJurassic Park (1993)
[1m[92mPulp Fiction (1994)
[1m[92mBraveheart (1995)
[1m[92mSilence of the Lambs, The (1991)
[1m[92mApollo 13 (1995)
[1m[92mMatrix, The (1999)
[1m[92mMrs. Doubtfire (1993)
[1m[92mSchindler's List (1993)
[1m[92mTerminator 2: Judgment Day (1991)
[1m[92mLion King, The (1994)
[1m[92mSeven (a.k.a. Se7en) (1995)

Done


<__main__.MovieRecommender at 0x168c02ab8f0>