In [None]:
import numpy as np
import pandas as pd

import os

In [None]:
movies_file = 'data/ml-10M100K/movies.dat'
ratings_file = 'data/ml-10M100K/ratings.dat'

# Directory of cached (pickled) DataFrames, resolved relative to the current
# working directory. The path is joined component by component: the previous
# 'data\pickled' literal only worked with the Windows separator and relied on
# '\p' not being an escape sequence.
_pickle_dir = os.path.join(os.path.dirname(
    os.path.abspath('eda2.ipynb')), 'data', 'pickled')

# set appropriate paths for pickle files
_ratings_pickle_filename = 'movie_ratings.pkl'
ratings_pickle_path = os.path.join(_pickle_dir, _ratings_pickle_filename)
_movies_pickle_filename = 'movies.pkl'
movies_pickle_path = os.path.join(_pickle_dir, _movies_pickle_filename)

# Load each table from its pickle cache when available, otherwise parse the
# raw '::'-separated file.
# NOTE: pd.read_pickle can execute arbitrary code while unpickling; only point
# these paths at cache files you created yourself.
try:
    # The existence check must test the very path that is read. It used to
    # test `path + "2"`, so the cache was either never used or, if that other
    # file happened to exist, a possibly missing file was read.
    if os.path.exists(movies_pickle_path):
        movies_df = pd.read_pickle(movies_pickle_path)
    else:
        # dtype keys must match `names`; the old 'name' key matched no column.
        movies_df = pd.read_csv(movies_file, engine='python', encoding='utf-8',
                                sep='::', header=None, names=['movie_id', 'movie_title', 'genres'],
                                dtype={'movie_id': np.int32, 'movie_title': str, 'genres': str})

    if os.path.exists(ratings_pickle_path):
        ratings_df = pd.read_pickle(ratings_pickle_path)
    else:
        ratings_df = pd.read_csv(ratings_file, engine='python', encoding='utf-8',
                                 sep='::', header=None, names=['user_id', 'movie_id', 'rating', 'timestamp'],
                                 dtype={'user_id': np.int32, 'movie_id': np.int32, 'rating': np.int32, 'timestamp': np.float64})
        # Raw timestamps are seconds since the epoch; convert to tz-aware UTC datetimes.
        ratings_df['timestamp'] = pd.to_datetime(ratings_df['timestamp'], unit='s', utc=True)

except Exception as e:
    # Show the error, then re-raise: swallowing it would leave movies_df /
    # ratings_df undefined and make every later cell fail with a NameError.
    print(e)
    raise


In [None]:
# Movie x user rating matrix: one row per movie_id, one column per user_id.
# pivot_table aggregates with its default (mean) if a (movie, user) pair
# repeats; pairs without a rating are filled with 0.
rating_pivot = (
    ratings_df
    .pivot_table(index='movie_id', columns='user_id', values='rating')
    .fillna(0)
)
print('Shape of this pivot table :', rating_pivot.shape)
rating_pivot.head()


#### **Pivot Table with respect to ratings given by users to movies**

#### **Machine Learning Model training for Recommending movies based on users ratings.**

In [None]:
from sklearn.neighbors import NearestNeighbors
# Nearest-neighbour model using cosine distance, fitted on the rows of
# rating_pivot (each row is one movie's vector of ratings across users).
nn_algo = NearestNeighbors(metric='cosine')
nn_algo.fit(rating_pivot)

#### **Developing the class of Collaborative filtering Recommendation Engine**

In [None]:
class KNNMovieRecommender:
    """Collaborative-filtering recommender built on a fitted nearest-neighbour model.

    Movies are compared through their rows in ``rating_pivot``. Every movie
    passed to :meth:`recommend_on_movie` is remembered in ``self.hist`` so
    that :meth:`recommend_on_history` can recommend from the averaged history.

    Parameters
    ----------
    movie_df : pandas.DataFrame
        Movie metadata with at least ``movie_id`` and ``movie_title`` columns.
    nn_algo : object
        Fitted model exposing ``kneighbors(X, n_neighbors=...)`` and returning
        ``(distances, indices)``; the indices must be row positions of
        ``rating_pivot``.
    rating_pivot : pandas.DataFrame
        The matrix the model was fitted on, indexed by ``movie_id``.
    """

    def __init__(self, movie_df, nn_algo, rating_pivot):
        self.hist = []  # movie_ids queried so far, in call order
        self.ishist = False  # True once at least one movie has been recorded
        # Store the objects that were passed in. Previously the global
        # ``movies_df`` was stored and the ``movie_df`` argument was ignored.
        self.movies_df = movie_df
        self.nn_algo = nn_algo
        self.rating_pivot = rating_pivot

    def _titles_for(self, movie_ids):
        """Return the titles of ``movie_ids`` in order, skipping ids without a title."""
        lookup = (self.movies_df
                  .drop_duplicates('movie_id')
                  .set_index('movie_id')['movie_title'])
        return [lookup.loc[mid] for mid in movie_ids if mid in lookup.index]

    def _recommend(self, query, n_wanted, exclude):
        """Return up to ``n_wanted`` titles nearest to ``query``, leaving out ``exclude`` ids."""
        if n_wanted <= 0:
            return []
        # Request extra neighbours to make up for the excluded movies, but never
        # more than the number of rows the model can return.
        n_neighbors = min(n_wanted + len(exclude), len(self.rating_pivot))
        query = np.asarray(query, dtype=float).reshape(1, -1)
        _, neighbors = self.nn_algo.kneighbors(query, n_neighbors=n_neighbors)
        neighbor_ids = [self.rating_pivot.index[i] for i in neighbors[0]]
        kept_ids = [mid for mid in neighbor_ids if mid not in exclude]
        # Titles are read straight from the column; parsing str(Series) (the
        # old approach) truncated long titles at pandas' display width.
        return self._titles_for(kept_ids)[:n_wanted]

    def recommend_on_movie(self, movie, n_recommend = 5):
        """Recommend movies rated similarly to ``movie`` and add it to the history.

        Parameters
        ----------
        movie : str
            Exact ``movie_title`` to look up.
        n_recommend : int
            Maximum number of titles to return.

        Returns
        -------
        list
            Titles of the nearest movies, nearest first, excluding ``movie``.

        Raises
        ------
        ValueError
            If no row of ``movies_df`` has this title.
        KeyError
            If the movie has no row in ``rating_pivot``.
        """
        matches = self.movies_df.loc[self.movies_df['movie_title'] == movie, 'movie_id']
        if matches.empty:
            raise ValueError(f'Movie title not found: {movie!r}')
        movieid = int(matches.iloc[0])
        # Resolve the ratings row before touching the history so a failed
        # lookup cannot leave an unusable id in self.hist.
        ratings_row = self.rating_pivot.loc[movieid]
        self.hist.append(movieid)
        self.ishist = True
        return self._recommend(ratings_row, n_recommend, [movieid])

    # This method will recommend movies based on history stored in self.hist list
    def recommend_on_history(self,n_recomend = 5):
        """Recommend movies close to the average ratings row of the history.

        Prints ``'No history found'`` and returns ``None`` when no movie has
        been recorded yet; otherwise returns up to ``n_recomend`` titles,
        excluding the movies already in the history.
        """
        if not self.ishist or not self.hist:
            print('No history found')
            return None
        history = self.rating_pivot.loc[self.hist].to_numpy(dtype=float)
        return self._recommend(history.mean(axis=0), n_recomend, self.hist)
    

#### **Observation of this Recommendation Engine on some Movies**

In [None]:
# Initializing the collaborative-filtering recommender with the movie table,
# the k-NN model fitted on rating_pivot, and the rating matrix itself.
# (`Recommender` is the content-based class defined further down and is not
# available yet at this point of a top-to-bottom run.)
recommender = KNNMovieRecommender(movies_df, nn_algo, rating_pivot)

In [None]:
# Recommend from the watch history. The object was only just created, so there is no history yet.
recommender.recommend_on_history()    

In [None]:
# Recommend movies similar to each title below; each call also records the title in the history.
# Only the result of the last call is displayed by the notebook.
recommender.recommend_on_movie('Toy Story (1995)')
recommender.recommend_on_movie('Aladdin (1992)')
# recommender.recommend_on_movie('Back to the Future (1985)')

In [None]:
# Recommend from the watch history, which should now hold the two movies queried so far.
recommender.recommend_on_history()

In [None]:
# Recommend movies similar to this one (the call also adds it to the history).
recommender.recommend_on_movie('Tigerland (2000)')

In [None]:
# Recommend from the watch history, which should now hold the three movies queried so far.
recommender.recommend_on_history()

In [None]:
# Recommend movies similar to this one (the call also adds it to the history).
recommender.recommend_on_movie('Tomorrow Never Dies (1997)')

In [None]:
# Recommend from the watch history, which should now hold the four movies queried so far.
# recommender.recommend_on_movie('Toy Story (1995)')
recommender.recommend_on_history()

In [None]:
# Recommend movies similar to this one (the call also adds it to the history).
recommender.recommend_on_movie('Money Train (1995)')

In [None]:
# Recommend from the watch history, which should now hold the five movies queried so far.
recommender.recommend_on_history()

In [None]:
# Recommend movies similar to this one (the call also adds it to the history).
recommender.recommend_on_movie('GoldenEye (1995)')

In [None]:
# Recommend from the watch history, which should now hold the six movies queried so far.
recommender.recommend_on_history()

<br>

<div align='center'><img src='https://miro.medium.com/max/792/1*P63ZaFHlssabl34XbJgong.jpeg'></div>

## **Recommendation System using Content Based Filtering**

##### This recommendation system works like "These movies are similar to the movie you recently watched".

#### **Vectorization of contents of movies**

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words encoding of each movie's genre string: one row per movie (in
# movies_df order), one column per genre token.
# NOTE(review): the default token pattern splits on punctuation, so hyphenated
# genres (e.g. 'Sci-Fi') would become separate tokens; confirm this is intended.
vectorizer = CountVectorizer(stop_words='english')
# The movie table is loaded as `movies_df`; the name `movies` is never defined.
genres = vectorizer.fit_transform(movies_df['genres']).toarray()

# get_feature_names() was removed in scikit-learn 1.2; use the replacement and
# fall back only on releases that predate it.
if hasattr(vectorizer, 'get_feature_names_out'):
    feature_names = vectorizer.get_feature_names_out()
else:
    feature_names = vectorizer.get_feature_names()

contents = pd.DataFrame(genres, columns=feature_names)
print('Shape of the content table :',contents.shape)
contents.head()

#### **Machine Learning Model training for Recommending movies based on movies contents.**

In [None]:
from sklearn.neighbors import NearestNeighbors
# Nearest-neighbour model using cosine distance, fitted on the rows of `contents`.
# NOTE: this rebinds the global `nn_algo`, replacing the model that was fitted
# on `rating_pivot` earlier; anything reading the global after this cell gets
# the content-based model.
nn_algo = NearestNeighbors(metric='cosine')
nn_algo.fit(contents)

#### **Developing the class of Content Based filtering Recommendation Engine**

In [None]:
class Recommender:
    """Content-based recommender over per-movie feature vectors.

    Every movie passed to :meth:`recommend_on_movie` is remembered (by row
    position) in ``self.hist`` so that :meth:`recommend_on_history` can
    recommend from the averaged feature vector of the history.

    Parameters
    ----------
    movies : pandas.DataFrame, optional
        Movie table with a ``movie_title`` column. Defaults to the notebook's
        global ``movies_df``.
    model : object, optional
        Fitted model exposing ``kneighbors(X, n_neighbors=...)`` whose indices
        are row positions of ``features``. Defaults to the global ``nn_algo``.
    features : pandas.DataFrame, optional
        Feature matrix with one row per movie, aligned by position with
        ``movies``. Defaults to the global ``contents``.

    The defaults are resolved once, at construction time, so ``Recommender()``
    keeps working as before while later rebinding of the globals no longer
    affects an existing instance.
    """

    def __init__(self, movies=None, model=None, features=None):
        # Row positions of the movies queried through recommend_on_movie, in call order
        self.hist = []
        self.ishist = False  # True once at least one movie has been recorded
        self.movies_df = movies if movies is not None else movies_df
        self.nn_algo = model if model is not None else nn_algo
        self.contents = features if features is not None else contents

    def _recommend(self, query, n_wanted, exclude):
        """Return up to ``n_wanted`` titles nearest to ``query``, leaving out ``exclude`` positions."""
        if n_wanted <= 0:
            return []
        # Request extra neighbours to make up for the excluded rows, but never
        # more than the number of rows the model can return.
        n_neighbors = min(n_wanted + len(exclude), len(self.contents))
        query = np.asarray(query, dtype=float).reshape(1, -1)
        _, neighbors = self.nn_algo.kneighbors(query, n_neighbors=n_neighbors)
        titles = self.movies_df['movie_title']
        return [titles.iloc[i] for i in neighbors[0] if i not in exclude][:n_wanted]

    # This method will recommend movies based on a movie that passed as the parameter
    def recommend_on_movie(self,movie,n_reccomend = 5):
        """Recommend movies whose features are closest to ``movie`` and add it to the history.

        Returns up to ``n_reccomend`` titles, nearest first, excluding ``movie``.

        Raises
        ------
        IndexError
            If no row of the movie table has this exact title.
        """
        # Work with row *positions*: the feature matrix is addressed with
        # .iloc, so an index label is only correct for a default RangeIndex.
        positions = np.flatnonzero((self.movies_df['movie_title'] == movie).to_numpy())
        if positions.size == 0:
            raise IndexError(f'Movie title not found: {movie!r}')
        position = int(positions[0])
        features_row = self.contents.iloc[position]
        # Record the movie only after the lookup succeeded so a bad title
        # cannot flip ishist while leaving the history empty.
        self.hist.append(position)
        self.ishist = True
        return self._recommend(features_row, n_reccomend, [position])

    # This method will recommend movies based on history stored in self.hist list
    def recommend_on_history(self,n_reccomend = 5):
        """Recommend movies close to the average feature vector of the history.

        Prints ``'No history found'`` and returns ``None`` when no movie has
        been recorded yet; otherwise returns up to ``n_reccomend`` titles,
        excluding the movies already in the history.
        """
        if not self.ishist or not self.hist:
            print('No history found')
            return None
        history = self.contents.iloc[self.hist].to_numpy(dtype=float)
        return self._recommend(history.mean(axis=0), n_reccomend, self.hist)

#### **Observation of this Recommendation Engine on some Movies**

In [None]:
# Initializing the content-based Recommender object (this rebinds `recommender`)
recommender = Recommender()  

In [None]:
# Recommend from the watch history. The object was only just created, so there is no history yet.
recommender.recommend_on_history()    

In [None]:
# Recommend movies similar to each title below; each call also records the title in the history.
# Only the result of the last call is displayed by the notebook.
recommender.recommend_on_movie('Godfather, The (1972)')
# recommender.recommend_on_movie('American Beauty (1999)')
recommender.recommend_on_movie('Fargo (1996)')

In [None]:
# Recommend from the watch history, which should now hold the two movies queried so far.
recommender.recommend_on_history()

In [None]:
# Recommend movies similar to this one (the call also adds it to the history).
recommender.recommend_on_movie('Heat (1995)')

In [None]:
# Recommend from the watch history, which should now hold the three movies queried so far.
recommender.recommend_on_history()

In [None]:
# Recommend movies similar to this one (the call also adds it to the history).
# NOTE(review): titles are matched exactly and every other title used in this
# notebook carries a "(year)" suffix, so 'Dracula' presumably matches nothing
# and raises - confirm the intended full title.
recommender.recommend_on_movie('Dracula')

In [None]:
# Recommend from the watch history accumulated so far (four movies if the 'Dracula' lookup above succeeded).
recommender.recommend_on_history()

In [None]:
# Recommend movies similar to this one (the call also adds it to the history).
recommender.recommend_on_movie('Money Train (1995)')

In [None]:
# Recommend from the watch history accumulated so far (five movies if every lookup above succeeded).
recommender.recommend_on_history()

In [None]:
# Recommend movies similar to this one (the call also adds it to the history).
recommender.recommend_on_movie('GoldenEye (1995)')

In [None]:
# Recommend from the watch history accumulated so far (six movies if every lookup above succeeded).
recommender.recommend_on_history()

# <center> **Thank You** </center>