## Content Based Recommender System

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

#### Importing Necessary Libraries & DataSet

In [2]:
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [3]:
movies = pd.read_csv(r"C:\Users\lenovo\Desktop\3\DataSet 2\movies.csv")
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


### Checking the Shape and Info about DataSet

In [4]:
## printing the shape of the dataset
print("Movies shape is {}".format(movies.shape))

Movies shape is (9742, 3)


In [5]:
## printing the name and data type of columns of dataset
print(movies.columns)

Index(['movieId', 'title', 'genres'], dtype='object')


In [15]:
final_dataset = movies

final_dataset.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


### Building the Model

All 'genres' words are in english. Our model can understand only numbers so We'll convert the 'genres' into sparse matrix form using either CountVectorizer or TfidfVectorizer. CountVectorizer just counts the words appear, there is a high chances that missing the rare words which could have helped for predicting the model effectively. So We'll use TfidfVectorizer which counts the frequency of the words and normalize them.

In [7]:
# stop words will remove the common english words like a,an,the,etc which increase the words count & create noise

c_vect = TfidfVectorizer()
X = c_vect.fit_transform(movies['genres'])

In [8]:
# for sparse matrix cosine similarity works better
cosine_sim = cosine_similarity(X)

In [12]:
def get_movie_recommendation(movie_name):
    idx = final_dataset[movies['title'].str.contains(movie_name)].index
    if len(idx):
        sorted_list_indices = sorted(list(enumerate(cosine_sim[idx[0]])), key=lambda x: x[1], reverse=True)[1:11]
        sorted_list_indices = list(map(lambda x:x[0],sorted_list_indices))
        return sorted_list_indices
    else : 
        return []

### Testing the Recommender System

In [16]:
### Example 1:-
## In this the movies are recommended on the basis of the genres after the movie name is called

title = "Assassins"
recommended_movie_list = get_movie_recommendation(title)
movies.loc[recommended_movie_list,['title','genres']]

Unnamed: 0,title,genres
22,Assassins (1995),Action|Crime|Thriller
138,Die Hard: With a Vengeance (1995),Action|Crime|Thriller
156,"Net, The (1995)",Action|Crime|Thriller
249,Natural Born Killers (1994),Action|Crime|Thriller
417,Judgment Night (1993),Action|Crime|Thriller
509,Batman (1989),Action|Crime|Thriller
793,Die Hard (1988),Action|Crime|Thriller
1306,Hard Rain (1998),Action|Crime|Thriller
1315,"Replacement Killers, The (1998)",Action|Crime|Thriller
1325,U.S. Marshals (1998),Action|Crime|Thriller


In [17]:
## Example 2 :-
title = "Bob Roberts"
recommended_movie_list = get_movie_recommendation(title)
final_dataset.loc[recommended_movie_list,['title','genres']]

Unnamed: 0,title,genres
17,Four Rooms (1995),Comedy
18,Ace Ventura: When Nature Calls (1995),Comedy
58,Bio-Dome (1996),Comedy
61,Friday (1995),Comedy
79,Black Sheep (1996),Comedy
90,Mr. Wrong (1996),Comedy
92,Happy Gilmore (1996),Comedy
104,"Steal Big, Steal Little (1995)",Comedy
108,Flirting With Disaster (1996),Comedy
113,Down Periscope (1996),Comedy


In [18]:
## Example 3:-
title = "Waiting to Exhale "
recommended_movie_list = get_movie_recommendation(title)
final_dataset.loc[recommended_movie_list,['title','genres']]

Unnamed: 0,title,genres
10,"American President, The (1995)",Comedy|Drama|Romance
47,Mighty Aphrodite (1995),Comedy|Drama|Romance
52,"Postman, The (Postino, Il) (1994)",Comedy|Drama|Romance
83,Beautiful Girls (1996),Comedy|Drama|Romance
165,Something to Talk About (1995),Comedy|Drama|Romance
191,Don Juan DeMarco (1995),Comedy|Drama|Romance
198,Eat Drink Man Woman (Yin shi nan nu) (1994),Comedy|Drama|Romance
243,Nobody's Fool (1994),Comedy|Drama|Romance
309,"Corrina, Corrina (1994)",Comedy|Drama|Romance
317,I Like It Like That (1994),Comedy|Drama|Romance


In [19]:
## Example 4:-
title = "Toy Story "
recommended_movie_list = get_movie_recommendation(title)
final_dataset.loc[recommended_movie_list,['title','genres']]

Unnamed: 0,title,genres
1706,Antz (1998),Adventure|Animation|Children|Comedy|Fantasy
2355,Toy Story 2 (1999),Adventure|Animation|Children|Comedy|Fantasy
2809,"Adventures of Rocky and Bullwinkle, The (2000)",Adventure|Animation|Children|Comedy|Fantasy
3000,"Emperor's New Groove, The (2000)",Adventure|Animation|Children|Comedy|Fantasy
3568,"Monsters, Inc. (2001)",Adventure|Animation|Children|Comedy|Fantasy
6194,"Wild, The (2006)",Adventure|Animation|Children|Comedy|Fantasy
6486,Shrek the Third (2007),Adventure|Animation|Children|Comedy|Fantasy
6948,"Tale of Despereaux, The (2008)",Adventure|Animation|Children|Comedy|Fantasy
7760,Asterix and the Vikings (Astérix et les Viking...,Adventure|Animation|Children|Comedy|Fantasy
8219,Turbo (2013),Adventure|Animation|Children|Comedy|Fantasy


                     - - - - - - - - X X X X X X X X - - - - - - - -