# Importing Libraries

In [46]:
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import NearestNeighbors

# Analyzing the dataset

In [3]:
# Load the movie catalogue (semicolon-separated, Latin-1 encoded).
# The path is assembled with os.path.join so it works on every OS and contains no
# backslash sequences that Python would try to read as string escapes.
# NOTE(review): 'Unnamed: 3' is dropped unconditionally. Confirm it is only an empty
# trailing column and not spill-over from rows whose title contains the separator
# (the genre vocabulary printed later in this notebook includes years and title words).
movies = pd.read_csv(
    os.path.join('.', 'Dataset', 'movies.csv'),
    sep=';',
    encoding='latin-1',
).drop(columns='Unnamed: 3')
print('Shape of this dataset :', movies.shape)
movies.head()

Shape of this dataset : (3883, 3)


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [8]:
# Load the user -> movie ratings (semicolon-separated).
# os.path.join keeps the path portable and avoids backslash escapes in the literal.
ratings = pd.read_csv(os.path.join('.', 'Dataset', 'ratings.csv'), sep=';')
print('Shape of this dataset :', ratings.shape)
ratings.head()

Shape of this dataset : (1000209, 4)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [13]:
# Load the user demographics table (semicolon-separated).
# os.path.join keeps the path portable and avoids backslash escapes in the literal.
users = pd.read_csv(os.path.join('.', 'Dataset', 'users.csv'), sep=';')
print('Shape of this dataset :', users.shape)
users.head()

Shape of this dataset : (6040, 5)


Unnamed: 0,userId,gender,age,occupation,zip-code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


# Collaborative Filtering: getting the 20 closest movies

In [14]:
# Movie x user rating matrix: one row per movieId, one column per userId.
# Combinations with no rating come out of the pivot as NaN and are replaced by 0.
rating_pivot = pd.pivot_table(
    ratings,
    index='movieId',
    columns='userId',
    values='rating',
).fillna(0)
print('Shape of this pivot table :', rating_pivot.shape)
rating_pivot.head()

Shape of this pivot table : (3706, 6040)


userId,1,2,3,4,5,6,7,8,9,10,...,6031,6032,6033,6034,6035,6036,6037,6038,6039,6040
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,5.0,5.0,...,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,3.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# Fit a cosine-distance nearest-neighbours model on the rating matrix
# (each row of rating_pivot, i.e. each movie, is one sample).
nn_algo = NearestNeighbors(metric='cosine').fit(rating_pivot)
nn_algo

In [27]:
class Recommender:
    """Collaborative-filtering recommender that remembers the movies it was asked about.

    Movies are compared through their rows in a movie x user rating matrix using a
    fitted nearest-neighbours model. Every successful ``recommend_on_movie`` call
    records the queried movieId in ``hist``; ``recommend_on_history`` then recommends
    from the average rating profile of all recorded movies.
    """

    def __init__(self, model=None, ratings=None, catalog=None):
        """Create a recommender with an empty history.

        Parameters
        ----------
        model : object, optional
            Fitted estimator exposing ``kneighbors(X, n_neighbors=...)`` and returning
            ``(distances, indices)``. Defaults to the notebook-level ``nn_algo``.
        ratings : pandas.DataFrame, optional
            Rating matrix indexed by movieId, with rows in the order ``model`` was
            fitted on. Defaults to the notebook-level ``rating_pivot``.
        catalog : pandas.DataFrame, optional
            Table with ``movieId`` and ``title`` columns. Defaults to the
            notebook-level ``movies``.
        """
        # Resolve the notebook globals once, at construction time. ``nn_algo`` is bound
        # to a different model further down the notebook; looking it up on every call
        # would make an existing instance silently query the wrong model.
        self._model = nn_algo if model is None else model
        self._ratings = rating_pivot if ratings is None else ratings
        self._catalog = movies if catalog is None else catalog
        # movieIds successfully queried through recommend_on_movie, in call order.
        self.hist = []
        # True once at least one movie has been recorded in ``hist``.
        self.ishist = False

    def _titles_for(self, movieids, exclude, limit):
        """Return up to ``limit`` titles for ``movieids``, skipping ids in ``exclude``."""
        skip = set(exclude)
        # Read the title column directly; parsing the printed form of a Series cuts
        # titles at double spaces and at pandas' display width.
        title_by_id = self._catalog.drop_duplicates('movieId').set_index('movieId')['title']
        titles = [
            title_by_id.loc[mid]
            for mid in movieids
            if mid not in skip and mid in title_by_id.index
        ]
        return titles[:limit]

    def recommend_on_movie(self, movie, n_reccomend=20):
        """Recommend the movies whose rating profile is closest to ``movie``.

        Parameters
        ----------
        movie : str
            Exact title as it appears in the catalog's ``title`` column.
        n_reccomend : int, default 20
            Maximum number of titles to return.

        Returns
        -------
        list of str
            Titles ordered from nearest to farthest, never including ``movie`` itself.

        Raises
        ------
        KeyError
            If the title is not in the catalog or the movie has no row in the
            rating matrix. The history is left untouched in that case.
        """
        matches = self._catalog.loc[self._catalog['title'] == movie, 'movieId']
        if matches.empty:
            raise KeyError(f'Movie title not found: {movie!r}')
        movieid = int(matches.iloc[0])
        if movieid not in self._ratings.index:
            raise KeyError(f'No ratings available for movieId {movieid} ({movie!r})')

        # One extra neighbour because the query movie is its own nearest neighbour;
        # capped because the model cannot return more neighbours than it has samples.
        k = min(n_reccomend + 1, len(self._ratings))
        _, neighbors = self._model.kneighbors(self._ratings.loc[[movieid]], n_neighbors=k)

        # Record the movie only after everything above succeeded.
        self.hist.append(movieid)
        self.ishist = True

        movieids = self._ratings.index[neighbors[0]]
        return self._titles_for(movieids, [movieid], n_reccomend)

    def recommend_on_history(self, n_reccomend=20):
        """Recommend movies close to the average rating profile of the history.

        Parameters
        ----------
        n_reccomend : int, default 20
            Maximum number of titles to return.

        Returns
        -------
        list of str or None
            Titles ordered from nearest to farthest, excluding movies already in the
            history. When the history is empty, prints ``'No history found'`` and
            returns ``None``.
        """
        if not self.hist:
            print('No history found')
            return None

        rows = self._ratings.loc[self.hist]
        # Same arithmetic as averaging the raw rows, wrapped in a one-row frame that
        # keeps the rating matrix's column labels.
        profile = pd.DataFrame([np.average(rows.to_numpy(), axis=0)], columns=rows.columns)

        # Ask for enough neighbours to still have n_reccomend left after the history
        # movies are filtered out, without exceeding the number of fitted samples.
        k = min(n_reccomend + len(set(self.hist)), len(self._ratings))
        _, neighbors = self._model.kneighbors(profile, n_neighbors=k)

        movieids = self._ratings.index[neighbors[0]]
        return self._titles_for(movieids, self.hist, n_reccomend)

In [28]:
# Create a recommender; its history list starts out empty.
recommender = Recommender()

In [30]:
# Ask for history-based recommendations on a freshly created recommender.
# No movie has been queried yet, so the method prints 'No history found' and returns None.
print("Movies: \n", recommender.recommend_on_history() )
print("History:\n" ,recommender.hist)

No history found
Movies: 
 None
History:
 []


In [31]:
# Recommend from a single title; the call also records that movie's id in recommender.hist.
print("Movies: \n", recommender.recommend_on_movie('Father of the Bride Part II (1995)') )
print("History: \n",recommender.hist)

Movies: 
 ['Home Alone (1990)', 'Home Alone 2', 'Mighty Ducks, The (1992)', 'Mrs. Doubtfire (1993)', 'Liar Liar (1997)', 'First Wives Club, The (1996)', "You've Got Mail (1998)", 'Flintstones, The (1994)', 'Multiplicity (1996)', 'Nine Months (1995)', 'Ace Ventura', 'Dumb & Dumber (1994)', 'Sister Act (1992)', 'Waterboy, The (1998)', 'City Slickers II', 'Sister Act 2', 'Robin Hood', "My Best Friend's Wedding (1997)", 'Richie Rich (1994)', 'Brady Bunch Movie, The (1995)']
History: 
 [5]


In [32]:
# The history now holds one movie, so recommendations come from the average of the recorded rating rows.
print(recommender.recommend_on_history())

['Home Alone (1990)', 'Home Alone 2', 'Mighty Ducks, The (1992)', 'Mrs. Doubtfire (1993)', 'Liar Liar (1997)', 'First Wives Club, The (1996)', "You've Got Mail (1998)", 'Flintstones, The (1994)', 'Multiplicity (1996)', 'Nine Months (1995)', 'Ace Ventura', 'Dumb & Dumber (1994)', 'Sister Act (1992)', 'Waterboy, The (1998)', 'City Slickers II', 'Sister Act 2', 'Robin Hood', "My Best Friend's Wedding (1997)", 'Richie Rich (1994)', 'Brady Bunch Movie, The (1995)']


# Content-Based Filtering: genre similarity over the full movie catalogue

In [36]:
# Bag-of-words encoding of the genres column: one row per movie, one column per token.
# (CountVectorizer is imported with the other libraries in the first cell.)
# NOTE(review): the vocabulary printed in the next cell contains years and title words
# ('1995', 'detective', ...) alongside genre names, so `movies.genres` appears to hold
# non-genre text for some rows. Verify the CSV parsing before trusting these features.
vectorizer = CountVectorizer(stop_words='english')
genres = vectorizer.fit_transform(movies.genres).toarray()
feature_names = vectorizer.get_feature_names_out()
contents = pd.DataFrame(genres, columns=feature_names)
print('Shape of the content table :', contents.shape)
contents.head()

Shape of the content table : (3883, 347)


Unnamed: 0,1919,1956,1963,1968,1974,1977,1978,1979,1980,1981,...,wight,willowbrook,witch,worrying,wrath,years,yellow,yes,york,yu
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
print(feature_names)

['1919' '1956' '1963' '1968' '1974' '1977' '1978' '1979' '1980' '1981'
 '1982' '1983' '1984' '1985' '1986' '1987' '1988' '1989' '1990' '1991'
 '1992' '1993' '1994' '1995' '1996' '1997' '1998' '1999' '2000' '3d' '79'
 'aardman' 'action' 'adventure' 'adventures' 'airport' 'al' 'alive'
 'america' 'american' 'angels' 'animation' 'annihilation' 'arc'
 'assassination' 'assignment' 'baloo' 'bananas' 'batch' 'bath' 'bear'
 'beginning' 'best' 'bigger' 'blood' 'bloodline' 'blue' 'body' 'bomb'
 'bordello' 'bowles' 'brain' 'burn' 'business' 'calls' 'candy' 'carrie'
 'castrato' 'chainsaw' 'chao' 'chapter' 'child' 'children' 'cinderella'
 'citizens' 'city' 'clear' 'comedy' 'conflict' 'contact' 'control'
 'country' 'crackdown' 'crime' 'cruise' 'curly' 'curse' 'cut' 'cyberspace'
 'dare' 'dark' 'day' 'dead' 'death' 'deliver' 'demon' 'der' 'detective'
 'documentary' 'dog' 'dollhouse' 'dorothy' 'drama' 'dream' 'ducks' 'eagle'
 'earth' 'empire' 'end' 'endgame' 'episode' 'evil' 'face' 'fall' 'fantasy'
 'fa

In [38]:
# Fit a cosine-distance nearest-neighbours model on the genre token counts.
# Note: this rebinds the global `nn_algo`, replacing the model fitted on `rating_pivot`.
nn_algo = NearestNeighbors(metric='cosine').fit(contents)
nn_algo

In [41]:
class Recommender:
    """Content-based recommender that remembers the movies it was asked about.

    Movies are compared through their rows in a per-movie feature table using a
    fitted nearest-neighbours model. Every successful ``recommend_on_movie`` call
    records the row position of the queried movie in ``hist``;
    ``recommend_on_history`` then recommends from the average feature vector of all
    recorded movies.
    """

    def __init__(self, model=None, features=None, catalog=None):
        """Create a recommender with an empty history.

        Parameters
        ----------
        model : object, optional
            Fitted estimator exposing ``kneighbors(X, n_neighbors=...)`` and returning
            ``(distances, indices)``. Defaults to the notebook-level ``nn_algo``.
        features : pandas.DataFrame, optional
            Feature table with one row per catalog row, in the same order, as used to
            fit ``model``. Defaults to the notebook-level ``contents``.
        catalog : pandas.DataFrame, optional
            Table with a ``title`` column. Defaults to the notebook-level ``movies``.
        """
        # Resolve the notebook globals once, at construction time, so that rebinding
        # ``nn_algo`` in another cell cannot change the model behind this instance.
        self._model = nn_algo if model is None else model
        self._features = contents if features is None else features
        self._catalog = movies if catalog is None else catalog
        # Row positions successfully queried through recommend_on_movie, in call order.
        self.hist = []
        # True once at least one movie has been recorded in ``hist``.
        self.ishist = False

    def _titles_at(self, positions, exclude, limit):
        """Return up to ``limit`` titles at ``positions``, skipping those in ``exclude``."""
        skip = set(exclude)
        titles = self._catalog['title']
        picked = [titles.iloc[int(pos)] for pos in positions if int(pos) not in skip]
        return picked[:limit]

    def recommend_on_movie(self, movie, n_reccomend=5):
        """Recommend the movies whose feature vector is closest to ``movie``.

        Parameters
        ----------
        movie : str
            Exact title as it appears in the catalog's ``title`` column.
        n_reccomend : int, default 5
            Maximum number of titles to return.

        Returns
        -------
        list of str
            Titles ordered from nearest to farthest, never including ``movie`` itself.

        Raises
        ------
        IndexError
            If the title is not in the catalog. The history is left untouched.
        """
        # Work with the row *position*: an index label is only a valid position when
        # the catalog happens to carry a default 0..n-1 index.
        positions = np.flatnonzero((self._catalog['title'] == movie).to_numpy())
        if positions.size == 0:
            raise IndexError(f'Movie title not found: {movie!r}')
        iloc = int(positions[0])

        # One extra neighbour because the query movie is its own nearest neighbour;
        # capped because the model cannot return more neighbours than it has samples.
        k = min(n_reccomend + 1, len(self._features))
        # A one-row DataFrame keeps the column labels the model was fitted with.
        _, neighbors = self._model.kneighbors(self._features.iloc[[iloc]], n_neighbors=k)

        # Record the movie only after everything above succeeded.
        self.hist.append(iloc)
        self.ishist = True

        return self._titles_at(neighbors[0], [iloc], n_reccomend)

    def recommend_on_history(self, n_reccomend=5):
        """Recommend movies close to the average feature vector of the history.

        Parameters
        ----------
        n_reccomend : int, default 5
            Maximum number of titles to return.

        Returns
        -------
        list of str or None
            Titles ordered from nearest to farthest, excluding movies already in the
            history. When the history is empty, prints ``'No history found'`` and
            returns ``None``.
        """
        if not self.hist:
            print('No history found')
            return None

        rows = self._features.iloc[self.hist]
        # Same arithmetic as averaging the raw rows, wrapped in a one-row frame that
        # keeps the feature table's column labels.
        profile = pd.DataFrame([np.average(rows.to_numpy(), axis=0)], columns=rows.columns)

        # Ask for enough neighbours to still have n_reccomend left after the history
        # movies are filtered out, without exceeding the number of fitted samples.
        k = min(n_reccomend + len(set(self.hist)), len(self._features))
        _, neighbors = self._model.kneighbors(profile, n_neighbors=k)

        return self._titles_at(neighbors[0], self.hist, n_reccomend)

In [42]:
# Rebind `recommender` to an instance of the Recommender class defined in the cell above; its history starts empty.
recommender = Recommender()

In [43]:
# Ask for history-based recommendations on a freshly created recommender.
# No movie has been queried yet, so the method prints 'No history found' and returns None.
print("Movies: \n", recommender.recommend_on_history() )
print("History:\n" ,recommender.hist)

No history found
Movies: 
 None
History:
 []


In [44]:
# Recommend from a single title; the call also records the matched row's index in recommender.hist.
print("Movies: \n", recommender.recommend_on_movie('Father of the Bride Part II (1995)') )
print("History: \n",recommender.hist)

Movies: 
 ['Waiting for Guffman (1996)', 'Jimmy Hollywood (1994)', 'Kolya (1996)', 'Life with Mikey (1993)', '8 1/2 Women (1999)']
History: 
 [4]




In [45]:
# The history now holds one movie, so recommendations come from the average of the recorded feature rows.
print(recommender.recommend_on_history())

['Waiting for Guffman (1996)', 'Jimmy Hollywood (1994)', 'Kolya (1996)', 'Life with Mikey (1993)', '8 1/2 Women (1999)']


