In [None]:
import numpy as np
import pandas as pd

In [7]:
# Load the movie catalogue: a semicolon-separated, Latin-1 encoded CSV.
movies_raw = pd.read_csv('/content/movies.csv', sep=';', encoding='latin-1')
# Discard the extra fourth column that the parser names 'Unnamed: 3'.
# NOTE(review): some titles shown later in this notebook have no release year
# (e.g. 'Dracula', 'Geronimo'), so a ';' inside a title presumably shifts the
# remaining fields one column to the right - verify against the raw file before
# trusting `title`/`genres` for those rows.
movies = movies_raw.drop(columns=['Unnamed: 3'])
print('Shape of this dataset :', movies.shape)
movies.head()

Shape of this dataset : (3883, 3)


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [11]:
# Load the user -> movie ratings (semicolon-separated CSV).
ratings_path = '/content/ratings.csv'
ratings = pd.read_csv(ratings_path, sep=';')
print('Shape of this dataset :', ratings.shape)
ratings.head()

Shape of this dataset : (48823, 4)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [13]:
# Load the user table (semicolon-separated CSV).
users = pd.read_csv(filepath_or_buffer='/content/users.csv', sep=';')
print('Shape of this dataset :', users.shape)
users.head()

Shape of this dataset : (6040, 5)


Unnamed: 0,userId,gender,age,occupation,zip-code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [14]:
# Build the movie x user rating matrix: one row per movieId, one column per
# userId, cell = rating (pivot_table aggregates duplicates with its default,
# the mean). Movie/user pairs without a rating become 0.
rating_pivot = (
    ratings
    .pivot_table(index='movieId', columns='userId', values='rating')
    .fillna(0)
)
print('Shape of this pivot table :', rating_pivot.shape)
rating_pivot.head()

Shape of this pivot table : (3049, 327)


userId,1,2,3,4,5,6,7,8,9,10,...,318,319,320,321,322,323,324,325,326,327
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,5.0,5.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,5.0,3.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,...,0.0,0.0,0.0,1.0,0.0,5.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0


In [15]:
from sklearn.neighbors import NearestNeighbors

# Index every movie's rating vector (a row of `rating_pivot`) for
# cosine-distance nearest-neighbour queries. `fit` returns the estimator
# itself, so the trailing expression shows the same repr as before.
nn_algo = NearestNeighbors(metric='cosine').fit(rating_pivot)
nn_algo

NearestNeighbors(metric='cosine')

In [16]:
class Recommender:
    """Item-based collaborative-filtering recommender.

    Movies are compared through their rating vectors (rows of the
    movie x user rating matrix) using a fitted nearest-neighbour model.

    Parameters
    ----------
    movies_df : DataFrame, optional
        Table with at least 'movieId' and 'title' columns. Defaults to the
        notebook-level ``movies``.
    rating_matrix : DataFrame, optional
        Matrix indexed by movieId with one column per user. Defaults to the
        notebook-level ``rating_pivot``.
    model : optional
        Object exposing ``kneighbors(X, n_neighbors=...)`` that was fitted on
        ``rating_matrix``. Defaults to the notebook-level ``nn_algo``.

    Attributes
    ----------
    hist : list
        movieIds successfully passed to ``recommend_on_movie``, in call order.
    ishist : bool
        True once ``hist`` is non-empty (kept for backward compatibility).
    """

    def __init__(self, movies_df=None, rating_matrix=None, model=None):
        # Bind the collaborators once, at construction time, so that rebinding
        # the notebook globals later (e.g. re-fitting `nn_algo` on different
        # features) cannot silently change what an existing instance does.
        self._movies = movies if movies_df is None else movies_df
        self._ratings = rating_pivot if rating_matrix is None else rating_matrix
        self._model = nn_algo if model is None else model
        # Movies that were queried at least once through recommend_on_movie.
        self.hist = []
        self.ishist = False  # mirrors bool(self.hist)

    def _lookup_movie_id(self, title):
        """Return the movieId for ``title``; fail clearly if it cannot be used."""
        matches = self._movies.loc[self._movies['title'] == title, 'movieId']
        if matches.empty:
            raise ValueError(f'Movie title not found: {title!r}')
        # First match wins when the catalogue contains the title more than once.
        movieid = int(matches.iloc[0])
        if movieid not in self._ratings.index:
            raise KeyError(f'Movie {title!r} (movieId={movieid}) has no ratings')
        return movieid

    def _title_of(self, movieid):
        """Return the title stored for ``movieid``, or None if it is unknown."""
        matches = self._movies.loc[self._movies['movieId'] == movieid, 'title']
        # Read the value directly: parsing str(Series) truncates long titles
        # and mangles titles that contain consecutive spaces.
        return None if matches.empty else matches.iloc[0]

    def _recommend(self, query, exclude, n_reccomend):
        """Return up to ``n_reccomend`` neighbour titles, skipping ``exclude`` ids."""
        # Over-fetch by the number of excluded movies, but never request more
        # neighbours than there are rows in the fitted matrix.
        k = min(n_reccomend + len(exclude), len(self._ratings))
        _, neighbors = self._model.kneighbors(query, n_neighbors=k)
        titles = []
        for row in neighbors[0]:
            movieid = self._ratings.index[row]
            if movieid in exclude:
                continue
            title = self._title_of(movieid)
            if title is not None:
                titles.append(title)
        return titles[:n_reccomend]

    def recommend_on_movie(self, movie, n_reccomend=5):
        """Recommend movies whose rating vectors are closest to ``movie``'s.

        The movie is appended to the history only after the query succeeded.

        Parameters
        ----------
        movie : str
            Exact title as stored in the movies table.
        n_reccomend : int, default 5
            Maximum number of titles to return.

        Returns
        -------
        list of str
            Recommended titles, nearest first, never including ``movie``.

        Raises
        ------
        ValueError
            If no movie has this title.
        KeyError
            If the movie exists but has no row in the rating matrix.
        """
        movieid = self._lookup_movie_id(movie)
        # A one-row DataFrame keeps the column labels the model was fitted with.
        query = self._ratings.loc[[movieid]]
        recommeds = self._recommend(query, {movieid}, n_reccomend)
        self.hist.append(movieid)
        self.ishist = True
        return recommeds

    def recommend_on_history(self, n_reccomend=5):
        """Recommend movies close to the average rating vector of the history.

        Prints 'No history found' and returns None when the history is empty;
        otherwise returns a list of up to ``n_reccomend`` titles that are not
        already in the history.
        """
        if not self.hist:
            print('No history found')
            return None
        # Mean rating vector over the history (repeated entries weigh more).
        profile = self._ratings.loc[self.hist].mean(axis=0).to_frame().T
        return self._recommend(profile, set(self.hist), n_reccomend)

In [17]:
# Initializing the Recommender object; its history starts out empty
recommender = Recommender()  

In [18]:
# Recommendation based on previously queried movies. The object was just
# initialized, so the history is empty and 'No history found' is printed.
recommender.recommend_on_history()    

No history found


In [19]:
# Recommendation based on this movie; the call also adds the movie to the
# recommender's history.
recommender.recommend_on_movie('Father of the Bride Part II (1995)')

['Guilty as Sin (1993)',
 'Honeymoon in Vegas (1992)',
 'Encino Man (1992)',
 'Bonfire of the Vanities (1990)',
 'Son in Law (1993)']

In [20]:
# Recommendation based on previously queried movies; this time there is one
# movie in the history.
recommender.recommend_on_history()

['Guilty as Sin (1993)',
 'Honeymoon in Vegas (1992)',
 'Encino Man (1992)',
 'Bonfire of the Vanities (1990)',
 'Son in Law (1993)']

In [21]:
# Recommendation based on this movie (also appended to the history)
recommender.recommend_on_movie('Tigerland (2000)')

['Following (1998)',
 'Family Thing, A (1996)',
 'Aparajito (1956)',
 'Color of Paradise, The (Rang-e Khoda) (1999)',
 'Peeping Tom (1960)']

In [22]:
# Recommendation based on previously queried movies; this time there are two
# movies in the history.
recommender.recommend_on_history()

['Guilty as Sin (1993)',
 'Multiplicity (1996)',
 'Bonfire of the Vanities (1990)',
 'Miami Rhapsody (1995)',
 'Encino Man (1992)']

In [23]:
# Recommendation based on this movie (also appended to the history).
# NOTE(review): unlike the other titles queried here, 'Dracula' carries no
# release year - presumably the title was cut at a ';' when movies.csv was
# parsed; verify against the raw file.
recommender.recommend_on_movie('Dracula')

['Disturbing Behavior (1998)',
 'Twin Dragons (Shuang long hui) (1992)',
 'Sorority House Massacre II (1990)',
 'Mafia! (1998)',
 'Senseless (1998)']

In [25]:
# Recommendation based on previously queried movies; in top-to-bottom order
# there are three movies in the history at this point.
# NOTE(review): the execution counts show the 'GoldenEye (1995)' cell (In [24])
# ran before this one (In [25]), so the saved output presumably reflects a
# four-movie history; a fresh Restart & Run All may give a different list.
recommender.recommend_on_history()

['World Is Not Enough, The (1999)',
 'Tomorrow Never Dies (1997)',
 'True Lies (1994)',
 'Clear and Present Danger (1994)',
 'Fugitive, The (1993)']

In [26]:
# Recommendation based on this movie (also appended to the history)
recommender.recommend_on_movie('Money Train (1995)')


['Big Hit, The (1998)',
 'Steal This Movie! (2000)',
 'Geronimo',
 'Firestorm (1998)',
 'Terminal Velocity (1994)']

In [27]:
# Recommendation based on previously queried movies; in top-to-bottom order
# there are four movies in the history at this point.
# NOTE(review): the 'GoldenEye (1995)' cell (In [24]) was executed before this
# one (In [27]), so the saved output presumably reflects a five-movie history.
recommender.recommend_on_history()

['World Is Not Enough, The (1999)',
 'Tomorrow Never Dies (1997)',
 'True Lies (1994)',
 'Clear and Present Danger (1994)',
 'Batman Returns (1992)']

In [24]:
# Recommendation based on this movie (also appended to the history).
# NOTE(review): this cell's execution count (In [24]) is lower than that of the
# cells placed above it (In [25]-In [27]), i.e. it was run out of order; move it
# or re-run the notebook top to bottom so position and outputs agree.
recommender.recommend_on_movie('GoldenEye (1995)')

['Tomorrow Never Dies (1997)',
 'World Is Not Enough, The (1999)',
 'Enemy of the State (1998)',
 'Spy Who Loved Me, The (1977)',
 'Clear and Present Danger (1994)']

In [28]:
# Recommendation based on previously queried movies; this time there are five
# movies in the history.
recommender.recommend_on_history()

['World Is Not Enough, The (1999)',
 'Tomorrow Never Dies (1997)',
 'True Lies (1994)',
 'Clear and Present Danger (1994)',
 'Batman Returns (1992)']

In [29]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words encoding of the `genres` column: one row per movie, one column
# per vocabulary term, cell = how often the term occurs in that movie's genres.
vectorizer = CountVectorizer(stop_words='english')
genres = vectorizer.fit_transform(movies.genres).toarray()
# `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2;
# use its replacement and fall back only on releases that predate it.
if hasattr(vectorizer, 'get_feature_names_out'):
    feature_names = vectorizer.get_feature_names_out()
else:
    feature_names = vectorizer.get_feature_names()
contents = pd.DataFrame(genres, columns=feature_names)
# NOTE(review): the saved output reports 347 terms, including years ('1919')
# and words such as 'witch' or 'york' - far more than a genre vocabulary should
# hold. Presumably `genres` contains title fragments for rows whose title was
# split at a ';' on load; verify before relying on these features.
print('Shape of the content table :', contents.shape)
contents.head()

Shape of the content table : (3883, 347)




Unnamed: 0,1919,1956,1963,1968,1974,1977,1978,1979,1980,1981,...,wight,willowbrook,witch,worrying,wrath,years,yellow,yes,york,yu
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
from sklearn.neighbors import NearestNeighbors

# Index every movie's genre vector (a row of `contents`) for cosine-distance
# nearest-neighbour queries.
# NOTE: this rebinds the global `nn_algo` that was earlier fitted on
# `rating_pivot`; from here on the name refers to the content-based model.
nn_algo = NearestNeighbors(metric='cosine').fit(contents)
nn_algo

NearestNeighbors(metric='cosine')

In [31]:
class Recommender:
    """Content-based recommender.

    Movies are compared through their content vectors (rows of the
    bag-of-words table) using a fitted nearest-neighbour model. Row ``i`` of
    the content table is taken to describe row ``i`` of the movies table.

    Parameters
    ----------
    movies_df : DataFrame, optional
        Table with at least a 'title' column. Defaults to the notebook-level
        ``movies``.
    content_matrix : DataFrame, optional
        One feature row per movie, in the same row order as ``movies_df``.
        Defaults to the notebook-level ``contents``.
    model : optional
        Object exposing ``kneighbors(X, n_neighbors=...)`` that was fitted on
        ``content_matrix``. Defaults to the notebook-level ``nn_algo``.

    Attributes
    ----------
    hist : list
        Row positions of movies successfully passed to ``recommend_on_movie``,
        in call order.
    ishist : bool
        True once ``hist`` is non-empty (kept for backward compatibility).
    """

    def __init__(self, movies_df=None, content_matrix=None, model=None):
        # Bind the collaborators once, at construction time, so that rebinding
        # the notebook globals later cannot silently change what an existing
        # instance does.
        self._movies = movies if movies_df is None else movies_df
        self._contents = contents if content_matrix is None else content_matrix
        self._model = nn_algo if model is None else model
        # Movies that were queried at least once through recommend_on_movie.
        self.hist = []
        self.ishist = False  # mirrors bool(self.hist)

    def _row_position(self, title):
        """Return the positional row of the first movie titled ``title``."""
        positions = np.flatnonzero((self._movies['title'] == title).to_numpy())
        if positions.size == 0:
            raise ValueError(f'Movie title not found: {title!r}')
        # A true position (not an index label) is required because it is used
        # with .iloc on both the movies table and the content table.
        return int(positions[0])

    def _recommend(self, query, exclude, n_reccomend):
        """Return up to ``n_reccomend`` neighbour titles, skipping ``exclude`` rows."""
        # Over-fetch by the number of excluded movies, but never request more
        # neighbours than there are rows in the fitted matrix.
        k = min(n_reccomend + len(exclude), len(self._contents))
        _, neighbors = self._model.kneighbors(query, n_neighbors=k)
        titles = [self._movies['title'].iloc[i] for i in neighbors[0] if i not in exclude]
        return titles[:n_reccomend]

    def recommend_on_movie(self, movie, n_reccomend=5):
        """Recommend movies whose content vectors are closest to ``movie``'s.

        The movie is appended to the history only after the query succeeded.

        Parameters
        ----------
        movie : str
            Exact title as stored in the movies table.
        n_reccomend : int, default 5
            Maximum number of titles to return.

        Returns
        -------
        list of str
            Recommended titles, nearest first, never including ``movie``.

        Raises
        ------
        ValueError
            If no movie has this title.
        """
        position = self._row_position(movie)
        # A one-row DataFrame keeps the column names the model was fitted with,
        # which avoids the "X does not have valid feature names" warning.
        query = self._contents.iloc[[position]]
        recommeds = self._recommend(query, {position}, n_reccomend)
        self.hist.append(position)
        self.ishist = True
        return recommeds

    def recommend_on_history(self, n_reccomend=5):
        """Recommend movies close to the average content vector of the history.

        Prints 'No history found' and returns None when the history is empty;
        otherwise returns a list of up to ``n_reccomend`` titles that are not
        already in the history.
        """
        if not self.hist:
            print('No history found')
            return None
        # Mean content vector over the history (repeated entries weigh more).
        profile = self._contents.iloc[self.hist].mean(axis=0).to_frame().T
        return self._recommend(profile, set(self.hist), n_reccomend)

In [32]:
# Initializing the content-based Recommender object; its history starts out
# empty. This rebinds the name `recommender` used earlier in the notebook.
recommender = Recommender()  

In [33]:
# Recommendation based on previously queried movies. The object was just
# initialized, so the history is empty and 'No history found' is printed.
recommender.recommend_on_history()    

No history found


In [34]:
# Recommendation based on this movie; the call also adds the movie to the
# recommender's history.
recommender.recommend_on_movie('Father of the Bride Part II (1995)')

  "X does not have valid feature names, but"


['Waiting for Guffman (1996)',
 'Jimmy Hollywood (1994)',
 'Kolya (1996)',
 'Life with Mikey (1993)',
 '8 1/2 Women (1999)']

In [35]:
# Recommendation based on previously queried movies; this time there is one
# movie in the history.
recommender.recommend_on_history()

  "X does not have valid feature names, but"


['Waiting for Guffman (1996)',
 'Jimmy Hollywood (1994)',
 'Kolya (1996)',
 'Life with Mikey (1993)',
 '8 1/2 Women (1999)']

In [36]:
# Recommendation based on this movie (also appended to the history)
recommender.recommend_on_movie('Tigerland (2000)')

  "X does not have valid feature names, but"


['Breaking the Waves (1996)',
 'Jails, Hospitals & Hip-Hop (2000)',
 'They Bite (1996)',
 'Black Tights (Les Collants Noirs) (1960)',
 'Identification of a Woman (Identificazione di una donna) (1982)']

In [37]:
# Recommendation based on previously queried movies; this time there are two
# movies in the history.
recommender.recommend_on_history()

  "X does not have valid feature names, but"


['Sleepover (1995)',
 'Seven Beauties (Pasqualino Settebellezze) (1976)',
 'Virgin Suicides, The (1999)',
 'Man on the Moon (1999)',
 'Two Girls and a Guy (1997)']

In [38]:
# Recommendation based on this movie (also appended to the history).
# NOTE(review): 'Dracula' and every title returned below lack a release year -
# presumably these rows had their title cut at a ';' on load, so their genre
# text (and therefore their similarity) may be unreliable; verify.
recommender.recommend_on_movie('Dracula')

  "X does not have valid feature names, but"


['Nemesis 2',
 'Best of the Best 3',
 'Mighty Morphin Power Rangers',
 'Gumby',
 'Die Hard']

In [39]:
# Recommendation based on previously queried movies; this time there are three
# movies in the history.
recommender.recommend_on_history()

  "X does not have valid feature names, but"


['Play it to the Bone (1999)',
 'Seven Beauties (Pasqualino Settebellezze) (1976)',
 "Swept Away (Travolti da un insolito destino nell'azzurro mare d'Agosto) (1975)",
 'Muse, The (1999)',
 'Carriers Are Waiting, The (Les Convoyeurs Attendent) (1999)']

In [40]:
# Recommendation based on this movie (also appended to the history)
recommender.recommend_on_movie('Money Train (1995)')

  "X does not have valid feature names, but"


['Shadow, The (1994)',
 'Black Mask (Hak hap) (1996)',
 'Stranger, The (1994)',
 'Shanghai Noon (2000)',
 'Thunderball (1965)']

In [41]:
# Recommendation based on previously queried movies; this time there are four
# movies in the history.
recommender.recommend_on_history()

  "X does not have valid feature names, but"


['Get Shorty (1995)',
 'Buffalo 66 (1998)',
 'Faster Pussycat! Kill! Kill! (1965)',
 'Lethal Weapon 4 (1998)',
 'Lethal Weapon 2 (1989)']

In [42]:
# Recommendation based on this movie (also appended to the history)
recommender.recommend_on_movie('GoldenEye (1995)')

  "X does not have valid feature names, but"


['Anaconda (1997)',
 'Clear and Present Danger (1994)',
 'Surviving the Game (1994)',
 'Chain Reaction (1996)',
 'Rock, The (1996)']

In [43]:
# Recommendation based on previously queried movies; this time there are five
# movies in the history.
recommender.recommend_on_history()

  "X does not have valid feature names, but"


['Runaway Train (1985)',
 'Daylight (1996)',
 'Con Air (1997)',
 'Fire Down Below (1997)',
 'Outbreak (1995)']