In [14]:
import os
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import sigmoid_kernel

In [7]:
# Switch the working directory to the project folder so the relative CSV path
# used later resolves. The path is a raw string: in a normal string literal
# "\D" and "\M" are invalid escape sequences, which newer Python versions warn
# about and will eventually reject.
# NOTE(review): this is a machine-specific absolute path; consider making it
# configurable before sharing the notebook.
os.chdir(r"E:\Data Science Project\Movie Recommendation System with sentiment analysis-Project 3")

In [8]:
# Load the movie table from "main_data.csv" in the current working directory.
main_data = pd.read_csv(filepath_or_buffer="main_data.csv")

In [9]:
# Display the loaded DataFrame as the cell output for a quick visual check.
main_data

Unnamed: 0,director_name,actor_1_name,actor_2_name,actor_3_name,genres,movie_title,comb
0,James Cameron,CCH Pounder,Joel David Moore,Wes Studi,Action|Adventure|Fantasy|Sci-Fi,avatar,CCH Pounder Joel David Moore Wes Studi James C...
1,Gore Verbinski,Johnny Depp,Orlando Bloom,Jack Davenport,Action|Adventure|Fantasy,pirates of the caribbean: at world's end,Johnny Depp Orlando Bloom Jack Davenport Gore ...
2,Sam Mendes,Christoph Waltz,Rory Kinnear,Stephanie Sigman,Action|Adventure|Thriller,spectre,Christoph Waltz Rory Kinnear Stephanie Sigman ...
3,Christopher Nolan,Tom Hardy,Christian Bale,Joseph Gordon-Levitt,Action|Thriller,the dark knight rises,Tom Hardy Christian Bale Joseph Gordon-Levitt ...
4,Doug Walker,Doug Walker,Rob Walker,Unknown,Documentary,star wars: episode vii - the force awakens ...,Doug Walker Rob Walker Unknown Doug Walker Doc...
...,...,...,...,...,...,...,...
6056,Paul W. S. Anderson,Milla Jovovich,Tony Jaa,"Tip ""T.I."" Harris",Fantasy Action Adventure,monster hunter,"Milla Jovovich Tony Jaa Tip ""T.I."" Harris Paul..."
6057,Regina King,Kingsley Ben-Adir,Eli Goree,Aldis Hodge,Drama,one night in miami,Kingsley Ben-Adir Eli Goree Aldis Hodge Regina...
6058,Emerald Fennell,Carey Mulligan,Bo Burnham,Alison Brie,Thriller Crime Drama,promising young woman,Carey Mulligan Bo Burnham Alison Brie Emerald ...
6059,Eugene Ashe,Tessa Thompson,Nnamdi Asomugha,Ryan Michelle Bathe,Drama,sylvie's love,Tessa Thompson Nnamdi Asomugha Ryan Michelle B...


In [10]:
# Missing "comb" entries are replaced by empty strings so the column holds
# only text.
main_data['comb'] = main_data['comb'].fillna('')

# Word-level TF-IDF vectorizer over 1- to 3-grams, with accents stripped and
# English stop words removed.
tfv = TfidfVectorizer(
    analyzer='word',
    token_pattern=r'\w{1,}',  # a token is any run of one or more word characters
    ngram_range=(1, 3),
    stop_words='english',
    strip_accents='unicode',
    min_df=3,                 # ignore terms that occur in fewer than 3 documents
    max_features=None,        # no cap on the vocabulary size
)

In [11]:
# Fit the vectorizer on the "comb" column and, in the same step, encode every
# row of that column as a TF-IDF vector.
tfv_matrix = tfv.fit_transform(raw_documents=main_data['comb'])

In [12]:
# Show the repr of the fitted TF-IDF matrix (type, dimensions, storage format).
tfv_matrix

<6061x7841 sparse matrix of type '<class 'numpy.float64'>'
	with 96976 stored elements in Compressed Sparse Row format>

In [13]:
# Dimensions of the TF-IDF matrix as a (rows, columns) tuple.
tfv_matrix.shape

(6061, 7841)

In [15]:
# Pairwise sigmoid-kernel similarity of the TF-IDF matrix with itself:
# entry [i, j] scores row i against row j.
sig = sigmoid_kernel(X=tfv_matrix, Y=tfv_matrix)

In [16]:
# Inspect the similarity scores of the first row against every row.
sig[0]

array([0.76164771, 0.76159806, 0.76159573, ..., 0.76159416, 0.76159416,
       0.76159416])

In [19]:
# Reverse mapping: movie title -> row number in main_data.
indices = pd.Series(main_data.index, index=main_data['movie_title'])

# Keep only the first row for each title. Series.drop_duplicates() would not do
# this: it compares the VALUES (the row numbers, which are all unique) and so
# never removes anything. Duplicates have to be detected on the index (titles).
indices = indices[~indices.index.duplicated(keep='first')]

In [20]:
# Display the title -> row-number mapping for a quick visual check.
indices

movie_title
avatar                                                       0
pirates of the caribbean: at world's end                     1
spectre                                                      2
the dark knight rises                                        3
star wars: episode vii - the force awakens                   4
                                                          ... 
monster hunter                                            6056
one night in miami                                        6057
promising young woman                                     6058
sylvie's love                                             6059
pieces of a woman                                         6060
Length: 6061, dtype: int64

In [21]:
# Function to get recommendation

def give_rec(title, sig=sig):
    """Return the titles of the 10 movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Movie title to look up in the module-level ``indices`` Series.
    sig : array-like, optional
        Pairwise similarity matrix, read as ``sig[row]``. Defaults to the
        module-level ``sig`` object that existed when this function was
        defined.

    Returns
    -------
    pandas.Series
        Up to 10 entries of ``main_data['movie_title']``, ordered from the
        highest to the lowest similarity score. The queried movie itself is
        never part of the result.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    # Row number of the requested movie.
    try:
        idx = indices[title]
    except KeyError:
        raise KeyError(f"movie title {title!r} not found in indices") from None

    # A title that occurs more than once makes the lookup return a Series of
    # row numbers instead of a scalar; fall back to the first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Pair every row number with its score, leaving out the queried movie
    # explicitly. Skipping "whatever sorts first" is not reliable: when
    # another row ties with the queried one, the stable sort may place that
    # row first, and the queried movie would then be recommended to itself.
    sig_scores = [(i, score) for i, score in enumerate(sig[idx]) if i != idx]

    # Highest score first; list.sort is stable, so ties keep row order.
    sig_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Row numbers of the 10 best matches.
    movie_indices = [i for i, _ in sig_scores[:10]]

    return main_data['movie_title'].iloc[movie_indices]

In [27]:
# Try the content-based recommender on a single title.
give_rec(title='monster hunter')

362           the three musketeers
1373                 resident evil
778       resident evil: afterlife
635     resident evil: retribution
464                        pompeii
2626                     ong-bak 2
636                     death race
748                  event horizon
554        avp: alien vs. predator
516                        soldier
Name: movie_title, dtype: object