In [1]:
import pandas as pd

In [2]:
# Load the movie table. The fields are delimited by the two-character
# string '::', so the Python parsing engine is requested explicitly.
movies_data = pd.read_csv(
    'dataset/movies.dat',
    sep='::',
    engine='python',
)

In [3]:
# Split each '|'-separated genre string into a list, replace missing
# entries with an empty string, and store the text form of the result
# (the vectorizer in a later cell expects plain strings).
movies_data['genres'] = (
    movies_data['genres']
    .str.split('|')
    .fillna('')
    .astype(str)
)
movies_data.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),"['Animation', ""Children's"", 'Comedy']"
1,2,Jumanji (1995),"['Adventure', ""Children's"", 'Fantasy']"
2,3,Grumpier Old Men (1995),"['Comedy', 'Romance']"
3,4,Waiting to Exhale (1995),"['Comedy', 'Drama']"
4,5,Father of the Bride Part II (1995),['Comedy']


In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Turn every movie's genre text into a TF-IDF vector built from single
# words and adjacent word pairs, with English stop words filtered out.
tf = TfidfVectorizer(
    stop_words='english',
    ngram_range=(1, 2),
)
tfidf_matrix = tf.fit_transform(movies_data.genres)
tfidf_matrix.shape

(3883, 127)

In [5]:
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity between all movies; with a single argument
# the matrix is compared against itself, giving a square result.
cos_similarity = cosine_similarity(tfidf_matrix)
cos_similarity.shape

(3883, 3883)

In [6]:
# `titles` is the title column of the movie table;
# `indices` is the reverse lookup from a title to its row label.
titles = movies_data.title
indices = pd.Series(data=movies_data.index, index=titles)
titles.head()

0                      Toy Story (1995)
1                        Jumanji (1995)
2               Grumpier Old Men (1995)
3              Waiting to Exhale (1995)
4    Father of the Bride Part II (1995)
Name: title, dtype: object

In [7]:
def get_recommendations(title, cos_similarity = cos_similarity, top_n = 10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact movie title, looked up in the module-level ``indices`` Series.
    cos_similarity : 2-D array-like, optional
        Similarity matrix; row ``i`` holds the scores of movie ``i`` against
        every movie. The default is bound when this cell is executed, so
        re-run the cell after recomputing the matrix.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        Up to ``top_n`` entries of the module-level ``titles`` Series,
        ordered from most to least similar. The queried movie itself is
        never part of the result.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # A title that occurs more than once makes the lookup return a Series
    # instead of a scalar; fall back to the first matching row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # sorted() is stable, so movies with equal scores stay in row order.
    ranked = sorted(
        enumerate(cos_similarity[idx]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Drop the queried movie explicitly. Movies with identical genre text
    # share the same top score, so the query is not guaranteed to be the
    # first element and slicing off position 0 can remove the wrong entry.
    movie_indices = [pos for pos, _ in ranked if pos != idx][:top_n]

    # Matrix positions are row positions, hence positional indexing.
    return titles.iloc[movie_indices]

In [10]:
# Example query: the ten movies whose genres best match this title.
get_recommendations(title='Waiting to Exhale (1995)')

44                              To Die For (1995)
71                   Kicking and Screaming (1995)
74                               Big Bully (1996)
83             Last Summer in the Hamptons (1995)
104    Nobody Loves Me (Keiner liebt mich) (1994)
131                              Nueba Yol (1995)
164                   Doom Generation, The (1995)
203                        Unstrung Heroes (1995)
216                       Boys on the Side (1995)
229                    Eat Drink Man Woman (1994)
Name: title, dtype: object