In [1]:
#Importing Libraries
import pandas as pd
import numpy as np

In [3]:
#Importing csv file 
movies_recomendation = pd.read_csv('./movies.csv', sep=',', encoding='latin-1', usecols=['movieId', 'title', 'genres'])

In [4]:
# Genre column processing
movies_recomendation['genres'] = movies_recomendation['genres'].str.split("|")
movies_recomendation['genres'] = movies_recomendation['genres'].fillna("").astype("str")

In [10]:
# TF-IDF Vectorization
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer(analyzer='word', ngram_range=(1,2), stop_words='english')
tfidf_matrix = tf.fit_transform(movies_recomendation['genres'])

In [11]:
tfidf_matrix

<9742x177 sparse matrix of type '<class 'numpy.float64'>'
	with 36628 stored elements in Compressed Sparse Row format>

In [12]:
# cosine similarity computation
from sklearn.metrics.pairwise import linear_kernel
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [13]:
np.set_printoptions(threshold=1000)
print(cosine_sim)

[[1.         0.31379419 0.0611029  ... 0.         0.16123168 0.16761358]
 [0.31379419 1.         0.         ... 0.         0.         0.        ]
 [0.0611029  0.         1.         ... 0.         0.         0.36454626]
 ...
 [0.         0.         0.         ... 1.         0.         0.        ]
 [0.16123168 0.         0.         ... 0.         1.         0.        ]
 [0.16761358 0.         0.36454626 ... 0.         0.         1.        ]]


In [14]:
# preparing for recommendation
titles = movies_recomendation['title']
indices = pd.Series(movies_recomendation.index, index=movies_recomendation['title'])

In [15]:
# Genre recommendation Function
def genre_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:21]
    movie_indices = [i[0] for i in sim_scores]
    return titles.iloc[movie_indices]


In [16]:
# Example Usage
genre_recommendations('Toy Story (1995)').head(10)


1706                                          Antz (1998)
2355                                   Toy Story 2 (1999)
2809       Adventures of Rocky and Bullwinkle, The (2000)
3000                     Emperor's New Groove, The (2000)
3568                                Monsters, Inc. (2001)
6194                                     Wild, The (2006)
6486                               Shrek the Third (2007)
6948                       Tale of Despereaux, The (2008)
7760    Asterix and the Vikings (AstÃ©rix et les Vikin...
8219                                         Turbo (2013)
Name: title, dtype: object

In [17]:
genre_recommendations('Richard III (1995)').head(10)

65                         MisÃ©rables, Les (1995)
182          Before the Rain (Pred dozhdot) (1994)
294                       Walking Dead, The (1995)
461                        Schindler's List (1993)
535    Land and Freedom (Tierra y libertad) (1995)
605                              Stalingrad (1993)
747                        Nothing Personal (1995)
829                                 Platoon (1986)
872       Tin Drum, The (Blechtrommel, Die) (1979)
883                          Paths of Glory (1957)
Name: title, dtype: object

In [18]:
genre_recommendations("Indian in the Cupboard, The (1995)").head(10)

53                     Indian in the Cupboard, The (1995)
109                     NeverEnding Story III, The (1994)
767                       Escape to Witch Mountain (1975)
1514            Darby O'Gill and the Little People (1959)
1556                                  Return to Oz (1985)
1617                        NeverEnding Story, The (1984)
1618    NeverEnding Story II: The Next Chapter, The (1...
1799                        Santa Claus: The Movie (1985)
3574    Harry Potter and the Sorcerer's Stone (a.k.a. ...
6075    Chronicles of Narnia: The Lion, the Witch and ...
Name: title, dtype: object