In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
movies = pd.read_csv('movies.csv', sep=',', encoding='latin-1', usecols=['title', 'genres'])

In [3]:
movies['genres'] = movies['genres'].str.split('|')
movies['genres'] = movies['genres'].fillna("").astype('str')

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(movies['genres'])
tfidf_matrix.shape

(9742, 177)

In [5]:
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
cosine_sim[:4, :4]

array([[1.        , 0.31379419, 0.0611029 , 0.05271111],
       [0.31379419, 1.        , 0.        , 0.        ],
       [0.0611029 , 0.        , 1.        , 0.35172407],
       [0.05271111, 0.        , 0.35172407, 1.        ]])

In [6]:
titles = movies['title']
indices = pd.Series(movies.index, index=movies['title'])

In [7]:
def genre_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:21]
    movie_indices = [i[0] for i in sim_scores]
    return titles.iloc[movie_indices]

In [8]:
genre_recommendations('Dark Knight, The (2008)').head(20)

8387                          Need for Speed (2014)
8149      Grandmaster, The (Yi dai zong shi) (2013)
123                                Apollo 13 (1995)
8026                              Life of Pi (2012)
8396                                    Noah (2014)
38                           Dead Presidents (1995)
341                              Bad Company (1995)
347             Faster Pussycat! Kill! Kill! (1965)
430                        Menace II Society (1993)
568                          Substitute, The (1996)
665                          Nothing to Lose (1994)
1645                       Untouchables, The (1987)
1696                           Monument Ave. (1998)
2563                              Death Wish (1974)
2574                        Band of the Hand (1986)
3037                              Foxy Brown (1974)
3124    Harley Davidson and the Marlboro Man (1991)
3167                                Scarface (1983)
3217                               Swordfish (2001)
3301        

In [30]:
def genre_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:21]
    movie_indices = [i[0] for i in sim_scores]
    
    recommendations = movies.iloc[movie_indices][['title', 'genres']]
    recommendations['similarity_score'] = [i[1] for i in sim_scores]
    
    return recommendations

In [32]:
print("Recommendations for 'Ghost (1990)':")
genre_recommendations('Ghost (1990)')

Recommendations for 'Ghost (1990)':


Unnamed: 0,title,genres,similarity_score
6905,Twilight (2008),"['Drama', 'Fantasy', 'Romance', 'Thriller']",0.94052
1085,Michael (1996),"['Comedy', 'Drama', 'Fantasy', 'Romance']",0.844785
1530,"Purple Rose of Cairo, The (1985)","['Comedy', 'Drama', 'Fantasy', 'Romance']",0.844785
2103,Big (1988),"['Comedy', 'Drama', 'Fantasy', 'Romance']",0.844785
2350,"Fisher King, The (1991)","['Comedy', 'Drama', 'Fantasy', 'Romance']",0.844785
2510,Defending Your Life (1991),"['Comedy', 'Drama', 'Fantasy', 'Romance']",0.844785
3097,"Price of Milk, The (2000)","['Comedy', 'Drama', 'Fantasy', 'Romance']",0.844785
3249,Alice (1990),"['Comedy', 'Drama', 'Fantasy', 'Romance']",0.844785
4356,Bruce Almighty (2003),"['Comedy', 'Drama', 'Fantasy', 'Romance']",0.844785
4744,Juliet of the Spirits (Giulietta degli spiriti...,"['Comedy', 'Drama', 'Fantasy', 'Romance']",0.844785


In [33]:
print("Recommendations for 'Terminator, The (1984)':")
genre_recommendations('Terminator, The (1984)')

Recommendations for 'Terminator, The (1984)':


Unnamed: 0,title,genres,similarity_score
68,Screamers (1995),"['Action', 'Sci-Fi', 'Thriller']",1.0
144,Johnny Mnemonic (1995),"['Action', 'Sci-Fi', 'Thriller']",1.0
296,Virtuosity (1995),"['Action', 'Sci-Fi', 'Thriller']",1.0
336,Timecop (1994),"['Action', 'Sci-Fi', 'Thriller']",1.0
474,Blade Runner (1982),"['Action', 'Sci-Fi', 'Thriller']",1.0
567,Solo (1996),"['Action', 'Sci-Fi', 'Thriller']",1.0
601,"Arrival, The (1996)","['Action', 'Sci-Fi', 'Thriller']",1.0
939,"Terminator, The (1984)","['Action', 'Sci-Fi', 'Thriller']",1.0
1373,Godzilla (1998),"['Action', 'Sci-Fi', 'Thriller']",1.0
1939,"Matrix, The (1999)","['Action', 'Sci-Fi', 'Thriller']",1.0
