In [2]:
import pandas as pd
# Read the movie table from disk and preview its first five rows.
movies_df = pd.read_csv("movies.csv")
movies_df.head(5)

Unnamed: 0,movie_id,title,genres,director,cast,release_year,description
0,1,Inception,"Sci-Fi, Thriller",Christopher Nolan,"Leonardo DiCaprio, Tom Hardy",2010,A thief steals secrets through dream invasion.
1,2,Titanic,"Romance, Drama",James Cameron,"Leonardo DiCaprio, Kate Winslet",1997,A tragic love story on a doomed ship.
2,3,Interstellar,"Sci-Fi, Drama",Christopher Nolan,"Matthew McConaughey, Anne Hathaway",2014,A team travels through a wormhole to save Earth.
3,4,The Dark Knight,"Action, Crime, Drama",Christopher Nolan,"Christian Bale, Heath Ledger",2008,Batman faces the chaos created by the Joker.
4,5,The Matrix,"Sci-Fi, Action",The Wachowskis,"Keanu Reeves, Laurence Fishburne",1999,A hacker learns about a simulated reality.


# Count the missing values in each column.
movies_df.isna().sum()

In [5]:
# Text columns that are joined into one document per movie.
feature_columns = ['title', 'genres', 'director', 'cast', 'description']

# Concatenating with `+` turns the whole result into NaN as soon as one field
# is missing, and the TF-IDF step cannot vectorise NaN documents. Replace
# missing values with empty strings (and force everything to str) before
# joining the fields with single spaces.
movies_df['combined_features'] = (
    movies_df[feature_columns]
    .fillna('')
    .astype(str)
    .agg(' '.join, axis=1)
)
movies_df[['title', 'combined_features']].head()

Unnamed: 0,title,combined_features
0,Inception,"Inception Sci-Fi, Thriller Christopher Nolan L..."
1,Titanic,"Titanic Romance, Drama James Cameron Leonardo ..."
2,Interstellar,"Interstellar Sci-Fi, Drama Christopher Nolan M..."
3,The Dark Knight,"The Dark Knight Action, Crime, Drama Christoph..."
4,The Matrix,"The Matrix Sci-Fi, Action The Wachowskis Keanu..."


In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Vectorise the combined text of every movie with TF-IDF, using the
# vectoriser's built-in English stop-word list.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies_df['combined_features'])
print("TF-IDF Matrix Shape:", tfidf_matrix.shape)

TF-IDF Matrix Shape: (170, 1383)


In [8]:
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise similarity between every pair of movie vectors. When the second
# argument is omitted, the matrix is compared against itself.
cosine_sim = cosine_similarity(tfidf_matrix)
print('Cosine Similarity Matrix Shape:', cosine_sim.shape)

Cosine Similarity Matrix Shape: (170, 170)


In [14]:
def get_index_from_title(title):
    """Return the row position of the first movie whose title matches.

    The comparison ignores letter case and surrounding whitespace.

    The value returned is a 0-based row *position* in ``movies_df`` rather
    than an index label, so it stays valid for positional lookups even when
    the frame does not carry a default 0..n-1 index.

    Args:
        title: Movie title to look up.

    Returns:
        Integer position of the first matching row.

    Raises:
        IndexError: If no row has a matching title.
    """
    wanted = title.strip().lower()
    normalized_titles = movies_df['title'].str.strip().str.lower()
    # Missing titles never match; force a plain boolean mask.
    is_match = (normalized_titles == wanted).fillna(False).to_numpy(dtype=bool)
    # nonzero() lists the matching row positions; taking [0] of an empty
    # result raises IndexError, which is the "not found" signal.
    return is_match.nonzero()[0][0]

def get_title_from_index(index):
    """Look up the ``title`` value of the row at the given position in ``movies_df``."""
    selected_row = movies_df.iloc[index]
    return selected_row['title']


def recommend_movies(movie_title, num_recommendations=5):
    """Print the movies most similar to ``movie_title``.

    Similarity is read from the precomputed ``cosine_sim`` matrix. The
    queried movie itself is never listed, even when another row ties with
    it on score.

    Args:
        movie_title: Title to look up via ``get_index_from_title``.
        num_recommendations: Maximum number of movies to list. Values
            below zero are treated as zero.

    Returns:
        None. Results, or a "not found" message, are printed.
    """
    # Only the title lookup is guarded: an IndexError raised later would be a
    # real bug and must not be reported as "movie not found".
    try:
        movie_index = get_index_from_title(movie_title)
    except IndexError:
        print(" Movie not found. Please check the title")
        return

    # Drop the queried movie by position instead of assuming it sorts first;
    # with tied scores (e.g. duplicate rows) it may not be at the front.
    candidates = [
        (index, score)
        for index, score in enumerate(cosine_sim[movie_index])
        if index != movie_index
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    top_matches = candidates[:max(num_recommendations, 0)]

    print(f"\nMovies similar to '{movie_title}':\n")
    for rank, (index, score) in enumerate(top_matches, 1):
        print(f"{rank}. {get_title_from_index(index)}  (Similarity Score: {round(score, 2)})")

In [15]:
# Example query: list the five closest matches for "inception".
recommend_movies("inception", num_recommendations=5)


Movies similar to 'inception':

1. The Revenant  (Similarity Score: 0.3)
2. Interstellar  (Similarity Score: 0.18)
3. Tenet  (Similarity Score: 0.17)
4. Titanic  (Similarity Score: 0.14)
5. The Prestige  (Similarity Score: 0.12)
