1. Data Loading and Preview

In [42]:
import pandas as pd

# Read the movie catalogue into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file exists.
df = pd.read_csv(filepath_or_buffer='C:/Users/DELL/movies.txt')

# Print the first five rows as a quick sanity check of the columns
print(df.head(n=5))


   movieId                     title         genres
0        1  The Shawshank Redemption          Drama
1        2             The Godfather    Drama|Crime
2        3           The Dark Knight   Action|Crime
3        4                 Inception  Action|Sci-Fi
4        5                The Matrix  Action|Sci-Fi


2. Feature Extraction with TF-IDF Vectorizer


In [43]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Initialize the TF-IDF Vectorizer.
# Every label between '|' separators is kept as ONE token. The default word
# tokenizer would split hyphenated labels such as "Sci-Fi" into "sci" and
# "fi", counting a single genre as two terms and skewing the similarity.
# English stop-word filtering is omitted: genre labels are categorical values,
# not prose. Tokens are still lower-cased (vectorizer default).
tfidf = TfidfVectorizer(token_pattern=r'[^|]+')

# Fit and transform the genre data. Missing genres are replaced with an empty
# document because the vectorizer rejects NaN input; such rows end up with an
# all-zero vector.
tfidf_matrix = tfidf.fit_transform(df['genres'].fillna(''))


3. Compute Cosine Similarity


In [44]:
from sklearn.metrics.pairwise import linear_kernel

# Pairwise similarity of every movie against every other movie.
# linear_kernel is a plain dot product; it equals cosine similarity here only
# because TfidfVectorizer L2-normalises each row by default (norm='l2').
cosine_sim = linear_kernel(X=tfidf_matrix, Y=tfidf_matrix)


4. Create Index for Movie Titles


In [45]:
# Build a plain dict mapping each movie title to its DataFrame index label.
# If a title occurs more than once, the last occurrence wins.
indices = (
    pd.Series(df.index, index=df['title'])
    .to_dict()
)


5. Recommendation Function


In [46]:
def recommend_movie(title, cosine_sim=cosine_sim, top_n=5):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact movie title to look up in ``indices``.
    cosine_sim : array-like, optional
        Pairwise similarity matrix; row ``i`` holds the scores of movie ``i``
        against every movie. Defaults to the notebook-level ``cosine_sim``
        as it existed when this function was defined.
    top_n : int, optional
        Maximum number of recommendations to return (default 5). Values
        below zero are treated as zero.

    Returns
    -------
    pandas.Series or str
        Titles taken from ``df['title']``, most similar first. If ``title``
        is not a key of ``indices``, the string
        ``"Movie not found in database."`` is returned instead.
    """
    # Get the index of the movie
    idx = indices.get(title)
    if idx is None:
        return "Movie not found in database."

    # Score every *other* movie against the requested one. The query movie is
    # excluded by its index rather than by dropping the first sorted entry:
    # a movie with identical genres also scores 1.0, and because the sort is
    # stable it would come first whenever its row precedes the query's row,
    # so the query movie itself would be recommended.
    # NOTE(review): idx is used as a row position here and in .iloc below;
    # this assumes df has the default 0..n-1 index - confirm.
    candidates = [
        (i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx
    ]

    # Highest similarity first; ties keep their original row order
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Keep the row positions of the top_n best matches
    movie_indices = [i for i, _ in candidates[:max(top_n, 0)]]

    return df['title'].iloc[movie_indices]


6. Example Usage


In [47]:
# Demo: print the recommendations for a single title
print(recommend_movie(title='Inception'))

4          The Matrix
11             Avatar
17      Jurassic Park
15       Interstellar
2     The Dark Knight
Name: title, dtype: object
