In [12]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Toy catalogue: five films, each paired with a one-sentence plot summary
data = dict(
    title=['The Matrix', 'Avatar', 'Serenity', 'War of the Worlds', 'District 9'],
    description=[
        'A computer hacker learns from mysterious rebels about the true nature of his reality and his role in the war against its controllers.',
        'A paraplegic Marine dispatched to the moon Pandora on a unique mission becomes torn between following his orders and protecting the world he feels is his home.',
        'The crew of the ship Serenity try to evade an assassin sent to recapture one of their members who is telepathic.',
        'As Earth is invaded by alien tripod fighting machines, one family fights for survival.',
        'An extraterrestrial race forced to live in slum-like conditions on Earth suddenly finds a kindred spirit in a government agent who is exposed to their biotechnology.',
    ],
)

# One row per film, with 'title' and 'description' columns
movies_df = pd.DataFrame(data=data)

# TF-IDF vectorizer that discards scikit-learn's built-in English stop words
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

# Learn the vocabulary from the descriptions and encode each description
# as one row of the TF-IDF matrix
tfidf_matrix = tfidf_vectorizer.fit_transform(raw_documents=movies_df['description'])

# Pairwise dot products between all description vectors. TfidfVectorizer
# L2-normalises rows by default, so these dot products are cosine similarities.
cosine_sim = linear_kernel(X=tfidf_matrix, Y=tfidf_matrix)

# Function to get recommendations
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to *title*.

    Args:
        title: Exact title to look up in ``movies_df['title']``. If several
            rows share the title, the first one is used.
        cosine_sim: Pairwise similarity matrix; row/column ``i`` is expected
            to correspond to the ``i``-th row of ``movies_df``. Defaults to
            the module-level matrix computed from the descriptions.
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        A pandas Series of titles, ordered from most to least similar. The
        queried movie itself is never included.

    Raises:
        IndexError: If no movie in ``movies_df`` has the given title.
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Look up the row *position* (not the index label) of the title, because
    # both cosine_sim and .iloc below are addressed positionally.
    positions = np.flatnonzero((movies_df['title'] == title).to_numpy())
    if positions.size == 0:
        raise IndexError(f"no movie titled {title!r} in movies_df")
    idx = positions[0]

    # Pairwise similarity scores of every other movie with that movie.
    # The query is excluded by position rather than by dropping the first
    # sorted entry: with tied scores (e.g. duplicate descriptions) or an
    # all-zero TF-IDF vector the query is not guaranteed to sort first.
    sim_scores = [
        (pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx
    ]

    # Sort by similarity, highest first; sorted() is stable, so ties keep
    # their original row order.
    sim_scores = sorted(sim_scores, key=lambda pair: pair[1], reverse=True)

    # Keep the positions of the top_n most similar movies
    movie_indices = [pos for pos, _ in sim_scores[:top_n]]

    # Return the titles of the most similar movies
    return movies_df['title'].iloc[movie_indices]

# Example usage: print the titles ranked most similar to 'The Matrix'
print(get_recommendations(title='The Matrix'))


1               Avatar
2             Serenity
3    War of the Worlds
4           District 9
Name: title, dtype: object
