In [6]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Sample movie catalogue: each title is paired positionally with its
# comma-separated genre string.
data = {'Movie': ['The Dark Knight', 'Inception', 'Interstellar', 'The Prestige',
                   'The Shawshank Redemption', 'The Godfather', 'Pulp Fiction', 'Forrest Gump',
                   'Good Will Hunting', 'The Matrix', 'Gladiator', 'Saving Private Ryan',
                   'Back to the Future', 'Jurassic Park', 'Toy Story', 'Finding Nemo'],
        # Genre labels are spelled consistently ('Sci-Fi' everywhere) so the
        # same genre always yields the same tokens regardless of tokenizer.
        'Genre': ['Action, Crime, Drama', 'Action, Adventure, Sci-Fi', 'Adventure, Drama, Sci-Fi', 'Drama, Mystery, Sci-Fi',
                  'Drama', 'Crime, Drama', 'Crime, Drama', 'Drama, Romance',
                  'Drama, Romance', 'Action, Sci-Fi', 'Action, Adventure, Drama', 'Drama, War',
                  'Adventure, Comedy, Sci-Fi', 'Adventure, Sci-Fi', 'Animation, Adventure, Comedy', 'Animation, Adventure, Comedy']}

df = pd.DataFrame(data)

# Create a TF-IDF Vectorizer to convert genre strings into numerical data
tfidf = TfidfVectorizer(stop_words='english')

# Fit and transform the data: one row per movie, one column per genre token
tfidf_matrix = tfidf.fit_transform(df['Genre'])

# Compute the pairwise similarity matrix. With TfidfVectorizer's default
# L2 row normalisation, the plain dot product computed by linear_kernel is
# the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Create a mapping of movie titles to their row index. De-duplicate on the
# *titles* (the Series index), keeping the first occurrence, so a lookup by
# title always yields a single row position. Calling drop_duplicates() on the
# Series would compare the row positions instead, which are always unique.
indices = pd.Series(df.index, index=df['Movie'])
indices = indices[~indices.index.duplicated(keep='first')]

def get_recommendations(title, cosine_sim=cosine_sim, top_n=5):
    """
    Return the movies whose genres are most similar to those of ``title``.

    Args:
        title: Movie title to look up; it must match a key of the
            module-level ``indices`` mapping exactly.
        cosine_sim: Square pairwise similarity matrix whose rows and columns
            follow the row order of the module-level ``df``.
        top_n: Maximum number of recommendations to return (default 5).

    Returns:
        A pandas Series of movie titles ordered from most to least similar
        (the queried movie itself is never included), or an explanatory
        message string when ``title`` is not in the database.
    """
    # Only the title lookup can legitimately raise KeyError, so keep the
    # try body limited to it.
    try:
        idx = indices[title]
    except KeyError:
        return "Movie not found in the database. Please try another movie."

    # If the mapping contains the same title more than once, the lookup
    # yields a Series of positions; fall back to the first occurrence.
    if hasattr(idx, 'iloc'):
        idx = idx.iloc[0]

    # Pair every other movie's row index with its similarity to the query.
    # The query movie is excluded by index rather than by dropping the first
    # sorted entry: a movie with identical genres ties with the query's own
    # score, and the stable sort may then place that movie first, which would
    # leave the query movie inside its own recommendations.
    sim_scores = [
        (row, score)
        for row, score in enumerate(cosine_sim[idx])
        if row != idx
    ]

    # Most similar first; ties keep their original row order (stable sort).
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Keep the row indices of the top_n most similar movies.
    movie_indices = [row for row, _ in sim_scores[:max(top_n, 0)]]

    return df['Movie'].iloc[movie_indices]

if __name__ == '__main__':
    # Get user input for a movie they like. Surrounding whitespace is
    # stripped because the title lookup is an exact match and a stray
    # leading/trailing space would otherwise report the movie as missing.
    user_movie = input("Enter a movie you like to get recommendations: ").strip()

    # Get the recommendations for that title
    recommendations = get_recommendations(user_movie)

    # get_recommendations returns a plain message string when the title is
    # unknown; print the "Recommended Movies" header only for real results
    # so the error message is not presented as a recommendation.
    if isinstance(recommendations, str):
        print("\n" + recommendations)
    else:
        print("\nRecommended Movies:")
        print(recommendations)

Enter a movie you like to get recommendations: The Prestige

Recommended Movies:
2           Interstellar
13         Jurassic Park
9             The Matrix
1              Inception
12    Back to the Future
Name: Movie, dtype: object
