In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Sample dataset, written one movie per record as (title, genre, description)
# so that each film's fields sit next to each other.
_COLUMNS = ('title', 'genre', 'description')
_MOVIES = [
    (
        'The Shawshank Redemption',
        'Drama',
        'Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency.',
    ),
    (
        'The Godfather',
        'Crime, Drama',
        'The aging patriarch of an organized crime dynasty transfers control of his clandestine empire to his reluctant son.',
    ),
    (
        'The Dark Knight',
        'Action, Crime, Drama',
        'When the menace known as the Joker emerges from his mysterious past, he wreaks havoc and chaos on the people of Gotham.',
    ),
    (
        'Pulp Fiction',
        'Crime, Drama',
        "The lives of two mob hitmen, a boxer, a gangster's wife, and a pair of diner bandits intertwine in four tales of violence and redemption.",
    ),
    (
        'Fight Club',
        'Drama',
        'An insomniac office worker and a devil-may-care soapmaker form an underground fight club that evolves into something much more.',
    ),
    (
        'Forrest Gump',
        'Drama, Romance',
        'The presidencies of Kennedy and Johnson, the Vietnam War, the Watergate scandal and other historical events unfold from the perspective of an Alabama man with an IQ of 75.',
    ),
    (
        'Inception',
        'Action, Adventure, Sci-Fi',
        'A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into the mind of a CEO.',
    ),
    (
        'The Matrix',
        'Action, Sci-Fi',
        'A computer hacker learns from mysterious rebels about the true nature of his reality and his role in the war against its controllers.',
    ),
    (
        'Goodfellas',
        'Biography, Crime, Drama',
        'The story of Henry Hill and his life in the mob, covering his relationship with his wife Karen Hill and his mob partners Jimmy Conway and Tommy DeVito.',
    ),
    (
        'The Silence of the Lambs',
        'Crime, Drama, Thriller',
        'A young F.B.I. cadet must receive the help of an incarcerated and manipulative cannibal killer to help catch another serial killer, a madman who skins his victims.',
    ),
]

# Transpose the records into the column-oriented mapping the DataFrame is
# built from: one list per column, in record order.
data = {
    column: [record[position] for record in _MOVIES]
    for position, column in enumerate(_COLUMNS)
}

# Build the DataFrame and show it
df = pd.DataFrame(data)
print(df)

# Function to recommend movies based on a given movie title
def recommend_movies(movie_title, num_recommendations=5):
    """Return the titles of the movies most similar to ``movie_title``.

    Similarity is the cosine similarity between TF-IDF vectors of the
    ``description`` column of the module-level ``df`` (English stop words
    removed). The vectorizer is re-fitted on every call.

    Args:
        movie_title: Exact value to look up in ``df['title']``. If several
            rows match, the first one is used as the query.
        num_recommendations: Maximum number of titles to return. Values
            below zero are treated as zero.

    Returns:
        A ``pandas.Series`` of titles ordered from most to least similar,
        never containing the query row itself. Ties keep DataFrame order.

    Raises:
        IndexError: If no row of ``df`` has the given title.
    """
    # Work with row *positions*, not index labels: the TF-IDF matrix is
    # indexed positionally, so labels would be wrong for any non-default index.
    matches = (df['title'] == movie_title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"Movie title not found: {movie_title!r}")
    idx = int(matches[0])

    # Fit and transform the movie descriptions
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(df['description'])

    # Only the query row's similarities are needed, so compare that single
    # row against all rows instead of building the full n x n matrix.
    sim_row = cosine_similarity(tfidf_matrix[idx], tfidf_matrix).ravel()

    # Exclude the query explicitly rather than assuming it sorts first; that
    # assumption breaks when another movie has an identical description or
    # when the query vector is all zeros. sorted() is stable, so equal scores
    # stay in DataFrame order.
    ranked = sorted(
        (i for i in range(len(sim_row)) if i != idx),
        key=lambda i: sim_row[i],
        reverse=True,
    )

    # Clamp so a negative count yields an empty result, not a surprising slice.
    top = ranked[:max(num_recommendations, 0)]
    return df['title'].iloc[top]

# Demo: look up the five movies whose descriptions are closest to The Matrix
# and print them beneath a heading.
recommended_movies = recommend_movies(movie_title='The Matrix', num_recommendations=5)
print("Recommended movies:", recommended_movies, sep="\n")


                      title                      genre  \
0  The Shawshank Redemption                      Drama   
1             The Godfather               Crime, Drama   
2           The Dark Knight       Action, Crime, Drama   
3              Pulp Fiction               Crime, Drama   
4                Fight Club                      Drama   
5              Forrest Gump             Drama, Romance   
6                 Inception  Action, Adventure, Sci-Fi   
7                The Matrix             Action, Sci-Fi   
8                Goodfellas    Biography, Crime, Drama   
9  The Silence of the Lambs     Crime, Drama, Thriller   

                                         description  
0  Two imprisoned men bond over a number of years...  
1  The aging patriarch of an organized crime dyna...  
2  When the menace known as the Joker emerges fro...  
3  The lives of two mob hitmen, a boxer, a gangst...  
4  An insomniac office worker and a devil-may-car...  
5  The presidencies of Kennedy 