# Movie Recommendation System Using Cosine Similarity


In [6]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [7]:


# Sample movie dataset (You can replace this with a real movie dataset)

data = {
    'movie_id': list(range(1, 21)),  # More movies added
    'title': [
        'The Matrix', 'John Wick', 'The Godfather', 'Pulp Fiction', 'The Dark Knight',
        'Inception', 'Interstellar', 'Fight Club', 'Forrest Gump', 'The Shawshank Redemption',
        'The Avengers', 'Titanic', 'The Lord of the Rings: The Return of the King',
        'Gladiator', 'The Lion King', 'Avengers: Endgame', 'Joker', 'Parasite',
        'Coco', 'Whiplash'
    ],
    'genre': [
        'Action, Sci-Fi', 'Action, Thriller', 'Crime, Drama', 'Crime, Drama', 'Action, Crime, Drama',
        'Sci-Fi, Thriller', 'Sci-Fi, Drama', 'Drama', 'Drama, Romance', 'Drama, Crime',
        'Action, Adventure, Sci-Fi', 'Romance, Drama', 'Action, Adventure, Fantasy',
        'Action, Drama, History', 'Animation, Adventure, Drama', 'Action, Adventure, Sci-Fi',
        'Crime, Drama, Thriller', 'Drama, Thriller', 'Animation, Adventure, Family', 'Drama, Music'
    ]
}



In [8]:
# Convert the dataset into a DataFrame
df = pd.DataFrame(data)

# Display the dataset
print("Movie Data:")
print(df)


Movie Data:
    movie_id                                          title  \
0          1                                     The Matrix   
1          2                                      John Wick   
2          3                                  The Godfather   
3          4                                   Pulp Fiction   
4          5                                The Dark Knight   
5          6                                      Inception   
6          7                                   Interstellar   
7          8                                     Fight Club   
8          9                                   Forrest Gump   
9         10                       The Shawshank Redemption   
10        11                                   The Avengers   
11        12                                        Titanic   
12        13  The Lord of the Rings: The Return of the King   
13        14                                      Gladiator   
14        15                               

In [9]:
# Define a TF-IDF Vectorizer to transform the genre text into vectors
tfidf = TfidfVectorizer(stop_words='english')

# Fit and transform the genre column into a matrix of TF-IDF features
tfidf_matrix = tfidf.fit_transform(df['genre'])

# Compute the cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [10]:
# Function to recommend movies based on cosine similarity
def get_recommendations(title, cosine_sim=cosine_sim):
    # Get the index of the movie that matches the title
    idx = df[df['title'] == title].index[0]

    # Get the pairwise similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the indices of the 3 most similar movies
    sim_scores = sim_scores[1:4]  # Exclude the movie itself, so start from 1

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]

    # Return the titles of the most similar movies
    return df['title'].iloc[movie_indices]


In [12]:
# Test the recommendation system with an example
movie_title = 'Titanic'
recommended_movies = get_recommendations(movie_title)

print(f"\nMovies recommended for '{movie_title}':")
for movie in recommended_movies:
    print(movie)



Movies recommended for 'Titanic':
Titanic
Fight Club
The Godfather
