<a href="https://colab.research.google.com/github/Jhansipothabattula/Data_Science/blob/main/Day172.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Movie Recommendation System Using Cosine Similarity

In [1]:
# Import Libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Create the dataset: a tiny in-memory catalogue whose 'genre' column holds
# comma-separated genre labels.
data = {
    'movie_id': [1, 2, 3, 4, 5],
    'title': ['The Matrix', 'John Wick', 'The Godfather', 'Pulp Fiction', 'The Dark Knight'],
    'genre': ['Action, Sci-Fi', 'Action, Thriller', 'Crime, Drama', 'Crime, Drama', 'Action, Crime, Drama']
}

# Convert the dataset into a DataFrame
df = pd.DataFrame(data)

# Display the dataset
print("Movie Data: ")
print(df)


def split_genres(genre_text):
    """Split a comma-separated genre string into individual genre labels.

    Surrounding whitespace is stripped and empty pieces are discarded, so
    'Action, Sci-Fi' yields ['Action', 'Sci-Fi'].
    """
    return [label.strip() for label in genre_text.split(',') if label.strip()]


# Define a TF-IDF Vectorizer to transform the genre text into vectors.
# Each genre label is one feature: the default word pattern would split
# "Sci-Fi" into the two tokens "sci" and "fi", counting one genre twice.
# token_pattern=None tells scikit-learn the pattern is intentionally unused.
# No stop-word list is applied because the inputs are labels, not prose.
tfidf = TfidfVectorizer(tokenizer=split_genres, token_pattern=None)

# Fit and transform the genre column into a matrix of TF-IDF features
tfidf_matrix = tfidf.fit_transform(df['genre'])

# Compute the cosine similarity matrix (row i / column j compares movie i with movie j)
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Sanity check: one row and one column per movie
assert cosine_sim.shape == (len(df), len(df))

# Function to recommend movies based on cosine similarity
def get_recommendations(title, cosine_sim=cosine_sim, top_n=2):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title of the movie to find recommendations for.
    cosine_sim : array-like
        Pairwise similarity matrix; row ``i`` holds the scores of the movie
        at position ``i`` of ``df`` against every movie.
    top_n : int, default 2
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Titles of the ``top_n`` most similar movies, best match first. The
        queried movie itself is never included.

    Raises
    ------
    IndexError
        If ``title`` is not present in ``df['title']``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Look the movie up by *position*: the similarity matrix is positional,
    # so an index label would only be correct for a default RangeIndex.
    positions = [pos for pos, name in enumerate(df['title']) if name == title]
    if not positions:
        raise IndexError(f"Movie title {title!r} not found in the dataset")
    idx = positions[0]

    # Pair every *other* movie with its similarity score. The queried movie is
    # excluded explicitly rather than by dropping the first sorted entry:
    # another movie with an identical genre vector ties at the top score and
    # may sort ahead of it, which would return the queried movie itself.
    sim_scores = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]

    # Sort the movies by similarity, highest first (stable for ties)
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Keep the positions of the best matches
    movie_indices = [pos for pos, _ in sim_scores[:top_n]]

    # Return the titles of the most similar movies
    return df['title'].iloc[movie_indices]

# Demo: ask for recommendations for a single known title and list them
movie_title = 'The Matrix'
recommended_movies = get_recommendations(movie_title)

# Heading first, then one recommended title per line (best match first)
heading = f"\nMovie recommended for '{movie_title}':"
print(heading)
for movie in recommended_movies:
    print(movie)

Movie Data: 
   movie_id            title                 genre
0         1       The Matrix        Action, Sci-Fi
1         2        John Wick      Action, Thriller
2         3    The Godfather          Crime, Drama
3         4     Pulp Fiction          Crime, Drama
4         5  The Dark Knight  Action, Crime, Drama

Movie recommended for 'The Matrix':
The Dark Knight
John Wick
