<a href="https://colab.research.google.com/github/Jenicisprakash/machine-learning/blob/main/Movie_Recommentation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [32]:
# Sample movie dataset (You can replace this with a real movie dataset)
data = pd.read_csv("/content/movie_metadata_genres.csv")

In [33]:
# Convert the dataset into a DataFrame
df = pd.DataFrame(data)
df

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
27273,131254,Kein Bund für's Leben (2007),Comedy
27274,131256,"Feuer, Eis & Dosenbier (2002)",Comedy
27275,131258,The Pirates (2014),Adventure
27276,131260,Rentun Ruusu (2001),(no genres listed)


In [34]:
# Define a TF-IDF Vectorizer to transform the genre text into vectors
tfidf = TfidfVectorizer(stop_words='english')

In [35]:
# Fit and transform the genre column into a matrix of TF-IDF features
tfidf_matrix = tfidf.fit_transform(df['genres'])

In [36]:
# Compute the cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [41]:
# Function to recommend movies based on cosine similarity
def get_recommendations(title, cosine_sim=cosine_sim):
    # Get the index of the movie that matches the title
    idx = df[df['title'] == title].index[0]

    # Get the pairwise similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the indices of the 5 most similar movies
    sim_scores = sim_scores[1:6]  # Exclude the movie itself, so start from 1

    # Get the movie indices
    movie_indices = [i[0] for i in sim_scores]

    # Return the titles of the most similar movies
    return df['title'].iloc[movie_indices]



In [45]:
# Test the recommendation system with an example
movie_title = input()
recommended_movies = get_recommendations(movie_title)

print(f"\nMovies recommended for '{movie_title}':")
for movie in recommended_movies:
    print(f"\n{movie}")

Kein Bund für's Leben (2007)

Movies recommended for 'Kein Bund für's Leben (2007)':

Four Rooms (1995)

Ace Ventura: When Nature Calls (1995)

Bio-Dome (1996)

Friday (1995)

Black Sheep (1996)
