In [None]:
pip install pandas scikit-learn

In [None]:
import pandas as pd

# Toy catalogue, one row per movie; genres are stored as a single
# pipe-delimited string per movie.
movies = pd.DataFrame(
    [
        (1, 'The Matrix', 'Action|Sci-Fi'),
        (2, 'Inception', 'Action|Adventure|Sci-Fi'),
        (3, 'Interstellar', 'Adventure|Drama|Sci-Fi'),
        (4, 'The Godfather', 'Crime|Drama'),
        (5, 'The Dark Knight', 'Action|Crime|Drama'),
    ],
    columns=['movie_id', 'title', 'genres'],
)

print(movies)


In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

# Split pipe-delimited genre strings into lists. The isinstance guard leaves
# values that are already lists untouched, so re-running this cell does not
# fail with AttributeError on the second pass.
movies['genres'] = movies['genres'].apply(
    lambda x: x.split('|') if isinstance(x, str) else x
)

# Initialize MultiLabelBinarizer
mlb = MultiLabelBinarizer()

# Perform one-hot encoding on genres (one indicator column per distinct genre)
genre_encoded = mlb.fit_transform(movies['genres'])

# Create a DataFrame with genre features. It reuses the row index of `movies`
# so the concat below lines rows up even if that index is not 0..n-1.
genre_df = pd.DataFrame(genre_encoded, columns=mlb.classes_, index=movies.index)

# Combine with the original dataframe. Genre columns left behind by an earlier
# run of this cell are dropped first so they are replaced, not duplicated.
movies = pd.concat(
    [movies.drop(columns=list(genre_df.columns), errors='ignore'), genre_df],
    axis=1,
)

print(movies)


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows of the genre feature frame
cos_sim = cosine_similarity(genre_df)

# Wrap the matrix in a DataFrame labelled by movie title on both axes,
# so a score can be looked up by a pair of titles.
cos_sim_df = pd.DataFrame(
    data=cos_sim,
    index=movies['title'],
    columns=movies['title'],
)

print(cos_sim_df)


In [None]:
# Make recommendations
def recommend_movies(title, cos_sim_df, top_n=2):
    """Return the titles most similar to ``title`` according to ``cos_sim_df``.

    Parameters
    ----------
    title : hashable
        Label of the movie to find neighbours for. It must appear in both the
        index and the columns of ``cos_sim_df``.
    cos_sim_df : pandas.DataFrame
        Similarity scores; the column labelled ``title`` is read and its index
        labels are what gets returned.
    top_n : int, default 2
        Maximum number of titles to return. Values <= 0 yield an empty list.

    Returns
    -------
    list or None
        Up to ``top_n`` labels ordered from most to least similar, never
        including ``title`` itself. Equal scores keep the order in which they
        appear in ``cos_sim_df``. ``None`` (after printing a message) when
        ``title`` is not present in ``cos_sim_df``.
    """
    # The lookup below is by column and the self-match is dropped by index
    # label, so the title has to exist on both axes.
    if title not in cos_sim_df.index or title not in cos_sim_df.columns:
        print(f"Movie '{title}' not found in the dataset.")
        return None

    # head() with a negative count means "all but the last n", which is never
    # what a caller asking for the top N wants.
    if top_n <= 0:
        return []

    # Similarity of every movie to `title`, without the movie itself
    sim_scores = cos_sim_df[title].drop(title)

    # A stable sort makes the ranking of tied scores deterministic
    ranked = sim_scores.sort_values(ascending=False, kind='mergesort')

    return ranked.head(top_n).index.tolist()

# Example: ask for the two highest-scoring neighbours of 'Inception'
recommended = recommend_movies(title='Inception', cos_sim_df=cos_sim_df, top_n=2)
print(f"Movies similar to 'Inception': {recommended}")
