In [7]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer



In [8]:
# Sample Bollywood Movies Dataset
def get_sample_movies():
    data = {
        'Movie_ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        'Title': [
            'Dilwale Dulhania Le Jayenge',
            'Kabhi Khushi Kabhie Gham',
            '3 Idiots',
            'Dangal',
            'PK',
            'Bahubali',
            'Gully Boy',
            'Queen',
            'Barfi!',
            'Zindagi Na Milegi Dobara'
        ],
        'Genres': [
            ['Romance', 'Drama'],
            ['Drama', 'Family'],
            ['Comedy', 'Drama'],
            ['Sports', 'Drama'],
            ['Comedy', 'Sci-Fi'],
            ['Action', 'Drama', 'Fantasy'],
            ['Drama', 'Music'],
            ['Comedy', 'Drama'],
            ['Romance', 'Comedy'],
            ['Adventure', 'Drama']
        ],
        'Actors': [
            ['Shah Rukh Khan', 'Kajol'],
            ['Amitabh Bachchan', 'Shah Rukh Khan', 'Kajol'],
            ['Aamir Khan', 'Kareena Kapoor'],
            ['Aamir Khan', 'Sakshi Tanwar'],
            ['Aamir Khan', 'Anushka Sharma'],
            ['Prabhas', 'Anushka Shetty'],
            ['Ranveer Singh', 'Alia Bhatt'],
            ['Kangana Ranaut', 'Rajkummar Rao'],
            ['Ranbir Kapoor', 'Priyanka Chopra'],
            ['Hrithik Roshan', 'Farhan Akhtar']
        ]
    }
    return pd.DataFrame(data)




In [9]:
# Preprocess the dataset
def preprocess_movies(df):
    # Initialize MultiLabelBinarizer for Genres and Actors
    mlb_genres = MultiLabelBinarizer()
    mlb_actors = MultiLabelBinarizer()
    
    # One-hot encode Genres and Actors
    genres_encoded = pd.DataFrame(mlb_genres.fit_transform(df['Genres']),
                                  columns=mlb_genres.classes_,
                                  index=df.index)
    actors_encoded = pd.DataFrame(mlb_actors.fit_transform(df['Actors']),
                                  columns=mlb_actors.classes_,
                                  index=df.index)
    
    # Combine the encoded features
    movie_features = pd.concat([genres_encoded, actors_encoded], axis=1)
    return movie_features, mlb_genres, mlb_actors



In [10]:
# Function to get user preferences
def get_user_preferences(mlb_genres, mlb_actors):
    print("Welcome to the Bollywood Movie Recommender!")
    print("Please enter your preferences below.")
    
    # Input preferred genres
    print("\nAvailable Genres:")
    print(", ".join(mlb_genres.classes_))
    genres_input = input("Enter your preferred genres (comma separated): ")
    preferred_genres = [g.strip().title() for g in genres_input.split(',') if g.strip().title() in mlb_genres.classes_]
    
    # Input favorite actors
    print("\nAvailable Actors:")
    print(", ".join(mlb_actors.classes_))
    actors_input = input("Enter your favorite actors (comma separated): ")
    favorite_actors = [a.strip() for a in actors_input.split(',') if a.strip() in mlb_actors.classes_]
    
    return preferred_genres, favorite_actors



In [11]:
# Function to create user profile vector
def create_user_profile(preferred_genres, favorite_actors, mlb_genres, mlb_actors):
    # Initialize zero vectors
    user_genres = [0] * len(mlb_genres.classes_)
    user_actors = [0] * len(mlb_actors.classes_)
    
    # Set 1 for preferred genres
    for genre in preferred_genres:
        index = list(mlb_genres.classes_).index(genre)
        user_genres[index] = 1
    
    # Set 1 for favorite actors
    for actor in favorite_actors:
        index = list(mlb_actors.classes_).index(actor)
        user_actors[index] = 1
    
    # Combine genres and actors
    user_profile = user_genres + user_actors
    return [user_profile]



In [12]:
# Function to recommend movies
def recommend_movies():
    # Get the sample movies dataset
    movies_df = get_sample_movies()
    
    # Preprocess the movies to get feature vectors
    movie_features, mlb_genres, mlb_actors = preprocess_movies(movies_df)
    
    # Get user preferences
    preferred_genres, favorite_actors = get_user_preferences(mlb_genres, mlb_actors)
    
    if not preferred_genres and not favorite_actors:
        print("\nNo valid preferences entered. Exiting recommender.")
        return
    
    # Create user profile
    user_profile = create_user_profile(preferred_genres, favorite_actors, mlb_genres, mlb_actors)
    
    # Compute cosine similarity between user profile and all movies
    similarity_scores = cosine_similarity(user_profile, movie_features)[0]
    
    # Add similarity scores to the movies dataframe
    movies_df['Similarity_Score'] = similarity_scores
    
    # Sort movies based on similarity score in descending order
    recommended_movies = movies_df.sort_values(by='Similarity_Score', ascending=False)
    
    # Display top 5 recommendations
    top_n = 5
    print(f"\nTop {top_n} Bollywood Movie Recommendations for You:\n")
    for idx, row in recommended_movies.head(top_n).iterrows():
        print(f"Title: {row['Title']}")
        print(f"Genres: {', '.join(row['Genres'])}")
        print(f"Actors: {', '.join(row['Actors'])}")
        print(f"Similarity Score: {row['Similarity_Score']:.4f}\n")

# Run the recommender
if __name__ == "__main__":
    recommend_movies()

Welcome to the Bollywood Movie Recommender!
Please enter your preferences below.

Available Genres:
Action, Adventure, Comedy, Drama, Family, Fantasy, Music, Romance, Sci-Fi, Sports

Available Actors:
Aamir Khan, Alia Bhatt, Amitabh Bachchan, Anushka Sharma, Anushka Shetty, Farhan Akhtar, Hrithik Roshan, Kajol, Kangana Ranaut, Kareena Kapoor, Prabhas, Priyanka Chopra, Rajkummar Rao, Ranbir Kapoor, Ranveer Singh, Sakshi Tanwar, Shah Rukh Khan

Top 5 Bollywood Movie Recommendations for You:

Title: Dangal
Genres: Sports, Drama
Actors: Aamir Khan, Sakshi Tanwar
Similarity Score: 0.3536

Title: 3 Idiots
Genres: Comedy, Drama
Actors: Aamir Khan, Kareena Kapoor
Similarity Score: 0.3536

Title: PK
Genres: Comedy, Sci-Fi
Actors: Aamir Khan, Anushka Sharma
Similarity Score: 0.3536

Title: Bahubali
Genres: Action, Drama, Fantasy
Actors: Prabhas, Anushka Shetty
Similarity Score: 0.3162

Title: Kabhi Khushi Kabhie Gham
Genres: Drama, Family
Actors: Amitabh Bachchan, Shah Rukh Khan, Kajol
Similarit