In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Sample catalogue, written one movie per row; the column order is given by
# _MOVIE_COLUMNS and the rows are pivoted into the column-oriented dict below
_MOVIE_COLUMNS = ['MovieID', 'Title', 'Genres', 'Director', 'Plot_Keywords']
_MOVIE_ROWS = [
    (1, 'The Matrix', 'Action Sci-Fi', 'Wachowski', 'virtual reality, hacker, dystopia'),
    (2, 'Titanic', 'Romance Drama', 'James Cameron', 'shipwreck, love, iceberg'),
    (3, 'Toy Story', 'Animation Family', 'John Lasseter', 'toys, adventure, friendship'),
    (4, 'The Lion King', 'Animation Family', 'Roger Allers', 'lion, jungle, adventure'),
    (5, 'Jurassic Park', 'Adventure Sci-Fi', 'Steven Spielberg', 'dinosaurs, island, adventure'),
]
movies = {
    column: list(values)
    for column, values in zip(_MOVIE_COLUMNS, zip(*_MOVIE_ROWS))
}
movies_df = pd.DataFrame(movies)

# Sample ratings: a single user (ID 1) who has rated every movie in the
# catalogue, given as (MovieID, Rating) pairs
_USER_1_RATINGS = [(1, 5), (2, 3), (3, 4), (4, 5), (5, 2)]
user_ratings = {
    'UserID': [1] * len(_USER_1_RATINGS),
    'MovieID': [movie_id for movie_id, _ in _USER_1_RATINGS],
    'Rating': [rating for _, rating in _USER_1_RATINGS],
}
ratings_df = pd.DataFrame(user_ratings)

# Show both frames, each under its own heading
print("Movies DataFrame:", movies_df, sep="\n")
print("\nUser Ratings DataFrame:", ratings_df, sep="\n")

Movies DataFrame:
   MovieID          Title            Genres          Director  \
0        1     The Matrix     Action Sci-Fi         Wachowski   
1        2        Titanic     Romance Drama     James Cameron   
2        3      Toy Story  Animation Family     John Lasseter   
3        4  The Lion King  Animation Family      Roger Allers   
4        5  Jurassic Park  Adventure Sci-Fi  Steven Spielberg   

                       Plot_Keywords  
0  virtual reality, hacker, dystopia  
1           shipwreck, love, iceberg  
2        toys, adventure, friendship  
3            lion, jungle, adventure  
4       dinosaurs, island, adventure  

User Ratings DataFrame:
   UserID  MovieID  Rating
0       1        1       5
1       1        2       3
2       1        3       4
3       1        4       5
4       1        5       2


In [None]:
# Build one text blob per movie: genres, then director, then plot keywords,
# joined with single spaces (a missing value in any part yields a missing blob)
movies_df['Combined_Features'] = movies_df['Genres'].str.cat(
    [movies_df['Director'], movies_df['Plot_Keywords']],
    sep=' ',
)

print("\nCombined Features for Each Movie:")
print(movies_df.loc[:, ['Title', 'Combined_Features']])



Combined Features for Each Movie:
           Title                                  Combined_Features
0     The Matrix  Action Sci-Fi Wachowski virtual reality, hacke...
1        Titanic  Romance Drama James Cameron shipwreck, love, i...
2      Toy Story  Animation Family John Lasseter toys, adventure...
3  The Lion King  Animation Family Roger Allers lion, jungle, ad...
4  Jurassic Park  Adventure Sci-Fi Steven Spielberg dinosaurs, i...


In [None]:
# Vectorizer configured to use scikit-learn's built-in English stop-word list
tfidf = TfidfVectorizer(stop_words='english')

# Learn the vocabulary from the combined text and encode each movie as one
# TF-IDF row in a single pass
combined_text = movies_df['Combined_Features']
tfidf_matrix = tfidf.fit_transform(combined_text)

print("\nTF-IDF Matrix Shape (Movies x Features):", tfidf_matrix.shape)



TF-IDF Matrix Shape (Movies x Features): (5, 30)


In [None]:
# Pairwise cosine similarity between all movie TF-IDF vectors; called with a
# single argument, the matrix is compared against itself
cosine_sim_matrix = cosine_similarity(tfidf_matrix)

# Label both axes with the movie titles so the matrix is readable when printed
movie_titles = movies_df['Title']
cosine_sim_df = pd.DataFrame(
    data=cosine_sim_matrix,
    index=movie_titles,
    columns=movie_titles,
)

print("\nCosine Similarity Matrix:")
print(cosine_sim_df)



Cosine Similarity Matrix:
Title          The Matrix  Titanic  Toy Story  The Lion King  Jurassic Park
Title                                                                      
The Matrix       1.000000      0.0   0.000000       0.000000       0.180857
Titanic          0.000000      1.0   0.000000       0.000000       0.000000
Toy Story        0.000000      0.0   1.000000       0.304390       0.140429
The Lion King    0.000000      0.0   0.304390       1.000000       0.140429
Jurassic Park    0.180857      0.0   0.140429       0.140429       1.000000


In [None]:
def recommend_movies(user_id, ratings_df, movies_df, cosine_sim_matrix, top_n=3):
    """Rank the movies a user has not rated by rating-weighted similarity.

    Each catalogue movie is scored as the sum, over the movies the user has
    rated, of ``rating * similarity(rated movie, catalogue movie)``. Movies
    the user has already rated are then removed and the highest-scoring
    remainder is returned.

    Parameters
    ----------
    user_id : scalar
        Value matched against ``ratings_df['UserID']``.
    ratings_df : pandas.DataFrame
        Must contain the columns ``UserID``, ``MovieID`` and ``Rating``.
    movies_df : pandas.DataFrame
        Must contain the columns ``MovieID`` and ``Title``. Row *i* (by
        position, regardless of the frame's index labels) must correspond to
        row/column *i* of ``cosine_sim_matrix``.
    cosine_sim_matrix : numpy.ndarray
        Square movie-by-movie similarity matrix, ordered like ``movies_df``.
    top_n : int, default 3
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Title`` with a single ``Score`` column, sorted by score
        in descending order, at most ``top_n`` rows. Empty when the user has
        rated every movie in the catalogue.
    """
    # Map each MovieID to its row *position* in movies_df. Positions (not
    # index labels) are what line up with the similarity matrix, so this stays
    # correct when movies_df carries a non-default index. If a MovieID appears
    # more than once, the first occurrence wins.
    position_of = {}
    for position, movie_id in enumerate(movies_df['MovieID']):
        position_of.setdefault(movie_id, position)

    user_rows = ratings_df[ratings_df['UserID'] == user_id]

    # Keep only ratings for movies present in the catalogue; a rating for an
    # unknown MovieID has no row in the similarity matrix to weight.
    rated_positions = []
    rating_weights = []
    for movie_id, rating in zip(user_rows['MovieID'], user_rows['Rating']):
        if movie_id in position_of:
            rated_positions.append(position_of[movie_id])
            rating_weights.append(rating)

    # (n_movies x n_rated) . (n_rated,) -> one weighted score per catalogue movie
    scores = cosine_sim_matrix[rated_positions, :].T.dot(rating_weights)

    score_df = pd.DataFrame(scores, index=movies_df['Title'], columns=['Score'])

    # Drop already-rated movies by MovieID rather than by Title, so that an
    # unrated movie sharing its title with a rated one is still recommended.
    unrated_mask = ~movies_df['MovieID'].isin(user_rows['MovieID']).to_numpy()
    score_df = score_df[unrated_mask]

    # mergesort is stable, so equally scored movies keep their catalogue order
    recommended_movies = score_df.sort_values(
        by='Score', ascending=False, kind='mergesort'
    ).head(top_n)

    return recommended_movies

# Ask for User 1's three highest-scoring movies among those not yet rated
recommendations = recommend_movies(1, ratings_df, movies_df, cosine_sim_matrix, top_n=3)

print("\nTop 3 Recommended Movies for User 1:")
print(recommendations)



Top 3 Recommended Movies for User 1:
Empty DataFrame
Columns: [Score]
Index: []
