In [3]:
import pandas as pd

# Toy movie catalogue: one row per movie; multiple genres share a single
# pipe-separated string.
movie_rows = [
    (1, 'Movie A', 'Action|Adventure'),
    (2, 'Movie B', 'Action|Sci-Fi'),
    (3, 'Movie C', 'Drama'),
    (4, 'Movie D', 'Comedy|Romance'),
    (5, 'Movie E', 'Action|Drama'),
]
movies_df = pd.DataFrame(movie_rows, columns=['movieId', 'title', 'genre'])

# Toy ratings: one (userId, movieId, rating) triple per row.
rating_rows = [
    (1, 1, 5),
    (1, 2, 4),
    (1, 3, 3),
    (2, 1, 2),
    (2, 4, 5),
]
ratings_df = pd.DataFrame(rating_rows, columns=['userId', 'movieId', 'rating'])


In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer


# Turn each movie's genre string into a TF-IDF row (one row per movie, one
# column per term). The vectorizer's default tokenizer breaks text on
# non-word characters, so "Sci-Fi" contributes two terms ("sci" and "fi").
vectorizer = TfidfVectorizer()
genre_text = movies_df['genre']
tfidf_matrix = vectorizer.fit_transform(genre_text)
tfidf_matrix


<5x7 sparse matrix of type '<class 'numpy.float64'>'
	with 10 stored elements in Compressed Sparse Row format>

In [11]:
def create_user_profile(user_id, ratings_df, movies_df, feature_matrix=None):
    """Build a content profile for one user from the movies they rated.

    The profile is the unweighted column-wise mean of the feature rows that
    belong to the user's rated movies; the rating values themselves are not
    used.

    Parameters
    ----------
    user_id : scalar
        Value compared against ``ratings_df['userId']``.
    ratings_df : pandas.DataFrame
        Needs ``userId`` and ``movieId`` columns.
    movies_df : pandas.DataFrame
        Needs a ``movieId`` column. Row *i* of this frame (by position) is
        taken to describe the same movie as row *i* of the feature matrix.
    feature_matrix : sparse matrix or array, optional
        One feature row per movie. When omitted, the notebook-level
        ``tfidf_matrix`` is used, so the result depends on whichever matrix
        is bound to that name at call time.

    Returns
    -------
    The result of ``mean(axis=0)`` over the selected rows, or ``None`` when
    none of the user's rated movies are present in ``movies_df``.
    """
    if feature_matrix is None:
        feature_matrix = tfidf_matrix

    # Movie ids this user has rated.
    rated_ids = ratings_df.loc[ratings_df['userId'] == user_id, 'movieId']

    # Boolean flag per movies_df row, in row order.
    is_rated = movies_df['movieId'].isin(rated_ids).to_numpy()
    if not is_rated.any():
        return None  # No rated movies

    # Select by row position rather than by index label: the feature matrix
    # is positional, so labels only line up when movies_df has a default
    # 0..n-1 index (e.g. not after filtering or set_index).
    row_positions = is_rated.nonzero()[0]
    return feature_matrix[row_positions].mean(axis=0)

# Demo: build the profile for user 1 and display it. create_user_profile
# reads the notebook-level tfidf_matrix, so the values shown reflect
# whichever TF-IDF matrix was fitted most recently in this kernel.
user_profile = create_user_profile(1, ratings_df, movies_df)
user_profile

matrix([[0.22605504, 0.1794993 , 0.        , 0.17725742, 0.219706  ,
         0.15804155, 0.        , 0.15804155, 0.1794993 , 0.17725742,
         0.15804155, 0.        , 0.15804155, 0.1794993 , 0.        ,
         0.        ]])

In [10]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Toy movie catalogue with genres (pipe-separated) and free-text keywords.
# `combined_features` joins the two text columns with a single space so one
# vectorizer can be fitted over both.
movies_df = pd.DataFrame(
    [
        (1, 'Movie A', 'Action|Adventure', 'space, hero'),
        (2, 'Movie B', 'Action|Sci-Fi', 'future, robots'),
        (3, 'Movie C', 'Drama', 'love, family'),
        (4, 'Movie D', 'Comedy|Romance', 'funny, love'),
        (5, 'Movie E', 'Action|Drama', 'war, struggle'),
    ],
    columns=['movieId', 'title', 'genre', 'keywords'],
).assign(combined_features=lambda frame: frame['genre'] + ' ' + frame['keywords'])

# Toy ratings: one (userId, movieId, rating) triple per row.
ratings_df = pd.DataFrame(
    [
        (1, 1, 5),
        (1, 2, 4),
        (1, 3, 3),
        (2, 1, 2),
        (2, 4, 5),
    ],
    columns=['userId', 'movieId', 'rating'],
)

# Fit TF-IDF over the merged genre + keyword text. This rebinds the
# notebook-level `vectorizer` and `tfidf_matrix` names.
vectorizer = TfidfVectorizer()
feature_text = movies_df['combined_features']
tfidf_matrix = vectorizer.fit_transform(feature_text)

def create_user_profile(user_id, ratings_df, movies_df, feature_matrix=None):
    """Build a content profile for one user from the movies they rated.

    The profile is the unweighted column-wise mean of the feature rows that
    belong to the user's rated movies; the rating values themselves are not
    used.

    Parameters
    ----------
    user_id : scalar
        Value compared against ``ratings_df['userId']``.
    ratings_df : pandas.DataFrame
        Needs ``userId`` and ``movieId`` columns.
    movies_df : pandas.DataFrame
        Needs a ``movieId`` column. Row *i* of this frame (by position) is
        taken to describe the same movie as row *i* of the feature matrix.
    feature_matrix : sparse matrix or array, optional
        One feature row per movie. When omitted, the notebook-level
        ``tfidf_matrix`` is used, so the result depends on whichever matrix
        is bound to that name at call time.

    Returns
    -------
    The result of ``mean(axis=0)`` over the selected rows, or ``None`` when
    none of the user's rated movies are present in ``movies_df``.
    """
    if feature_matrix is None:
        feature_matrix = tfidf_matrix

    # Movie ids this user has rated.
    rated_ids = ratings_df.loc[ratings_df['userId'] == user_id, 'movieId']

    # Boolean flag per movies_df row, in row order.
    is_rated = movies_df['movieId'].isin(rated_ids).to_numpy()
    if not is_rated.any():
        return None

    # Select by row position rather than by index label: the feature matrix
    # is positional, so labels only line up when movies_df has a default
    # 0..n-1 index (e.g. not after filtering or set_index).
    row_positions = is_rated.nonzero()[0]
    return feature_matrix[row_positions].mean(axis=0)

def get_movie_recommendations(user_profile, tfidf_matrix, movies_df, top_n=5,
                              exclude_movie_ids=None):
    """Rank movies by cosine similarity to a user profile.

    Parameters
    ----------
    user_profile : array-like, numpy.matrix, or sparse matrix
        A single feature vector. One-dimensional input is treated as one
        row, so profiles shaped ``(n_features,)`` and ``(1, n_features)``
        are both accepted.
    tfidf_matrix : sparse matrix or array
        One feature row per movie, in the same row order as ``movies_df``.
    movies_df : pandas.DataFrame
        Needs ``movieId`` and ``title`` columns.
    top_n : int, default 5
        Maximum number of rows to return.
    exclude_movie_ids : iterable, optional
        Movie ids to drop from the ranking before taking the top rows
        (for example, movies the user has already rated). By default
        nothing is excluded.

    Returns
    -------
    pandas.DataFrame
        Columns ``movieId``, ``title`` and ``similarity``, sorted by
        ``similarity`` in descending order and truncated to ``top_n`` rows.
        Row labels are those of ``movies_df``.
    """
    # Densify sparse profiles first; np.asarray then unwraps np.matrix, and
    # atleast_2d lifts a flat vector to the (1, n_features) shape that
    # cosine_similarity requires.
    if hasattr(user_profile, 'toarray'):
        user_profile = user_profile.toarray()
    user_profile = np.atleast_2d(np.asarray(user_profile))

    # One similarity score per movie row.
    similarities = cosine_similarity(user_profile, tfidf_matrix).flatten()

    movie_similarities = pd.DataFrame({
        'movieId': movies_df['movieId'],
        'title': movies_df['title'],
        'similarity': similarities,
    })

    if exclude_movie_ids is not None:
        keep = ~movie_similarities['movieId'].isin(list(exclude_movie_ids))
        movie_similarities = movie_similarities[keep]

    ranked = movie_similarities.sort_values(by='similarity', ascending=False)
    return ranked.head(top_n)

# Demo: build user 1's profile, then print the three most similar titles.
# create_user_profile returns None when the user has no rated movies in the
# catalogue, in which case there is nothing to rank against.
user_profile = create_user_profile(user_id=1, ratings_df=ratings_df, movies_df=movies_df)
if user_profile is None:
    print("No rated movies for the user to base recommendations on.")
else:
    recommendations = get_movie_recommendations(user_profile, tfidf_matrix, movies_df, top_n=3)
    print(recommendations)


   movieId    title  similarity
1        2  Movie B    0.620225
0        1  Movie A    0.620225
2        3  Movie C    0.556499
