In [4]:
pip install pandas numpy scikit-learn scipy


Note: you may need to restart the kernel to use updated packages.


In [9]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

In [13]:
# Sample movie catalogue: one row per movie, genres joined with '|'
movies = pd.DataFrame(
    [
        (1, 'Toy Story', 'Animation|Children|Comedy'),
        (2, 'Jumanji', 'Adventure|Children|Fantasy'),
        (3, 'Grumpier Old Men', 'Comedy|Romance'),
        (4, 'Waiting to Exhale', 'Comedy|Drama|Romance'),
        (5, 'Father of the Bride Part II', 'Comedy'),
    ],
    columns=['MovieID', 'Title', 'Genres'],
)

In [17]:


# Sample ratings, one (UserID, MovieID, Rating) triple per row.
# Note that user 3 rates movie 3 twice (3 and 5).
ratings = pd.DataFrame(
    [
        (1, 1, 5),
        (2, 2, 4),
        (3, 3, 3),
        (4, 4, 4),
        (5, 5, 5),
        (1, 5, 4),
        (2, 4, 2),
        (3, 3, 5),
        (4, 2, 3),
        (5, 1, 2),
    ],
    columns=['UserID', 'MovieID', 'Rating'],
)

In [19]:
# Collaborative Filtering (User-Based)
def collaborative_filtering(user_id, ratings_df, movies_df, top_n=5):
    """Recommend movies ``user_id`` has not rated, using user-based collaborative filtering.

    Users are compared by the cosine similarity of their rating vectors. Each
    movie is scored by the similarity-weighted sum of all users' ratings,
    divided by the total absolute similarity. Missing ratings count as 0 in
    both the similarity and the score.

    Args:
        user_id: Value from ``ratings_df['UserID']`` identifying the target user.
        ratings_df: DataFrame with ``UserID``, ``MovieID`` and ``Rating`` columns.
            Repeated (UserID, MovieID) pairs are averaged.
        movies_df: DataFrame with a ``MovieID`` column.
        top_n: Maximum number of movies to return; values <= 0 give an empty result.

    Returns:
        The rows of ``movies_df`` for the recommended movies, best score first.
        Empty when the user has already rated every movie in ``ratings_df``.

    Raises:
        KeyError: If ``user_id`` has no ratings in ``ratings_df``.
    """
    # Average repeated ratings so the pivot below cannot fail on duplicate pairs.
    mean_ratings = ratings_df.groupby(['UserID', 'MovieID'], as_index=False)['Rating'].mean()

    # Keep NaN for "not rated" so a genuine rating of 0 is not mistaken for unseen.
    rating_matrix = mean_ratings.pivot(index='UserID', columns='MovieID', values='Rating')
    if user_id not in rating_matrix.index:
        raise KeyError(f"UserID {user_id!r} has no ratings in ratings_df")

    unseen_mask = rating_matrix.loc[user_id].isna().to_numpy()
    if top_n <= 0 or not unseen_mask.any():
        return movies_df.iloc[0:0]

    filled = rating_matrix.fillna(0).to_numpy()
    user_similarity = cosine_similarity(csr_matrix(filled))

    # Look the user up by index label; ids need not be contiguous or start at 1.
    similar_users = user_similarity[rating_matrix.index.get_loc(user_id)]

    scores = similar_users.dot(filled)
    weight_total = np.abs(similar_users).sum()
    if weight_total > 0:  # all-zero similarities would otherwise give 0/0 = NaN
        scores = scores / weight_total

    # Rank only the unseen columns, working with column positions throughout.
    unseen_pos = np.flatnonzero(unseen_mask)
    order = np.argsort(scores[unseen_pos], kind='stable')[::-1][:top_n]
    recommended_ids = rating_matrix.columns[unseen_pos[order]]

    # Return the matching movies in ranking order rather than catalogue order.
    rank_of = {movie_id: rank for rank, movie_id in enumerate(recommended_ids)}
    picked = movies_df[movies_df['MovieID'].isin(recommended_ids)]
    return picked.iloc[np.argsort(picked['MovieID'].map(rank_of).to_numpy(), kind='stable')]


In [27]:
# Collaborative Filtering (User-Based)
def collaborative_filtering(user_id, ratings_df, movies_df, top_n=5):
    """Recommend movies ``user_id`` has not rated, using user-based collaborative filtering.

    Users are compared by the cosine similarity of their rating vectors. Each
    movie is scored by the similarity-weighted sum of all users' ratings,
    divided by the total absolute similarity. Missing ratings count as 0 in
    both the similarity and the score.

    Args:
        user_id: Value from ``ratings_df['UserID']`` identifying the target user.
        ratings_df: DataFrame with ``UserID``, ``MovieID`` and ``Rating`` columns.
            Repeated (UserID, MovieID) pairs are averaged.
        movies_df: DataFrame with a ``MovieID`` column.
        top_n: Maximum number of movies to return; values <= 0 give an empty result.

    Returns:
        The rows of ``movies_df`` for the recommended movies, best score first.
        Empty when the user has already rated every movie in ``ratings_df``.

    Raises:
        KeyError: If ``user_id`` has no ratings in ``ratings_df``.
    """
    # Average repeated ratings so the pivot below cannot fail on duplicate pairs.
    mean_ratings = ratings_df.groupby(['UserID', 'MovieID'], as_index=False)['Rating'].mean()

    # Keep NaN for "not rated" so a genuine rating of 0 is not mistaken for unseen.
    rating_matrix = mean_ratings.pivot(index='UserID', columns='MovieID', values='Rating')
    if user_id not in rating_matrix.index:
        raise KeyError(f"UserID {user_id!r} has no ratings in ratings_df")

    unseen_mask = rating_matrix.loc[user_id].isna().to_numpy()
    if top_n <= 0 or not unseen_mask.any():
        return movies_df.iloc[0:0]

    filled = rating_matrix.fillna(0).to_numpy()
    user_similarity = cosine_similarity(csr_matrix(filled))

    # Look the user up by index label; ids need not be contiguous or start at 1.
    similar_users = user_similarity[rating_matrix.index.get_loc(user_id)]

    # `scores` is 1-D (one entry per movie column), so it is indexed by column position only.
    scores = similar_users.dot(filled)
    weight_total = np.abs(similar_users).sum()
    if weight_total > 0:  # all-zero similarities would otherwise give 0/0 = NaN
        scores = scores / weight_total

    # Rank only the unseen columns, working with column positions throughout.
    unseen_pos = np.flatnonzero(unseen_mask)
    order = np.argsort(scores[unseen_pos], kind='stable')[::-1][:top_n]
    recommended_ids = rating_matrix.columns[unseen_pos[order]]

    # Return the matching movies in ranking order rather than catalogue order.
    rank_of = {movie_id: rank for rank, movie_id in enumerate(recommended_ids)}
    picked = movies_df[movies_df['MovieID'].isin(recommended_ids)]
    return picked.iloc[np.argsort(picked['MovieID'].map(rank_of).to_numpy(), kind='stable')]


In [45]:
# Add new data (optional functionality)
def add_new_data(movies_df, new_movie):
    """Return a copy of ``movies_df`` with ``new_movie`` appended under a fresh MovieID.

    Args:
        movies_df: DataFrame with a ``MovieID`` column.
        new_movie: Mapping (e.g. a dict) of column name to value for the new row.
            It is copied, not modified; any ``MovieID`` it carries is overridden.

    Returns:
        A new DataFrame with a fresh 0..n-1 index. The added row's ``MovieID``
        is the current maximum plus one, or 1 when ``movies_df`` is empty.
    """
    next_id = 1 if movies_df.empty else int(movies_df['MovieID'].max()) + 1

    # Work on a copy so the caller's object is left untouched.
    record = dict(new_movie)
    record['MovieID'] = next_id

    # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent.
    return pd.concat([movies_df, pd.DataFrame([record])], ignore_index=True)


In [47]:
# Collaborative Filtering (User-Based)
def collaborative_filtering(user_id, ratings_df, movies_df, top_n=5):
    """Recommend movies ``user_id`` has not rated, using user-based collaborative filtering.

    Users are compared by the cosine similarity of their rating vectors. Each
    movie is scored by the similarity-weighted sum of all users' ratings,
    divided by the total absolute similarity. Missing ratings count as 0 in
    both the similarity and the score.

    Args:
        user_id: Value from ``ratings_df['UserID']`` identifying the target user.
        ratings_df: DataFrame with ``UserID``, ``MovieID`` and ``Rating`` columns.
            Repeated (UserID, MovieID) pairs are averaged.
        movies_df: DataFrame with a ``MovieID`` column.
        top_n: Maximum number of movies to return; values <= 0 give an empty result.

    Returns:
        The rows of ``movies_df`` for the recommended movies, best score first.
        Empty when the user has already rated every movie in ``ratings_df``.

    Raises:
        KeyError: If ``user_id`` has no ratings in ``ratings_df``.
    """
    # Handle duplicates by averaging the ratings
    mean_ratings = ratings_df.groupby(['UserID', 'MovieID'], as_index=False)['Rating'].mean()

    # Keep NaN for "not rated" so a genuine rating of 0 is not mistaken for unseen.
    rating_matrix = mean_ratings.pivot(index='UserID', columns='MovieID', values='Rating')
    if user_id not in rating_matrix.index:
        raise KeyError(f"UserID {user_id!r} has no ratings in ratings_df")

    unseen_mask = rating_matrix.loc[user_id].isna().to_numpy()
    if top_n <= 0 or not unseen_mask.any():
        return movies_df.iloc[0:0]

    filled = rating_matrix.fillna(0).to_numpy()
    user_similarity = cosine_similarity(csr_matrix(filled))

    # Look the user up by index label; ids need not be contiguous or start at 1.
    similar_users = user_similarity[rating_matrix.index.get_loc(user_id)]

    scores = similar_users.dot(filled)
    weight_total = np.abs(similar_users).sum()
    if weight_total > 0:  # all-zero similarities would otherwise give 0/0 = NaN
        scores = scores / weight_total

    # Rank only the unseen columns, by column position rather than `MovieID - 1`.
    unseen_pos = np.flatnonzero(unseen_mask)
    order = np.argsort(scores[unseen_pos], kind='stable')[::-1][:top_n]
    recommended_ids = rating_matrix.columns[unseen_pos[order]]

    # Return the matching movies in ranking order rather than catalogue order.
    rank_of = {movie_id: rank for rank, movie_id in enumerate(recommended_ids)}
    picked = movies_df[movies_df['MovieID'].isin(recommended_ids)]
    return picked.iloc[np.argsort(picked['MovieID'].map(rank_of).to_numpy(), kind='stable')]


In [60]:
# Content-Based Filtering
def content_based_filtering(movie_id, movies_df, top_n=5):
    """Return the movies whose genre sets are most similar to ``movie_id``.

    Each movie's ``Genres`` string is split on '|' into one indicator column per
    genre, and movies are compared by cosine similarity of those indicator rows.

    Args:
        movie_id: Value from ``movies_df['MovieID']`` identifying the query movie.
        movies_df: DataFrame with ``MovieID`` and ``Genres`` columns.
        top_n: Maximum number of movies to return; values <= 0 give an empty result.

    Returns:
        Up to ``top_n`` rows of ``movies_df``, most similar first. The query
        movie itself is never included.

    Raises:
        KeyError: If ``movie_id`` is not present in ``movies_df['MovieID']``.
    """
    # Find the query movie by row position: the similarity matrix is positional,
    # so an index label is only correct for a default RangeIndex.
    matches = np.flatnonzero((movies_df['MovieID'] == movie_id).to_numpy())
    if matches.size == 0:
        raise KeyError(f"MovieID {movie_id!r} not found in movies_df")
    movie_pos = matches[0]

    if top_n <= 0:
        return movies_df.iloc[0:0]

    # Create movie-genre matrix
    genres_matrix = movies_df['Genres'].str.get_dummies('|')

    # Compute cosine similarity between movies
    movie_similarity = cosine_similarity(genres_matrix)
    similar_movies = movie_similarity[movie_pos]

    # Drop the query movie by position instead of assuming it sorts first:
    # another movie with identical genres ties with it at similarity 1.0.
    ranked = np.argsort(similar_movies, kind='stable')[::-1]
    ranked = ranked[ranked != movie_pos][:top_n]
    return movies_df.iloc[ranked]


In [62]:
# Example usage for content-based filtering
# Prints a header line, then the frame returned by content_based_filtering
# for MovieID 1 in `movies`, using the function's default top_n.
print("\nContent-Based Recommendations for Movie 'Toy Story':")
print(content_based_filtering(1, movies))



Content-Based Recommendations for Movie 'Toy Story':
   MovieID                        Title                      Genres
4        5  Father of the Bride Part II                      Comedy
2        3             Grumpier Old Men              Comedy|Romance
3        4            Waiting to Exhale        Comedy|Drama|Romance
1        2                      Jumanji  Adventure|Children|Fantasy
