In [1]:
!pip install bertopic -q

# User specific recommendation system
* <span style="font-size:16px"> A user-specific recommendation system is a system that recommends items (e.g. movies, products, etc.) to a specific user based on their preferences, behavior, and other relevant data.</span>
* <span style="font-size:16px">The system uses algorithms and machine learning techniques to analyze the user's data and generate recommendations that are personalized to their interests and needs.</span>
* <span style="font-size:16px">User-specific recommendation systems can be used in various applications, such as e-commerce, social media, and entertainment.</span>
* <span style="font-size:16px">For example, an e-commerce website can use a recommendation system to suggest products to a user based on their browsing and purchase history. A social media platform can recommend posts and accounts to follow based on the user's interests and behavior. An entertainment platform can recommend movies and TV shows to watch based on the user's viewing history and preferences.</span>

## Import necessary modules

In [2]:
import pandas as pd
import re
from bertopic import BERTopic
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


In [3]:
movies_df = pd.read_csv('movielens-20m-dataset/movie.csv')
ratings_df = pd.read_csv('movielens-20m-dataset/rating.csv',low_memory=False)

In [4]:
## Preprocess movie title
movies_df['title'] = movies_df['title'].apply(lambda x: re.sub(r'[^\w\s]','',x.lower()))

## Preprocess movie genres
#movies_df['genres'] = movies_df['genres'].apply(lambda x: x.lower().split("|"))
movies_df['genres'] = movies_df['genres'].apply(lambda x: ' '.join(x))

In [15]:
ratings_movie_df = pd.merge(ratings_df, movies_df, on='movieId')
user_ratings_df = ratings_movie_df.groupby('userId').agg({'title':lambda x:' '.join(x),
                                                         'genres':lambda  x: ' '.join(x)})
user_ratings_df = user_ratings_df.sample(frac=0.1).reset_index(drop=True)

In [16]:
model = BERTopic()
user_topics,_ = model.fit_transform(user_ratings_df['title']+''+user_ratings_df['genres'])

In [17]:
user_interest = 'action_movies' # replace with your own user interest
user_topic_dist = model.transform([user_interest])[0]
doc_topic_dist = model.transform(user_ratings_df['title'] + ' ' + user_ratings_df['genres'])
doc_sim_matrix = cosine_similarity(doc_topic_dist)

In [25]:
doc_sim_matrix = doc_sim_matrix.reshape(-1, 1)

In [26]:
#doc_sim_matrix = cosine_similarity(model.get_topics())

## function for recommendation
def recommend_movies(user_interest):
    ## Get the topic distribution for the user interest
    user_topic_dist = model.transform([user_interest])[0]
    user_topic_dist = np.array(user_topic_dist).reshape(1, -1)
    
    ## Calculate similarity score
    if user_topic_dist.shape[1] != doc_sim_matrix.shape[1]:
        user_topic_dist = user_topic_dist.reshape(user_topic_dist.shape[0], doc_sim_matrix.shape[1])
    sim_scores = cosine_similarity(user_topic_dist, doc_sim_matrix)[0]
    
    ## Get the indices of top 5 similar users
    top_user_indices = sim_scores.argsort()[:-6:-1]
    
    top_movies = []
    for user_index in top_user_indices:
        user_movies = ratings_movie_df[ratings_movie_df['userId']==user_index]['title'].unique()
        
        top_movies.extend(user_movies[:5])
        
    return top_movies

recommend_movies('action_movies')

['twelve monkeys aka 12 monkeys 1995',
 'usual suspects the 1995',
 'clerks 1994',
 'star wars episode iv  a new hope 1977',
 'shawshank redemption the 1994',
 'star wars episode iv  a new hope 1977',
 'blade runner 1982',
 'terminator 2 judgment day 1991',
 '2001 a space odyssey 1968',
 'star wars episode v  the empire strikes back 1980',
 'jumanji 1995',
 'city of lost children the cité des enfants perdus la 1995',
 'twelve monkeys aka 12 monkeys 1995',
 'seven aka se7en 1995',
 'usual suspects the 1995']