## Content Based
We can recommend movies based on their similarity in features, such as genres and descriptions. This is a good option if we have a single user and want to recommend similar movies to their past favorites.

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
# Single place to point the notebook at the dataset directory; change this
# one constant when running on another machine.
DATA_DIR = "/home/meliyio/ai_projects/movie_recommender/data"

# NOTE(review): the previews below show an "Unnamed: 0" column, which suggests
# the CSVs were saved with their index -- consider index_col=0 if that column
# is not wanted.
movies = pd.read_csv(f"{DATA_DIR}/movies.csv")
ratings = pd.read_csv(f"{DATA_DIR}/ratings.csv")

In [3]:
# Preview the first rows of the movies table (columns: movieId, title, genres).
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
# Preview the first rows of the ratings table (userId, movieId, rating, timestamp).
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510


In [8]:
# Genres are pipe-delimited labels (e.g. "Comedy|Drama|Romance"), not free
# text. Tokenise on "|" so each genre is exactly one feature; the default word
# tokenizer would split hyphenated labels such as "Sci-Fi" into two unrelated
# tokens. English stop-word removal is dropped because it has no meaning for
# whole genre labels.
tfidf = TfidfVectorizer(token_pattern=r"[^|]+")

# fillna("") guards against rows with a missing genre string, which would
# otherwise make the vectorizer raise; such rows get an all-zero vector.
tfidf_matrix = tfidf.fit_transform(movies["genres"].fillna(""))

In [None]:
# Pairwise movie-to-movie similarity. With a single argument, linear_kernel
# compares the matrix against itself; because TF-IDF rows are L2-normalised by
# default, the dot product equals the cosine similarity.
# NOTE(review): the result is a dense (n_movies x n_movies) array -- confirm it
# fits in memory for the catalogue size in use.
cosine_sim = linear_kernel(tfidf_matrix)

In [None]:
# function to get movie recommendations
def get_recommendation(title, cosine_sim=cosine_sim):
    """Return the titles of the 10 movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in ``movies["title"]``.
    cosine_sim : array-like
        Square similarity matrix whose rows/columns follow the row order of
        ``movies``. The default is bound when this function is defined, so
        re-run this cell after recomputing ``cosine_sim``.

    Returns
    -------
    pandas.Series
        Up to 10 titles, most similar first; the queried movie is excluded.

    Raises
    ------
    IndexError
        If no row of ``movies`` has exactly this title.
    """
    # Use the row *position* (not the index label) so the lookup stays aligned
    # with cosine_sim and .iloc even if `movies` has a non-default index.
    matches = (movies["title"] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"No movie titled {title!r} found in `movies`.")
    idx = int(matches[0])

    # Pair every other movie with its similarity to the query. The query is
    # dropped explicitly: many movies can tie at the maximum similarity, so it
    # is not guaranteed to be first after sorting.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Most similar first, then keep the ten best.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    movie_indices = [position for position, _ in sim_scores[:10]]

    return movies["title"].iloc[movie_indices]

## Collaborative filtering
Collaborative filtering leverages user ratings and works well when we have data on user preferences.

In [None]:
# Requires the scikit-surprise package (pip install scikit-surprise);
# the importable module is named `surprise`.
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split

In [None]:
# Fixed seed so the train/test split and the SVD fit are reproducible
# under Restart & Run All.
SEED = 42

# Ratings range from 0.5 to 5.0; Surprise needs the scale declared up front.
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings[["userId", "movieId", "rating"]], reader)

# split into training and testing
trainset, testset = train_test_split(data, test_size=0.25, random_state=SEED)

# use svd algorithm
model = SVD(random_state=SEED)
model.fit(trainset)

# Predicted ratings for every (user, movie) pair held out in the test set.
predictions = model.test(testset)

In [None]:
# Getting recommendation
from collections import defaultdict

def get_top_n(predictions, n=10):
    """Group predictions by user and keep each user's ``n`` best estimates.

    Parameters
    ----------
    predictions : iterable of 5-tuples
        Each item unpacks as ``(uid, iid, true_r, est, details)``; only
        ``uid``, ``iid`` and ``est`` are used.
    n : int, default 10
        Maximum number of items kept per user.

    Returns
    -------
    collections.defaultdict
        Maps each ``uid`` to a list of ``(iid, est)`` tuples sorted by ``est``
        in descending order and truncated to ``n`` entries.
    """
    # Map the predictions to each user
    top_n = defaultdict(list)
    for uid, iid, _true_r, est, _details in predictions:
        top_n[uid].append((iid, est))

    # Sort the predictions for each user and retrieve the top-n items.
    # Only existing keys are reassigned, so iterating over items() is safe.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda pair: pair[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n

# Generate top-10 predictions for all users.
# NOTE(review): `predictions` comes from the held-out test set, so each user's
# list only contains movies they actually rated there -- confirm this is the
# intended candidate pool.
top_n = get_top_n(predictions, n=10)

## Hybrid recommender
A hybrid recommender combines both collaborative and content-based recommendations. A simple hybrid approach is to take a weighted average of the recommendation scores from both models.

In [None]:
def hybrid_recommendation(user_id, movie_title, content_weight=0.5, collaborative_weight=0.5):
    """Blend content-based and collaborative recommendations into one top-10 list.

    Parameters
    ----------
    user_id
        Key into ``top_n`` identifying the user for collaborative scores.
    movie_title : str
        Seed title passed to ``get_recommendation``.
    content_weight : float, default 0.5
        Multiplier applied to the content-based rank score.
    collaborative_weight : float, default 0.5
        Multiplier applied to the collaborative predicted rating.

    Returns
    -------
    list
        Up to 10 movie titles ordered by combined score, best first.

    Notes
    -----
    The two score sources are on different scales (rank scores vs. predicted
    ratings) and are combined without normalisation; tune the weights with
    that in mind.
    """
    # Content-based: turn the ranked list into scores, best rank = highest score.
    content_recs = get_recommendation(movie_title)
    n_content = len(content_recs)
    content_scores = {movie: n_content - rank for rank, movie in enumerate(content_recs)}

    # Collaborative: .get() avoids inserting an empty entry into the
    # defaultdict when the user has no predictions.
    collab_recs = dict(top_n.get(user_id, []))

    # Collaborative entries are keyed by movieId while content entries are
    # keyed by title; translate ids to titles so both share one key space.
    id_to_title = (
        movies.loc[movies["movieId"].isin(list(collab_recs)), ["movieId", "title"]]
        .set_index("movieId")["title"]
        .to_dict()
    )

    # combine scores
    hybrid_scores = {}
    for movie, score in content_scores.items():
        hybrid_scores[movie] = score * content_weight
    for movie_id, score in collab_recs.items():
        # Fall back to the raw id if it is missing from `movies`.
        movie = id_to_title.get(movie_id, movie_id)
        hybrid_scores[movie] = hybrid_scores.get(movie, 0) + score * collaborative_weight

    # sort and get top 10 recommendations
    ranked = sorted(hybrid_scores.items(), key=lambda item: item[1], reverse=True)
    return [movie for movie, _ in ranked[:10]]