In [None]:
import pandas as pd

# Read both CSV inputs (paths are relative to the working directory) into DataFrames.
movies, ratings = (pd.read_csv(csv_path) for csv_path in ('movies.csv', 'ratings.csv'))

# Preview: the cell's value is a 2-tuple holding the first rows of each frame.
(movies.head(), ratings.head())

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Missing genre entries become empty strings so the vectorizer only ever sees text.
movies['genres'] = movies['genres'].fillna('')

# Turn each movie's genre string into a TF-IDF row, ignoring English stop words.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(movies['genres'])

# Pairwise dot products between every pair of TF-IDF rows (movie x movie).
# With a single argument, linear_kernel compares the matrix against itself.
cosine_sim = linear_kernel(tfidf_matrix)

# Sanity check: show the dimensions of the similarity matrix.
cosine_sim.shape

In [None]:
from sklearn.decomposition import TruncatedSVD
import numpy as np

# Pivot the long ratings table into a userId x movieId grid; pairs with no
# rating are filled with 0.
user_item_matrix = (
    ratings
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)

# Factorize the grid into 20 components (seeded for repeatability) and keep
# the per-user coordinates returned by fit_transform.
svd = TruncatedSVD(n_components=20, random_state=42)
user_item_svd_matrix = svd.fit_transform(user_item_matrix)

# Multiply the user coordinates by the fitted components to obtain a
# predicted score for every (user, movie) cell.
predicted_ratings = np.dot(user_item_svd_matrix, svd.components_)

# Re-attach the userId / movieId labels of the input grid.
predicted_ratings_df = pd.DataFrame(
    predicted_ratings,
    index=user_item_matrix.index,
    columns=user_item_matrix.columns,
)

# Preview the first rows of the predictions.
predicted_ratings_df.head()

In [None]:
# Function to get movie recommendations based on content-based and collaborative filtering
def hybrid_recommendations(movie_title, userId, top_n=10):
    """Recommend movies similar to ``movie_title``, ranked by a user's predicted rating.

    Candidates are the ``top_n`` rows of ``movies`` with the highest
    ``cosine_sim`` score against the seed movie; the seed itself is never a
    candidate. Candidates are then ordered by the value stored for ``userId``
    in ``predicted_ratings_df``.

    Parameters
    ----------
    movie_title : str
        Exact value to look up in ``movies['title']``; the first match is used.
    userId
        Row label of ``predicted_ratings_df``.
    top_n : int, default 10
        Number of similar movies to consider.

    Returns
    -------
    pandas.DataFrame
        Copy of the candidate rows of ``movies`` with an added
        ``predicted_rating`` column, sorted by that column in descending
        order. Candidates that have no column in ``predicted_ratings_df``
        get NaN and sort last.

    Raises
    ------
    IndexError
        If no row of ``movies`` carries the given title.
    KeyError
        If ``userId`` is not a row label of ``predicted_ratings_df``.
    """
    # cosine_sim is addressed by row position, so resolve the title to a
    # position rather than to an index label.
    title_positions = (movies['title'] == movie_title).to_numpy().nonzero()[0]
    if len(title_positions) == 0:
        raise IndexError(f"movie title not found: {movie_title!r}")
    seed_pos = int(title_positions[0])

    similarities = cosine_sim[seed_pos]

    # Drop the seed by position instead of slicing off the first sorted entry:
    # other movies can tie with the seed's self-similarity, in which case the
    # seed is not guaranteed to sort first.
    ranked_positions = sorted(
        (pos for pos in range(len(similarities)) if pos != seed_pos),
        key=lambda pos: similarities[pos],
        reverse=True,
    )
    movie_indices = ranked_positions[:max(top_n, 0)]

    # Predicted scores for this user, keyed by movieId.
    user_predictions = predicted_ratings_df.loc[userId]

    recommended_movies = movies.iloc[movie_indices].copy()
    # Look the score up by movieId. Assigning the Series directly would align
    # on the frame's row labels, which are not movieIds.
    recommended_movies['predicted_rating'] = recommended_movies['movieId'].map(user_predictions)

    return recommended_movies.sort_values('predicted_rating', ascending=False)

# Smoke test: hybrid recommendations seeded with 'Toy Story (1995)' for userId 1.
# Assumes movies.csv contains that exact title and ratings.csv contains userId 1 — TODO confirm.
hybrid_recommendations('Toy Story (1995)', 1)

In [None]:
# Modified function to handle edge cases
def refined_hybrid_recommendations(movie_title, userId, top_n=10):
    """Hybrid recommendations that skip movies without a predicted rating.

    Candidates are the ``top_n`` rows of ``movies`` with the highest
    ``cosine_sim`` score against the seed movie; the seed itself is never a
    candidate. Candidates with no entry for ``userId`` in
    ``predicted_ratings_df`` are dropped, and the rest are ordered by
    predicted rating.

    Parameters
    ----------
    movie_title : str
        Exact value to look up in ``movies['title']``; the first match is used.
    userId
        Row label of ``predicted_ratings_df``.
    top_n : int, default 10
        Number of similar movies to consider before filtering.

    Returns
    -------
    pandas.DataFrame or str
        Copy of the surviving candidate rows of ``movies`` with an added
        ``predicted_rating`` column, sorted by it in descending order; or the
        string ``'No recommendations available'`` when no candidate has a
        predicted rating.

    Raises
    ------
    IndexError
        If no row of ``movies`` carries the given title.
    KeyError
        If ``userId`` is not a row label of ``predicted_ratings_df``.
    """
    # cosine_sim is addressed by row position, so resolve the title to a
    # position rather than to an index label.
    title_positions = (movies['title'] == movie_title).to_numpy().nonzero()[0]
    if len(title_positions) == 0:
        raise IndexError(f"movie title not found: {movie_title!r}")
    seed_pos = int(title_positions[0])

    similarities = cosine_sim[seed_pos]

    # Drop the seed by position: movies tying with the seed's self-similarity
    # mean the seed is not guaranteed to be the first sorted entry.
    ranked_positions = sorted(
        (pos for pos in range(len(similarities)) if pos != seed_pos),
        key=lambda pos: similarities[pos],
        reverse=True,
    )
    movie_indices = ranked_positions[:max(top_n, 0)]

    # Predicted scores for this user, keyed by movieId.
    user_predictions = predicted_ratings_df.loc[userId]

    recommended_movies = movies.iloc[movie_indices].copy()
    # map() yields NaN for movieIds absent from the predictions instead of
    # raising, and matches on movieId rather than on the frame's row labels.
    recommended_movies['predicted_rating'] = recommended_movies['movieId'].map(user_predictions)

    # Keep only candidates that actually have a prediction.
    recommended_movies = recommended_movies.dropna(subset=['predicted_rating'])
    if recommended_movies.empty:
        return 'No recommendations available'

    return recommended_movies.sort_values('predicted_rating', ascending=False)

# Smoke test: refined hybrid recommendations seeded with 'Toy Story (1995)' for userId 1.
# Assumes movies.csv contains that exact title and ratings.csv contains userId 1 — TODO confirm.
refined_hybrid_recommendations('Toy Story (1995)', 1)

In [None]:
from sklearn.neighbors import NearestNeighbors

# Function for Collaborative Filtering based recommendations
def collaborative_filtering_recommendations(userId, top_n=10):
    """Recommend the movies rated highest, on average, by a user's nearest neighbours.

    A brute-force cosine ``NearestNeighbors`` model is fitted on
    ``user_item_matrix``; up to ``top_n`` users closest to ``userId`` (the
    user itself excluded) are selected, their rows are averaged per movie,
    and the ``top_n`` movies with the highest mean are returned.

    Parameters
    ----------
    userId
        Row label of ``user_item_matrix``.
    top_n : int, default 10
        Maximum number of neighbours to use and of movies to return.

    Returns
    -------
    pandas.DataFrame
        Rows of ``movies`` for the recommended movieIds, ordered from highest
        to lowest mean neighbour rating. Empty when there is no neighbour to
        learn from.

    Raises
    ------
    KeyError
        If ``userId`` is not a row label of ``user_item_matrix``.
    """
    # Fit NearestNeighbors model on the full user-item grid.
    model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
    model_knn.fit(user_item_matrix)

    # Ask for one extra neighbour to leave room for the user itself, but never
    # for more rows than were fitted (kneighbors rejects that).
    wanted = max(top_n, 0)
    n_neighbors = min(wanted + 1, len(user_item_matrix))
    query = user_item_matrix.loc[userId].values.reshape(1, -1)
    _, indices = model_knn.kneighbors(query, n_neighbors=n_neighbors)

    # Exclude the user by id rather than by position: another user with an
    # identical rating vector can be returned ahead of the user itself.
    user_indices = [
        user_item_matrix.index[i]
        for i in indices.flatten()
        if user_item_matrix.index[i] != userId
    ][:wanted]
    if not user_indices:
        return movies.iloc[0:0]

    # Mean rating per movie across the neighbours, best first.
    similar_users_ratings = user_item_matrix.loc[user_indices].mean().sort_values(ascending=False)
    recommended_movie_ids = similar_users_ratings.index[:top_n]

    # isin() alone would return rows in the order of `movies`; reorder them so
    # the output follows the ranking computed above.
    rank_by_id = {movie_id: rank for rank, movie_id in enumerate(recommended_movie_ids)}
    recommended_movies = movies[movies['movieId'].isin(recommended_movie_ids)]
    ranks = recommended_movies['movieId'].map(rank_by_id).to_numpy()

    return recommended_movies.iloc[ranks.argsort(kind='stable')]

# Smoke test: collaborative-filtering recommendations for userId 1 with the default top_n.
# Assumes ratings.csv contains userId 1 — TODO confirm.
collaborative_filtering_recommendations(1)

In [None]:
# Function for Content-Based Filtering recommendations
def content_based_recommendations(movie_title, top_n=10):
    """Return the ``top_n`` movies most similar to ``movie_title`` by ``cosine_sim``.

    Parameters
    ----------
    movie_title : str
        Exact value to look up in ``movies['title']``; the first match is used.
    top_n : int, default 10
        Number of similar movies to return.

    Returns
    -------
    pandas.DataFrame
        Rows of ``movies`` ordered from most to least similar. The seed movie
        itself is never included.

    Raises
    ------
    IndexError
        If no row of ``movies`` carries the given title.
    """
    # cosine_sim is addressed by row position, so resolve the title to a
    # position rather than to an index label.
    title_positions = (movies['title'] == movie_title).to_numpy().nonzero()[0]
    if len(title_positions) == 0:
        raise IndexError(f"movie title not found: {movie_title!r}")
    seed_pos = int(title_positions[0])

    similarities = cosine_sim[seed_pos]

    # Drop the seed by position instead of slicing off the first sorted entry:
    # other movies can tie with the seed's self-similarity, in which case the
    # seed is not guaranteed to sort first. sorted() is stable, so tied movies
    # keep their order of appearance in `movies`.
    ranked_positions = sorted(
        (pos for pos in range(len(similarities)) if pos != seed_pos),
        key=lambda pos: similarities[pos],
        reverse=True,
    )
    movie_indices = ranked_positions[:max(top_n, 0)]

    return movies.iloc[movie_indices]

# Smoke test: content-based recommendations seeded with 'Toy Story (1995)' and the default top_n.
# Assumes movies.csv contains that exact title — TODO confirm.
content_based_recommendations('Toy Story (1995)')