In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity, pairwise_distances
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Toy ratings log: one (user id, movie title, score) triple per observation.
_rating_rows = [
    (1, 'Inception', 5),
    (1, 'The Matrix', 4),
    (1, 'Interstellar', 5),
    (2, 'Inception', 4),
    (2, 'The Dark Knight', 5),
    (3, 'Interstellar', 5),
    (3, 'The Matrix', 4),
    (3, 'Fight Club', 5),
    (4, 'The Dark Knight', 4),
]

# Transpose the triples into the column-oriented mapping the DataFrame is built from.
data = {
    column: list(values)
    for column, values in zip(('user_id', 'movie_title', 'rating'), zip(*_rating_rows))
}

df = pd.DataFrame(data)

# Catalogue of (title, plot summary) pairs used by the content-based recommender.
_catalogue = [
    ('Inception',
     'A mind-bending thriller where dreams within dreams are the key to a complex heist.'),
    ('The Matrix',
     'A hacker discovers the true nature of reality and his role in the war against its controllers.'),
    ('Interstellar',
     'A journey through space and time to find a new home for humanity.'),
    ('The Dark Knight',
     'A superhero battles crime and his own inner demons in a gritty cityscape.'),
    ('Fight Club',
     'An office worker creates an underground fight club as a form of rebellion.'),
]

# Split the pairs into the two parallel columns of the movie table.
movies = {
    'movie_title': [title for title, _ in _catalogue],
    'description': [summary for _, summary in _catalogue],
}

movie_df = pd.DataFrame(movies)

# Content-Based Filtering
# Turn every movie description into a TF-IDF vector, ignoring English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movie_df['description'])

# Movie-to-movie cosine similarity; with no second argument the matrix is
# compared against itself, so row i / column j relates movie i to movie j.
cosine_sim = cosine_similarity(tfidf_matrix)

# Function to get movie recommendations based on content
def get_recommendations(title, cosine_sim=cosine_sim, top_n=3):
    """Return the titles most similar to *title* according to *cosine_sim*.

    Args:
        title: Value to look up in ``movie_df['movie_title']``. When several
            rows carry the same title, the first one is used.
        cosine_sim: Square similarity matrix whose row/column order matches
            the row order of ``movie_df``. Defaults to the module-level
            matrix built from the movie descriptions.
        top_n: Maximum number of recommendations to return (default 3).
            Negative values are treated as 0.

    Returns:
        A pandas Series of movie titles ordered from most to least similar.
        The queried movie itself is never included.

    Raises:
        IndexError: If *title* is not present in ``movie_df``.
    """
    # Look the movie up by row *position*, not by index label: the rows of
    # cosine_sim are positional, so a non-default DataFrame index must not
    # leak into the matrix lookup.
    positions = [
        pos for pos, name in enumerate(movie_df['movie_title']) if name == title
    ]
    if not positions:
        raise IndexError(f"movie title not found: {title!r}")
    idx = positions[0]

    # sorted() is stable, so movies with equal scores keep their table order.
    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Exclude the queried movie explicitly instead of assuming it sorts first;
    # with tied scores another movie can legitimately occupy position 0.
    movie_indices = [pos for pos, _ in ranked if pos != idx][:max(top_n, 0)]
    return movie_df['movie_title'].iloc[movie_indices]

# Demo: content-based suggestions seeded with a single title.
recommendations = get_recommendations(title='Inception')
print(
    "Content-Based Recommendations for 'Inception':",
    recommendations.tolist(),
)

# Collaborative Filtering
# Build the user x movie ratings table in a single chain: one row per user,
# one column per movie; cells with no rating are filled with 0 (a simplification).
user_movie_matrix = (
    df.pivot_table(values='rating', index='user_id', columns='movie_title')
    .fillna(0)
)

# User-to-user similarity, derived as 1 minus the pairwise cosine distance
# between the users' rating rows.
user_similarity = 1 - pairwise_distances(user_movie_matrix, metric='cosine')

# Predict ratings
def predict_ratings(user_similarity, user_movie_matrix):
    """Predict ratings as a similarity-weighted average over all users.

    Each output row is ``user_similarity[u] . user_movie_matrix`` divided by
    the sum of the absolute similarity weights in row ``u``.

    Args:
        user_similarity: Square array of user-to-user similarity weights.
        user_movie_matrix: Ratings table with one row per user and one
            column per movie, in the same user order as *user_similarity*.

    Returns:
        Array of predicted ratings with one row per user and one column per
        movie. A user whose similarity weights are all zero gets a prediction
        of 0 for every movie rather than NaN.
    """
    weighted_sum = user_similarity.dot(user_movie_matrix)

    # Column vector of per-user normalisers (sum of absolute weights).
    row_weight = np.array([np.abs(user_similarity).sum(axis=1)]).T

    # A zero normaliser means every weight in that row is zero, so the
    # weighted sum is zero as well; dividing by 1 there yields 0 and avoids
    # the 0/0 -> NaN result (and RuntimeWarning) of a plain division.
    row_weight = np.where(row_weight == 0, 1.0, row_weight)

    return weighted_sum / row_weight

user_pred = predict_ratings(user_similarity, user_movie_matrix)

# Label the raw prediction matrix with the same user rows and movie columns
# as the ratings table so it can be queried by user id and movie title.
user_pred_df = pd.DataFrame(
    data=user_pred,
    index=user_movie_matrix.index,
    columns=user_movie_matrix.columns,
)

# Function to recommend movies for a specific user
def recommend_movies(user_id, user_pred_df, num_recommendations=3):
    """Suggest movies the user has not rated yet, best predicted score first.

    Args:
        user_id: Row label of the user in *user_pred_df*; also matched against
            the ``user_id`` column of the module-level ratings table ``df``.
        user_pred_df: DataFrame of predicted ratings (users as rows, movie
            titles as columns).
        num_recommendations: Maximum number of titles to return (default 3).

    Returns:
        A list of movie titles, ordered by descending predicted rating, that
        do not appear among the user's rows in ``df``.
    """
    # Rank every movie for this user by predicted score, highest first.
    ranked = user_pred_df.loc[user_id].sort_values(ascending=False)

    # Titles this user already has a rating for in the ratings log.
    already_rated = set(df.loc[df['user_id'] == user_id, 'movie_title'])

    unseen = [movie for movie in ranked.index if movie not in already_rated]
    return unseen[:num_recommendations]

# Demo: collaborative-filtering suggestions for the user with id 1.
collaborative_recommendations = recommend_movies(user_id=1, user_pred_df=user_pred_df)
print(
    "Collaborative Filtering Recommendations for User 1:",
    collaborative_recommendations,
)


Content-Based Recommendations for 'Inception': ['The Matrix', 'Interstellar', 'The Dark Knight']
Collaborative Filtering Recommendations for User 1: ['Fight Club', 'The Dark Knight']
