In [11]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity

# --- STEP 1: LOAD AND PREPARE DATA ---
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=r_cols, encoding='latin-1')

i_cols = [
    'movie id', 'movie title', 'release date', 'video release date', 'IMDb URL', 
    'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy', 
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'
]
items = pd.read_csv('ml-100k/u.item', sep='|', names=i_cols, encoding='latin-1')
genre_cols = i_cols[5:] # Genres start from index 5

# 1.1 Collaborative Setup (User-User)
data_matrix = ratings.pivot_table(index='user_id', columns='movie_id', values='rating').fillna(0)
user_similarity = pairwise_distances(data_matrix, metric='cosine')
mean_user_rating = data_matrix.values.mean(axis=1)
ratings_diff = (data_matrix.values - mean_user_rating[:, np.newaxis])
user_prediction = mean_user_rating[:, np.newaxis] + user_similarity.dot(ratings_diff) / np.array([np.abs(user_similarity).sum(axis=1)]).T
cf_pred_df = pd.DataFrame(user_prediction, index=data_matrix.index, columns=data_matrix.columns)

# --- STEP 2: THE HYBRID FUNCTION ---

def get_hybrid_recommendations(user_id, top_n=10, cf_weight=0.3):
    """
    Combines Collaborative Filtering and Content-Based Filtering.
    cf_weight: 0.5 means 50% Collaborative and 50% Content.
    """
    # --- Part A: Content-Based Score ---
    user_ratings = ratings[ratings['user_id'] == user_id]
    if user_ratings.empty: return "User not found."
    
    # Build User Profile from genres
    user_rated_genres = user_ratings.merge(items[['movie id'] + genre_cols], left_on='movie_id', right_on='movie id')
    for genre in genre_cols:
        user_rated_genres[genre] = user_rated_genres[genre] * user_rated_genres['rating']
    
    user_profile = user_rated_genres[genre_cols].sum()
    if user_profile.sum() > 0: user_profile = user_profile / user_profile.sum()
    
    # Calculate Content Similarity for all movies
    content_sim = cosine_similarity(user_profile.values.reshape(1, -1), items[genre_cols].values).flatten()
    
    # --- Part B: Collaborative Score ---
    # Get the predicted ratings row for this user
    cf_scores = cf_pred_df.loc[user_id].values
    
    # --- Part C: Normalization and Combination ---
    # Normalize CF scores to 0-1 scale to match Content scores
    cf_min, cf_max = cf_scores.min(), cf_scores.max()
    cf_normalized = (cf_scores - cf_min) / (cf_max - cf_min) if (cf_max - cf_min) != 0 else cf_scores
    
    # Combine scores
    hybrid_scores = (cf_weight * cf_normalized) + ((1 - cf_weight) * content_sim)
    
    # Create results dataframe
    res_df = items[['movie id', 'movie title']].copy()
    res_df['score'] = hybrid_scores
    
    # Filter out already watched movies
    watched_ids = user_ratings['movie_id'].tolist()
    res_df = res_df[~res_df['movie id'].isin(watched_ids)]
    
    return res_df.sort_values(by='score', ascending=False).head(top_n)



In [13]:
# --- STEP 3: USAGE --- weight 0.3 is given in the function itself
user_id = 54
results = get_hybrid_recommendations(user_id)

print(f"Hybrid Recommendations 10 movies for User {user_id}:")
print(results[['movie title', 'score']])

Hybrid Recommendations 10 movies for User 54:
                           movie title     score
27                    Apollo 13 (1995)  0.733206
299               Air Force One (1997)  0.696010
171    Empire Strikes Back, The (1980)  0.692083
97    Silence of the Lambs, The (1991)  0.686142
78                Fugitive, The (1993)  0.682333
194             Terminator, The (1984)  0.658406
95   Terminator 2: Judgment Day (1991)  0.657063
53                     Outbreak (1995)  0.638400
143                    Die Hard (1988)  0.629053
264   Hunt for Red October, The (1990)  0.622961


In [15]:
#A Hybrid Recommendation System is the gold standard because it combines the best of both worlds: 
#it uses the wisdom of the crowd (Collaborative) while ensuring the movies match your personal taste in genres (Content-Based).