# Part 3: Collaborative Filtering and Hybrid Approach



In [1]:
from scipy.sparse.linalg import svds
from sklearn.metrics.pairwise import cosine_similarity, linear_kernel
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from utils import *

ensure_results_folders()

Results folder exists at: c:\Users\moham\Desktop\IRS GIT\SECTION2_DomainRecommender\results
Subfolder exists: c:\Users\moham\Desktop\IRS GIT\SECTION2_DomainRecommender\results\tables
Results folder exists at: c:\Users\moham\Desktop\IRS GIT\SECTION2_DomainRecommender\results
Subfolder exists: c:\Users\moham\Desktop\IRS GIT\SECTION2_DomainRecommender\results\tables


'c:\\Users\\moham\\Desktop\\IRS GIT\\SECTION2_DomainRecommender\\results'

# Data Loading and Preprocessing

In [2]:
# Read the label-encoded ratings table. The code below treats a None result
# from `load_data` as a failed load and only previews the frame otherwise.
filename = 'Amazon_health&household_label_encoded.csv'
print(f"Loading {filename}...")
df = load_data(filename)

if df is None:
    print("Failed to load dataset.")
else:
    # Confirmation line, first rows, and overall dimensions, in that order.
    for report_line in ("Dataset/Sample loaded successfully.", df.head(), f"Shape: {df.shape}"):
        print(report_line)

Loading Amazon_health&household_label_encoded.csv...
 Found requested table at: ..\data\Amazon_health&household_label_encoded.csv
Dataset/Sample loaded successfully.
                        user_id  \
0  AEV4DP5E3FJH6FHDLXUYQTDEQYCQ   
1  AEHWUHNTB5FX32HJ7UBOZ2WWUX3Q   
2  AEHWUHNTB5FX32HJ7UBOZ2WWUX3Q   
3  AEHWUHNTB5FX32HJ7UBOZ2WWUX3Q   
4  AEHWUHNTB5FX32HJ7UBOZ2WWUX3Q   

                                                item  item_id_encoded  rating  \
0  Sonic Handheld Percussion Massage Gun - Deep T...              827       2   
1  DUDE Wipes On-The-Go Flushable Wet Wipes - 1 P...              255       5   
2  Sleep Mask for Side Sleeper, 100% Blackout 3D ...              814       5   
3  Cottonelle Freshfeel Flushable Wet Wipes, Adul...              226       5   
4  Silk Sleep Eye Mask for Men Women, Comfortable...              808       5   

   price                                               text  is_green  
0  79.99  This product worked great when it worked, but ...     

In [3]:
# Build the user x item rating matrix and mean-centre it per user.
# The user / item / rating columns are discovered by case-insensitive substring
# match on the column names; the first matching column wins.
if df is not None:
    print("Columns:", df.columns)
    print("Creating User-Item Matrix...")

    # Lower-case each column name once, then pick the first hit per role.
    lowered_names = [(col, col.lower()) for col in df.columns]
    user_col = [col for col, low in lowered_names if 'user' in low][0]
    item_col = [
        col for col, low in lowered_names
        if any(token in low for token in ('item', 'movie', 'product'))
    ][0]
    rating_col = [col for col, low in lowered_names if 'rating' in low][0]

    print(f"Using columns: User='{user_col}', Item='{item_col}', Rating='{rating_col}'")

    # `pivot` needs unique (user, item) pairs, so repeated pairs are collapsed
    # to their mean rating. Note that this rebinds `df` to a three-column frame
    # (user, item, rating); the remaining columns are dropped from `df`.
    pair_keys = [user_col, item_col]
    duplicate_rows = df.duplicated(subset=pair_keys)
    if duplicate_rows.any():
        print(f"Duplicate entries found: {duplicate_rows.sum()}")
        print("Aggregating duplicates by taking the mean rating...")
        df = df.groupby(pair_keys, as_index=False)[rating_col].mean()

    # Rows = users, columns = items, NaN where a user has no rating for an item.
    R_df = df.pivot(index=user_col, columns=item_col, values=rating_col)

    # Subtract each user's mean rating (computed over their observed ratings).
    print("Normalizing by subtracting User Means...")
    user_means = R_df.mean(axis=1)
    R_centered = R_df.sub(user_means, axis=0)

    # Missing entries become 0, i.e. "equal to this user's average".
    R_filled = R_centered.fillna(0)
    R = R_filled.to_numpy()

    print(f"Matrix R Shape: {R.shape}")

Columns: Index(['user_id', 'item', 'item_id_encoded', 'rating', 'price', 'text',
       'is_green'],
      dtype='object')
Creating User-Item Matrix...
Using columns: User='user_id', Item='item', Rating='rating'
Duplicate entries found: 208
Aggregating duplicates by taking the mean rating...
Normalizing by subtracting User Means...
Matrix R Shape: (10000, 1000)


## 8. Collaborative Filtering Integration

### 8.1: Item-Based Collaborative Filtering (Cosine Similarity)

In [4]:
def perform_item_based_cf(matrix_centered, top_n=5):
    """Score every (user, item) pair with item-item cosine similarity.

    Args:
        matrix_centered: 2-D array laid out users x items. It is transposed
            before the similarity call so that each item is one sample.
        top_n: Kept in the signature for callers; not used in the body.

    Returns:
        tuple: ``(R_pred_centered, item_similarity)`` where the first element
        has the same users x items layout as the input and the second is the
        square item x item similarity matrix.

    Note:
        The denominator is the total absolute similarity of each item to ALL
        items, not only to the items a given user has rated.
    """
    print("\nComputing Item-Item Cosine Similarity...")
    item_similarity = cosine_similarity(matrix_centered.T)
    print(f"Item Similarity Matrix Shape: {item_similarity.shape}")

    print("Generating predictions (Item-Based)...")
    # Numerator: similarity-weighted sum of each user's centred ratings.
    weighted_scores = np.dot(matrix_centered, item_similarity)

    # Denominator as a (1, n_items) row so it broadcasts across users.
    similarity_mass = np.abs(item_similarity).sum(axis=1)[np.newaxis, :]
    # An item with zero total similarity would divide by zero; use a tiny floor.
    similarity_mass = np.where(similarity_mass == 0, 1e-9, similarity_mass)

    return weighted_scores / similarity_mass, item_similarity

def recommend_items(user_idx, R_pred, original_df_pivot, top_n=5):
    """Return the top-N unseen items for one user from centred predictions.

    Shared by the item-based CF and SVD cells.

    Args:
        user_idx: Positional row index of the user in ``original_df_pivot``.
        R_pred: 1-D sequence of mean-centred predicted scores for this user,
            one entry per column of ``original_df_pivot``.
        original_df_pivot: users x items DataFrame of raw ratings with NaN
            where no rating was observed.
        top_n: Number of recommendations to return.

    Returns:
        pandas.Series: predicted ratings indexed by item label, sorted in
        descending order, excluding items the user has already rated.
    """
    user_row = original_df_pivot.iloc[user_idx]

    # Add the user's mean back to move from centred scores to the rating scale.
    # A user without any observed rating has a NaN mean, which would turn every
    # prediction into NaN; fall back to 0 exactly as get_hybrid_recommendations
    # does so the ranking stays meaningful.
    user_mean = user_row.mean(skipna=True)
    if pd.isna(user_mean):
        user_mean = 0

    predictions = pd.Series(
        np.asarray(R_pred, dtype=float) + user_mean,
        index=original_df_pivot.columns,
    )

    # Never recommend something the user has already rated.
    observed_items = user_row.dropna().index
    predictions = predictions.drop(labels=observed_items, errors='ignore')

    return predictions.sort_values(ascending=False).head(top_n)

# Run item-based CF on the centred matrix and show recommendations for the
# first five rows of the user-item matrix. Skipped when `R` was never built.
if 'R' in locals():
    R_pred_item_centered, item_sim_matrix = perform_item_based_cf(R)

    # Positional indices of the users to showcase.
    target_users = range(5)

    print(f"\nTop 5 Personalized Recommendations (Item-Based CF) for first 5 users:")
    for u_idx in target_users:
        user_real_id = R_df.index[u_idx]
        # Pass only this user's row of centred predictions.
        recs = recommend_items(u_idx, R_pred_item_centered[u_idx], R_df)

        print(f"\nUser {user_real_id}:")
        for rank, (item_name, rating) in enumerate(recs.items(), start=1):
            print(f"  {rank}. Item {item_name} (Pred: {rating:.2f})")


Computing Item-Item Cosine Similarity...
Item Similarity Matrix Shape: (1000, 1000)
Generating predictions (Item-Based)...

Top 5 Personalized Recommendations (Item-Based CF) for first 5 users:

User AE22FRNBP2JT5DYDEZSPVR3LKQDA:
  1. Item simplehuman Trash Can Liner A, 4.5 Liters/1.2 Gallons, 30-Count (Pred: 5.00)
  2. Item (20 Pack) Extra Large Eraser Sponge - Extra Thick, Long Lasting, Premium Melamine Sponges in Bulk - Multi Surface Power Scrubber Foam Cleaning Pads - Bathtub, Floor, Baseboard, Bathroom, Wall Cleaner (Pred: 5.00)
  3. Item 1 BY ONE Scale for Body Weight, Smart Body Fat Scale, Digital Bathroom Weighing Scale with Water Percentage Muscle Mass Bluetooth BMI, 14 Body Composition Analyzer, 400 lb (Pred: 5.00)
  4. Item 1 Pair Happystep Comfort Memory Foam Insoles, Orthotic PU Shoe Inserts, Arch Support, Heel Cushioning, Shock Absorption, Plantar Fasciitis Foot Pain Relief for Men and Women (US W: 5-6) (Pred: 5.00)
  5. Item 1 X 10 Energizer 377 376 Watch Battery SR626S

### 8.2: Matrix Factorization using SVD

In [5]:
def perform_svd(matrix, k):
    """Compute a rank-k truncated SVD and the corresponding reconstruction.

    Args:
        matrix: 2-D matrix to factorise (the notebook passes the mean-centred,
            zero-filled user-item matrix).
        k: Number of singular triplets to compute; forwarded to
            ``scipy.sparse.linalg.svds``, which requires it to be smaller than
            the smallest matrix dimension.

    Returns:
        tuple: ``(U, sigma, Vt, R_k_centered)`` with ``sigma`` sorted in
        descending order, ``U`` / ``Vt`` permuted to match, and
        ``R_k_centered = U @ diag(sigma) @ Vt``.
    """
    print(f"\nCalculating SVD with k={k}...")

    # svds only works on floating-point data; promote integer ndarrays.
    if isinstance(matrix, np.ndarray) and not np.issubdtype(matrix.dtype, np.inexact):
        matrix = matrix.astype(float)

    U, sigma, Vt = svds(matrix, k=k)

    # Do not rely on the order svds happens to return: sort explicitly so the
    # largest singular value always comes first.
    order = np.argsort(sigma)[::-1]
    U = U[:, order]
    sigma = sigma[order]
    Vt = Vt[order, :]

    # Scaling the columns of U by sigma is U @ diag(sigma) without
    # materialising the k x k diagonal matrix.
    R_k_centered = (U * sigma) @ Vt

    return U, sigma, Vt, R_k_centered

def calculate_metrics(original_centered, approximation_centered, mask=None):
    """Compute MAE and RMSE between a matrix and its approximation.

    Args:
        original_centered: Reference matrix (array-like).
        approximation_centered: Approximation with the same shape.
        mask: Optional boolean array of the same shape. When given, only the
            entries where ``mask`` is True contribute to the error, which lets
            callers score observed ratings only. When omitted, every entry is
            scored, including the zeros that stand in for missing ratings
            (the original behaviour).

    Returns:
        tuple: ``(mae, rmse)``. Both are NaN when no entry is selected.
    """
    diff = (
        np.asarray(original_centered, dtype=float)
        - np.asarray(approximation_centered, dtype=float)
    )
    if mask is not None:
        diff = diff[np.asarray(mask, dtype=bool)]

    # Averaging an empty selection would emit a warning and return NaN anyway;
    # make that outcome explicit.
    if diff.size == 0:
        return float('nan'), float('nan')

    mae = np.mean(np.abs(diff))
    rmse = np.sqrt(np.mean(np.square(diff)))
    return mae, rmse

# Sweep the SVD rank and record the reconstruction error for each k.
# `svd_results` maps k -> {'R_k_centered', 'MAE', 'RMSE'} for later cells.
k_values = [10, 20]
svd_results = {}

if 'R' in locals():
    row_format = "{:<5} {:<10.4f} {:<10.4f}"
    print("{:<5} {:<10} {:<10}".format("k", "MAE", "RMSE"))
    print("-" * 30)

    for k in k_values:
        U, sigma, Vt, R_k_centered = perform_svd(R, k)
        # Error is measured against the full centred matrix R.
        mae, rmse = calculate_metrics(R, R_k_centered)

        svd_results[k] = dict(R_k_centered=R_k_centered, MAE=mae, RMSE=rmse)
        print(row_format.format(k, mae, rmse))

k     MAE        RMSE      
------------------------------

Calculating SVD with k=10...
10    0.0004     0.0205    

Calculating SVD with k=20...
20    0.0005     0.0198    


In [6]:
# Show SVD-based recommendations for the first five users using the rank-20
# reconstruction stored in `svd_results`.
if 'R' in locals():
    best_k = 20
    if best_k not in svd_results:
         print(f"SVD results for k={best_k} not found.")
    else:
        R_final_centered = svd_results[best_k]['R_k_centered']

        # Positional indices of the users to showcase.
        target_users = range(5)

        print(f"\nTop 5 Personalized Recommendations (SVD k={best_k}) for first 5 users:")
        for u_idx in target_users:
            user_real_id = R_df.index[u_idx]
            # Only this user's row of the centred reconstruction is needed.
            recs = recommend_items(u_idx, R_final_centered[u_idx], R_df)

            print(f"\nUser {user_real_id}:")
            for rank, (item_name, rating) in enumerate(recs.items(), start=1):
                print(f"  {rank}. Item {item_name} (Pred: {rating:.2f})")


Top 5 Personalized Recommendations (SVD k=20) for first 5 users:

User AE22FRNBP2JT5DYDEZSPVR3LKQDA:
  1. Item simplehuman Trash Can Liner A, 4.5 Liters/1.2 Gallons, 30-Count (Pred: 5.00)
  2. Item (20 Pack) Extra Large Eraser Sponge - Extra Thick, Long Lasting, Premium Melamine Sponges in Bulk - Multi Surface Power Scrubber Foam Cleaning Pads - Bathtub, Floor, Baseboard, Bathroom, Wall Cleaner (Pred: 5.00)
  3. Item 1 BY ONE Scale for Body Weight, Smart Body Fat Scale, Digital Bathroom Weighing Scale with Water Percentage Muscle Mass Bluetooth BMI, 14 Body Composition Analyzer, 400 lb (Pred: 5.00)
  4. Item 1 Pair Happystep Comfort Memory Foam Insoles, Orthotic PU Shoe Inserts, Arch Support, Heel Cushioning, Shock Absorption, Plantar Fasciitis Foot Pain Relief for Men and Women (US W: 5-6) (Pred: 5.00)
  5. Item 1 X 10 Energizer 377 376 Watch Battery SR626SW SR626W (Pred: 5.00)

User AE22KNY5CIUZ4TMKXJKKVXP4JHMQ:
  1. Item simplehuman Trash Can Liner A, 4.5 Liters/1.2 Gallons, 30-Cou

## 9. Hybrid Recommendation Strategy    

In [7]:
# 1. Item feature matrix: TF-IDF of the text column + scaled price + is_green.
print("Preparing Item Features (Robust: Text + Price + Is_Green)...")

# The file is read again here because the `df` from the matrix-building cell
# may have been reduced to (user, item, rating) by the duplicate aggregation.
df_meta = load_data('Amazon_health&household_label_encoded.csv')

if df_meta is not None:
    # One metadata row per item (first occurrence wins), ordered exactly like
    # the columns of R_df so that feature row i describes matrix column i.
    unique_items = R_df.columns.tolist()
    item_metadata = (
        df_meta[['item', 'price', 'text', 'is_green']]
        .drop_duplicates(subset='item')
        .set_index('item')
        .reindex(unique_items)
    )

    # Fill gaps: empty text, median price, and is_green defaulting to 0.
    item_metadata['text'] = item_metadata['text'].fillna('')
    item_metadata['price'] = item_metadata['price'].fillna(item_metadata['price'].median())
    item_metadata['is_green'] = item_metadata['is_green'].fillna(0).astype(int)

    print(f"Unique Items aligned with Matrix: {len(item_metadata)}")

    # A. Text -> TF-IDF restricted to 100 terms, English stop words removed.
    print("  Computing TF-IDF...")
    tfidf = TfidfVectorizer(stop_words='english', max_features=100)
    text_matrix = tfidf.fit_transform(item_metadata['text']).toarray()

    # B. Price -> min-max scaled single column.
    print("  Normalizing Price...")
    scaler = MinMaxScaler()
    price_vec = scaler.fit_transform(item_metadata[['price']])

    # C. is_green flag as a single column.
    green_vec = item_metadata[['is_green']].to_numpy()

    # D. Concatenate the three feature groups column-wise.
    item_features = np.concatenate([text_matrix, price_vec, green_vec], axis=1)
    print(f"  Item-Feature Matrix Ready: {item_features.shape}")

# 2. Build User Profiles
def build_user_profiles(R_df, item_features_matrix):
    """Build one content profile per user as a rating-weighted feature average.

    Args:
        R_df: users x items DataFrame of raw ratings, NaN where unrated. Its
            column order must match the row order of ``item_features_matrix``.
        item_features_matrix: 2-D array with one feature row per item.

    Returns:
        dict: user label -> 1-D profile vector. Users with no ratings, or whose
        ratings sum to zero, receive the mean of all item feature rows.
    """
    print("\nBuilding User Profiles...")
    features = np.asarray(item_features_matrix)

    # Fallback profile: the global average item.
    cold_start_vector = np.mean(features, axis=0)

    # Work on the raw array and positional indices. This avoids one label
    # lookup per rated item per user and stays correct with repeated labels.
    ratings = R_df.to_numpy(dtype=float)

    user_profiles = {}
    for user_id, user_ratings in zip(R_df.index, ratings):
        rated_positions = np.flatnonzero(~np.isnan(user_ratings))

        if rated_positions.size == 0:
            # Copy so that editing one user's profile cannot alter another's.
            user_profiles[user_id] = cold_start_vector.copy()
            continue

        # Column vector of ratings so it broadcasts across feature columns.
        weights = user_ratings[rated_positions].reshape(-1, 1)
        total_weight = np.sum(weights)

        if total_weight == 0:
            user_profiles[user_id] = cold_start_vector.copy()
        else:
            weighted_sum = np.sum(features[rated_positions] * weights, axis=0)
            user_profiles[user_id] = weighted_sum / total_weight

    print(f"  Built profiles for {len(user_profiles)} users.")
    return user_profiles

# Profiles can only be built once the item feature matrix exists.
if 'item_features' in locals():
    user_profiles = build_user_profiles(R_df=R_df, item_features_matrix=item_features)

def get_hybrid_recommendations(user_idx, alpha, R_df, item_based_pred_centered, user_profiles, item_features, top_n=5):
    """Blend content-based and item-based CF scores into one ranking.

    Args:
        user_idx: Positional row index of the user in ``R_df``.
        alpha: Weight of the content-based score; the CF score gets
            ``1 - alpha``.
        R_df: users x items DataFrame of raw ratings (NaN where unrated).
        item_based_pred_centered: users x items array of centred CF scores.
        user_profiles: Mapping of user label -> profile vector.
        item_features: 2-D array with one feature row per item.
        top_n: Number of recommendations to return.

    Returns:
        pandas.Series: hybrid scores indexed by item label, sorted descending,
        with the user's already-rated items removed.
    """
    def _to_unit_range(scores):
        # Min-max scale; a constant vector maps to all zeros.
        low, high = scores.min(), scores.max()
        if high - low != 0:
            return (scores - low) / (high - low)
        return np.zeros_like(scores)

    user_id = R_df.index[user_idx]
    user_row = R_df.iloc[user_idx]

    # --- CF component: centred scores shifted by the user's mean rating
    # (0 when the user has no ratings), then scaled.
    user_mean = user_row.mean(skipna=True)
    if pd.isna(user_mean):
        user_mean = 0
    cf_norm = _to_unit_range(item_based_pred_centered[user_idx, :] + user_mean)

    # --- CB component: cosine similarity of the user's profile to every item;
    # all zeros when the user has no stored profile.
    if user_id in user_profiles:
        profile_row = user_profiles[user_id].reshape(1, -1)
        cb_scores = cosine_similarity(profile_row, item_features).flatten()
    else:
        cb_scores = np.zeros(len(item_features))
    cb_norm = _to_unit_range(cb_scores)

    # --- Weighted blend of the two scaled signals.
    hybrid_scores = (alpha * cb_norm) + ((1 - alpha) * cf_norm)

    # --- Rank, excluding items the user already rated.
    ranked = pd.Series(hybrid_scores, index=R_df.columns)
    ranked = ranked.drop(labels=user_row.dropna().index, errors='ignore')
    return ranked.sort_values(ascending=False).head(top_n)

# Print hybrid recommendations for the first five users at several blend
# weights. Requires the item-based CF predictions and the user profiles.
if 'R_pred_item_centered' in locals() and 'user_profiles' in locals():
    alphas = [0.3, 0.5, 0.7]
    target_users = range(5)
    banner = '=' * 50

    for alpha in alphas:
        print(f"\n{banner}")
        print(f"Hybrid Recommendations (alpha={alpha} [CB={alpha}, CF={1-alpha}])")
        print(banner)

        for u_idx in target_users:
            user_real_id = R_df.index[u_idx]
            # The CF side of the blend comes from the item-based predictions.
            recs = get_hybrid_recommendations(
                u_idx, alpha, R_df, R_pred_item_centered, user_profiles, item_features
            )

            print(f"\nUser {user_real_id}:")
            for rank, (item_name, score) in enumerate(recs.items(), start=1):
                print(f"  {rank}. Item {item_name} (Score: {score:.4f})")

Preparing Item Features (Robust: Text + Price + Is_Green)...
 Found requested table at: ..\data\Amazon_health&household_label_encoded.csv
Unique Items aligned with Matrix: 1000
  Computing TF-IDF...
  Normalizing Price...
  Item-Feature Matrix Ready: (1000, 102)

Building User Profiles...
  Built profiles for 10000 users.

Hybrid Recommendations (alpha=0.3 [CB=0.3, CF=0.7])

User AE22FRNBP2JT5DYDEZSPVR3LKQDA:
  1. Item Vive Wheelchair Bag for Accessories - Fits Walkers, Rollators, and Chairs - Adjustable Senior Backpack - Folding Bag for Elderly Women, Rolling Tray Caddy Pouch Adult Tote Attachment, Water Resistant (Score: 0.2341)
  2. Item Etekcity Digital Bathroom Scale for Body Weight for People, Extra Wide Platform and High Capacity, Large Number and Easy-to-Read on Backlit LCD Display, 440 lb (Score: 0.2340)
  3. Item Medline Bed Assist Bar with Storage Pocket, Height Adjustable Bed Rail for Elderly Adults, Assistance for Getting In and Out of Bed at Home (Score: 0.2201)
  4. Item

## 10. Cold-Start Handling

In [8]:
print("\n10. Cold-Start Analysis")
print("-" * 30)

# Number of observed ratings per user.
user_counts = R_df.count(axis=1)

# For each target count t, keep the first user with between t and t+2 ratings
# (inclusive). `cold_start_users` maps t -> positional row index in R_df.
targets = [3, 5, 10]
cold_start_users = {}

for t in targets:
    candidates = user_counts[user_counts.between(t, t + 2)].index
    if len(candidates) == 0:
        print(f"No user found with ~{t} ratings.")
        continue
    cold_start_users[t] = R_df.index.get_loc(candidates[0])

# Define Popularity Baseline Function
def get_popular_recommendations(original_df_pivot, top_n=5):
    """Return the ``top_n`` most-rated items.

    Popularity is the number of non-missing ratings in each column of
    ``original_df_pivot``; the result is a Series of those counts indexed by
    item label, largest first.
    """
    ratings_per_item = original_df_pivot.count()
    ranked = ratings_per_item.sort_values(ascending=False)
    return ranked.head(top_n)

# Popularity baseline: item labels ordered by how many ratings they received.
popular_items = get_popular_recommendations(R_df)
print("\nMost Popular Items (Baseline):")
print(popular_items.index.tolist())

# For each selected low-activity user, show a slice of their history and the
# hybrid (alpha=0.5) recommendations.
if 'R_pred_item_centered' in locals() and 'user_profiles' in locals():
    for t, u_idx in cold_start_users.items():
        user_real_id = R_df.index[u_idx]
        print(f"\nUser with ~{t} ratings ({user_real_id}):")

        # Items this user has rated; only the first three are printed.
        obs = R_df.iloc[u_idx].dropna().index.tolist()
        print(f"  History: {obs[:3]} ... ({len(obs)} total)")

        hybrid_recs = get_hybrid_recommendations(
            u_idx, 0.5, R_df, R_pred_item_centered, user_profiles, item_features
        )
        print("  [Hybrid Recommendations]:")
        for rank, item in enumerate(hybrid_recs.index, start=1):
            print(f"    {rank}. {item}")


10. Cold-Start Analysis
------------------------------

Most Popular Items (Baseline):
['Etekcity Digital Body Weight Bathroom Scale with Step-On Technology, 400 Lb, Blue', 'Amazon Basics 36-Pack AA Alkaline High-Performance Batteries, 1.5 Volt, 10-Year Shelf Life', 'Amazon Basics 20-Pack AAA Alkaline High-Performance Batteries, 1.5 Volt, 10-Year Shelf Life', 'Wool Dryer Balls - Smart Sheep 6-Pack - XL Premium Natural Fabric Softener Award-Winning - Wool Balls Replaces Dryer Sheets - Wool Balls for Dryer - Laundry Balls for Dryer', 'Waterpik Aquarius Water Flosser Professional For Teeth, Gums, Braces, Dental Care, Electric Power With 10 Settings, 7 Tips For Multiple Users And Needs, ADA Accepted, Black WP-662']

User with ~3 ratings (AE22UINNLKIXVSCQF6YL7ZKNODHQ):
  History: ['Amazon Brand - Presto! Flex-a-Size Paper Towels, 948 Sheet Count - 158 Sheet per roll (1 Pack of 6), 6 Huge Rolls Equivalent to 19 Regular Rolls, White', 'Georgia-Pacific Angel Soft Professional Series Premium 2

## 11. Baseline Comparison

In [9]:
print("\n11. Baseline Comparison")
print("-"*30)

# Qualitative comparison: the top-2 recommendations of each approach for one
# test user (the first cold-start user if any was found, otherwise row 0).
test_user_idx = list(cold_start_users.values())[0] if cold_start_users else 0
test_user_id = R_df.index[test_user_idx]

# 1. Random baseline. A seeded generator keeps the picks identical across
#    Restart & Run All, so the comparison table is reproducible.
all_items = R_df.columns.tolist()
random_recs = np.random.default_rng(42).choice(all_items, 5, replace=False)

# 2. Popularity
pop_recs = popular_items.index.tolist()

# 3. Pure Content (Hybrid alpha=1.0)
cb_recs = get_hybrid_recommendations(test_user_idx, 1.0, R_df, R_pred_item_centered, user_profiles, item_features).index.tolist()

# 4. Pure CF (Hybrid alpha=0.0). The CF signal passed in is the ITEM-BASED
#    prediction matrix, not the SVD reconstruction, and is labelled as such.
cf_recs = get_hybrid_recommendations(test_user_idx, 0.0, R_df, R_pred_item_centered, user_profiles, item_features).index.tolist()

# 5. Hybrid (alpha=0.5)
hyb_recs = get_hybrid_recommendations(test_user_idx, 0.5, R_df, R_pred_item_centered, user_profiles, item_features).index.tolist()

# Display
comparison_df = pd.DataFrame({
    'Metric/Model': ['Random', 'Popularity', 'Pure Content (CB)', 'Pure CF (Item-Based)', 'Hybrid (0.5)'],
    'Top 1 Recommendation': [random_recs[0], pop_recs[0], cb_recs[0], cf_recs[0], hyb_recs[0]],
    'Top 2 Recommendation': [random_recs[1], pop_recs[1], cb_recs[1], cf_recs[1], hyb_recs[1]]
})

print(f"\nComparison for User {test_user_id}:")
print(comparison_df.to_string(index=False))


11. Baseline Comparison
------------------------------

Comparison for User AE22UINNLKIXVSCQF6YL7ZKNODHQ:
     Metric/Model                                                                                                                                                                         Top 1 Recommendation                                                                                                                                                                                    Top 2 Recommendation
           Random Sleep Mask | Eye Mask for Sleeping Men/Women Better Than Silk Our Luxury Blackout Contoured Eye Masks are Comfortable - This Sleeping mask Set Includes Carry Pouch and Ear Plugs (No Scent)                                                                                                                                                      OxiClean Versatile Stain Remover Powder, 7.22 Lbs.
       Popularity                                                              