In [None]:
# Load spotify playlists data
import random
import pandas as pd

playlists_df = pd.read_csv(
    "data/spotify_playlists.csv", 
    on_bad_lines='skip',
    quoting=1,  # QUOTE_ALL
    encoding='utf-8',
    encoding_errors='ignore',
    nrows=500000
)
print("Playlists data loaded:")
print(f"Total rows: {len(playlists_df)}")
print(f"Column names: {playlists_df.columns.tolist()}")
print(f"\nFirst few rows:\n{playlists_df.head()}")

# Strip quotes from column names if present
playlists_df.columns = playlists_df.columns.str.strip().str.replace('"', '')
print(f"\nCleaned column names: {playlists_df.columns.tolist()}")

if 'playlistname' in playlists_df.columns:
    print(f"\nUnique playlists: {playlists_df['playlistname'].nunique()}")
    print(f"\nSample playlists:\n{playlists_df['playlistname'].value_counts().head()}")
else:
    print("\nNote: 'playlistname' column not found. Check column names above.")

Playlists data loaded:
Total rows: 500000
Column names: ['user_id', ' "artistname"', ' "trackname"', ' "playlistname"']

First few rows:
                            user_id                      "artistname"  \
0  9cc0cfd4d7d7885102480dd99e7a90d6                    Elvis Costello   
1  9cc0cfd4d7d7885102480dd99e7a90d6  Elvis Costello & The Attractions   
2  9cc0cfd4d7d7885102480dd99e7a90d6                      Tiffany Page   
3  9cc0cfd4d7d7885102480dd99e7a90d6  Elvis Costello & The Attractions   
4  9cc0cfd4d7d7885102480dd99e7a90d6                    Elvis Costello   

                                         "trackname"  "playlistname"  
0               (The Angels Wanna Wear My) Red Shoes  HARD ROCK 2010  
1  (What's So Funny 'Bout) Peace, Love And Unders...  HARD ROCK 2010  
2                                   7 Years Too Late  HARD ROCK 2010  
3                              Accidents Will Happen  HARD ROCK 2010  
4                                             Alison  HARD ROCK 2010 

In [None]:
import unicodedata, re

def normalize_text(s):
    if pd.isna(s):
        return ''
    s = str(s).lower().strip()
    s = unicodedata.normalize('NFKD', s)
    s = re.sub(r'\s+', ' ', s)
    # remove common parenthetical tags like (remastered), [live], - remaster
    s = re.sub(r'\s*\([^)]*\)\s*', ' ', s)
    s = re.sub(r'\s*\[[^]]*\]\s*', ' ', s)
    s = re.sub(r'- remaster(ed)?', '', s)
    s = re.sub(r"'", '', s)
    s = s.strip()
    return s

# Add normalized columns
playlists_df['t_norm'] = playlists_df['trackname'].astype(str).map(normalize_text)
playlists_df['a_norm'] = playlists_df['artistname'].astype(str).map(normalize_text)

print('Added normalized columns to playlists_df')

Added normalized columns to playlists_df


In [None]:
import pandas as pd
recommendationInfo_df = pd.read_csv('data/recommendation_info.csv', encoding='utf-8', encoding_errors='ignore', on_bad_lines='skip')
recommendationInfo_df.columns = recommendationInfo_df.columns.str.strip().str.replace('"', '')
print('Loaded recommendationInfo_df')

# Add normalized columns to recommendationInfo_df
recommendationInfo_df['t_norm'] = recommendationInfo_df['track_name'].astype(str).map(normalize_text)
recommendationInfo_df['a_norm'] = recommendationInfo_df['track_artist'].astype(str).map(normalize_text)

# Build lookup mapping from (t_norm,a_norm) to dataset index
rec_lookup = {}
for idx, row in recommendationInfo_df.iterrows():
    key = (row.t_norm, row.a_norm)
    if key not in rec_lookup:
        rec_lookup[key] = idx

print(f'Recommendation lookup size: {len(rec_lookup)} unique songs (from {len(recommendationInfo_df)} total entries)')

Loaded recommendationInfo_df
Recommendation lookup size: 25942 unique songs (from 32828 total entries)
Recommendation lookup size: 25942 unique songs (from 32828 total entries)


In [None]:
# Map playlist rows to dataset indices
def map_row_to_idx(row):
    return rec_lookup.get((row['t_norm'], row['a_norm']))

playlists_df['dataset_idx'] = playlists_df.apply(map_row_to_idx, axis=1)

# playlist statistics
stats = playlists_df.groupby('playlistname').agg(\
    total_count=('trackname', 'count'),
    matched_count=('dataset_idx', lambda s: s.notnull().sum())\
).reset_index()

print('Playlist stats sample:')
print(stats.sort_values('matched_count', ascending=False).head())

Playlist stats sample:
               playlistname  total_count  matched_count
6452                Starred        46772           6950
4090       Liked from Radio         7208           1305
2399     Everything at once         7892            762
2498  Favoritas de la radio         2237            723
6508                 Strane         6825            532


In [None]:
# Pick 20 playlists for CF model training
min_size = 20
max_size = 300
min_matched = 30
candidates = stats[(stats['total_count'].between(min_size, max_size)) & (stats['matched_count'] >= min_matched)].copy()
print(f'Found {len(candidates)} candidate playlists between {min_size}-{max_size} tracks with at least {min_matched} matched songs')

chosen = candidates.sort_values('matched_count', ascending=False).head(20)['playlistname'].tolist()
print('Chosen playlists for evaluation (examples):')
print(chosen)

Found 212 candidate playlists between 20-300 tracks with at least 30 matched songs
Chosen playlists for evaluation (examples):
['Birgit Party', 'ZLX', "80's (full)", 'Afternoon Delight', 'hippity hoppity', 'New Year 2011/2012', 'random', 'Mobil', 'Selections', 'high school days', 'ailygames', 'rock', 'Pump Up the Jam', 'Tiririri tururu lerelere', 'partypants', 'EDM | House | Tomorrowland 2015 | Latest house | summerburst 2015 | kygo | avicii | Axwell ingrosso | Magaluf 2015', 'Scelto dalla radio', 'Soundtrack de vida', 'Childhood memories', 'Norris']


In [None]:
# Build playlist x item CSR matrix, only include playlists that have at least one matched row
from scipy.sparse import csr_matrix

playlist_names = playlists_df['playlistname'].astype(str).unique().tolist()
playlist_to_id = {p:i for i,p in enumerate(playlist_names)}
item_count = len(recommendationInfo_df)
rows, cols, vals = [], [], []
for _, r in playlists_df.dropna(subset=['dataset_idx']).iterrows():
    pid = playlist_to_id[str(r['playlistname'])]
    item_idx = int(r['dataset_idx'])
    rows.append(pid); cols.append(item_idx); vals.append(1)

user_item = csr_matrix((vals, (rows, cols)), shape=(len(playlist_to_id), item_count))
print('Built user_item matrix with shape:', user_item.shape)

id_to_playlist = {v:k for k,v in playlist_to_id.items()}

Built user_item matrix with shape: (8809, 32828)


In [None]:
# collaborative filtering using SVD
from sklearn.decomposition import TruncatedSVD
import numpy as np

print('Training collaborative filtering model using SVD...')
print(f'Input matrix shape: {user_item.shape}')

n_factors = 128
svd = TruncatedSVD(n_components=n_factors, random_state=42)

# Fit on the user-item matrix and get item factors
playlist_factors = svd.fit_transform(user_item)
item_factors = svd.components_.T  # (n_items, n_factors)

print(f'SVD trained')
print(f'Item factors shape: {item_factors.shape}')
print(f'Playlist factors shape: {playlist_factors.shape}')
print(f'Explained variance ratio: {svd.explained_variance_ratio_.sum():.4f}')

Training collaborative filtering model using SVD...
Input matrix shape: (8809, 32828)
SVD trained
Item factors shape: (32828, 128)
Playlist factors shape: (8809, 128)
Explained variance ratio: 0.7214


In [61]:
# Collaborative recommendation wrapper using trained item_factors
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def recommend_collaborative(seed_song_tuples, n_recs=50):
    if item_factors is None:
        return 'CF model not trained; item_factors is None'
    seed_idxs = []
    for title, artist in seed_song_tuples:
        key = (normalize_text(title), normalize_text(artist))
        idx = rec_lookup.get(key)
        if idx is not None:
            seed_idxs.append(idx)
    if not seed_idxs:
        return 'No valid seeds found in recommendation dataset'
    query = item_factors[seed_idxs].mean(axis=0).reshape(1,-1)
    sims = cosine_similarity(query, item_factors).flatten()
    ranking = np.argsort(-sims)
    recs = [r for r in ranking if r not in seed_idxs][:n_recs]
    return recommendationInfo_df.iloc[recs].reset_index(drop=True)

print('recommend_collaborative defined')

recommend_collaborative defined


In [9]:
# adjusted evaluate_playlist function from KNN model for CF model
def evaluate_playlist(playlist_name, n_seed=5, n_recs=10):
    """
    Takes a playlist name, randomly selects n_seed songs from it that exist in our dataset,
    gets recommendations, and checks if any recommendations are from the original playlist.
    
    Parameters:
    - playlist_name: name of playlist in spotify_playlists.csv
    - n_seed: number of songs to use as input (default 5)
    - n_recs: number of recommendations to generate (default 10)
    
    Returns:
    - Dictionary with evaluation results
    """
    # Get all songs from the specified playlist
    playlist_songs = playlists_df[playlists_df['playlistname'] == playlist_name].copy()
    
    if len(playlist_songs) == 0:
        return {"error": f"Playlist '{playlist_name}' not found"}
    
    # Build output in memory
    output = []
    output.append(f"\n{'='*60}")
    output.append(f"Evaluating Playlist: {playlist_name}")
    output.append(f"{'='*60}")
    output.append(f"Total songs in playlist: {len(playlist_songs)}")
    
    # Find which playlist songs exist in our recommendation dataset (using pre-mapped dataset_idx)
    matched_songs = []
    for idx, row in playlist_songs.iterrows():
        if pd.notna(row.get('dataset_idx')):
            matched_songs.append({
                'track': row['trackname'],
                'artist': row['artistname'],
                'dataset_idx': int(row['dataset_idx'])
            })
    
    output.append(f"Songs found in our dataset: {len(matched_songs)}/{len(playlist_songs)}")
    
    if len(matched_songs) < n_seed:
        return {
            "error": f"Not enough songs in dataset. Found {len(matched_songs)}, need at least {n_seed}",
            "matched_count": len(matched_songs),
            "total_count": len(playlist_songs)
        }
    
    # Randomly select n_seed songs as input
    random.seed(42)  # For reproducibility
    seed_songs = random.sample(matched_songs, n_seed)
    remaining_songs = [s for s in matched_songs if s not in seed_songs]
    
    output.append(f"\n--- Input Songs (randomly selected {n_seed}) ---")
    for i, song in enumerate(seed_songs, 1):
        output.append(f"{i}. {song['track']} — {song['artist']}")
    
    # Get recommendations using collaborative filtering
    seed_list = [(song['track'], song['artist']) for song in seed_songs]
    recs_df = recommend_collaborative(seed_list, n_recs=n_recs)
    
    # Check if recommend_collaborative returned an error
    if isinstance(recs_df, str):
        return {"error": recs_df}
    
    remaining_indices = {s['dataset_idx'] for s in remaining_songs}
    hits = []
    
    output.append(f"\n--- Recommendations ({len(recs_df)} songs) ---")
    
    # Get the original indices from recommendationInfo_df
    for i in range(len(recs_df)):
        row = recs_df.iloc[i]
        track = row['track_name']
        artist = row['track_artist']
        
        # Find the original index in recommendationInfo_df
        mask = (
            recommendationInfo_df["track_name"].str.lower().str.strip() == track.lower().strip()
        ) & (
            recommendationInfo_df["track_artist"].str.lower().str.strip() == artist.lower().strip()
        )
        
        if mask.sum() > 0:
            original_idx = recommendationInfo_df[mask].index[0]
            in_playlist = "✓ IN PLAYLIST" if original_idx in remaining_indices else ""
            output.append(f"{i+1}. {track} — {artist} {in_playlist}")
            
            if original_idx in remaining_indices:
                hits.append({'track': track, 'artist': artist, 'rank': i+1})
        else:
            output.append(f"{i+1}. {track} — {artist}")
    
    # Calculate metrics
    hit_rate = 1 if len(hits) > 0 else 0
    precision = len(hits) / n_recs if n_recs > 0 else 0
    recall = len(hits) / len(remaining_songs) if len(remaining_songs) > 0 else 0
    
    output.append(f"\n--- Evaluation Results ---")
    output.append(f"Songs in playlist (in dataset): {len(matched_songs)}")
    output.append(f"Used as input: {n_seed}")
    output.append(f"Held-out (ground truth): {len(remaining_songs)}")
    output.append(f"Recommendations generated: {len(recs_df)}")
    output.append(f"Hits (songs from playlist recommended): {len(hits)}")
    output.append(f"Hit Rate (at least 1 hit): {hit_rate}")
    output.append(f"Precision@{n_recs}: {precision:.4f}")
    output.append(f"Recall@{n_recs}: {recall:.4f}")
    
    if hits:
        output.append(f"\nHit songs (from original playlist):")
        for hit in hits:
            output.append(f"  - Rank {hit['rank']}: {hit['track']} — {hit['artist']}")
    
    # Print everything at once
    print('\n'.join(output))
    
    return {
        'playlist_name': playlist_name,
        'total_in_playlist': len(playlist_songs),
        'matched_in_dataset': len(matched_songs),
        'seed_count': n_seed,
        'held_out_count': len(remaining_songs),
        'n_recs': n_recs,
        'hits': len(hits),
        'hit_rate': hit_rate,
        'precision': precision,
        'recall': recall,
        'hit_songs': hits
    }

In [62]:
# Quick evaluation helper: run evaluate_playlist on chosen playlists and summarize
def eval_playlists(names, n_seed=10, n_recs=50):
    results = []
    for name in names:
        try:
            print('Evaluating', name)
            res = evaluate_playlist(name, n_seed=n_seed, n_recs=n_recs)
            results.append(res)
        except Exception as e:
            print('Error evaluating', name, e)
    return results

# Example: evaluate the chosen playlists if available
try:
    example_names = chosen
    res = eval_playlists(example_names, n_seed=10, n_recs=50)
    import pandas as pd
    df_res = pd.DataFrame(res)
    print('Mean precision:', df_res['precision'].mean())
except NameError:
    print('No chosen playlists defined yet; run the selection cell first')

Evaluating Birgit Party

Evaluating Playlist: Birgit Party
Total songs in playlist: 290
Songs found in our dataset: 155/290

--- Input Songs (randomly selected 10) ---
1. Carry on Wayward Son — Kansas
2. Always On My Mind — Pet Shop Boys
3. Immigrant Song — Led Zeppelin
4. Hot Blooded (2008 Remastered LP Version) — Foreigner
5. Heart Of Gold — Neil Young
6. Dancing Queen — ABBA
7. Brown Sugar — The Rolling Stones
8. Tiny Dancer — Elton John
9. Born in the U.S.A. — Bruce Springsteen
10. You Ain't Seen Nothing Yet — Bachman-Turner Overdrive

--- Recommendations (50 songs) ---
1. The Devil Went Down To Georgia — The Charlie Daniels Band ✓ IN PLAYLIST
2. Goodbye Yellow Brick Road — Elton John 
3. Rocket Man (I Think It's Going To Be A Long Long Time) — Elton John ✓ IN PLAYLIST
4. Show Me The Way — Peter Frampton 
5. Kodachrome — Paul Simon ✓ IN PLAYLIST
6. Life in a Northern Town — The Dream Academy 
7. Honky Cat — Elton John 
8. Bennie And The Jets — Elton John 
9. Reach (NBC Olympic Vers

CBF model

In [53]:
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

print('Loading features from spotify_df_cleaned.csv...')

# Load the cleaned Spotify data with all features
spotify_df = pd.read_csv('data/spotify_df_cleaned.csv', encoding='utf-8', encoding_errors='ignore')
print(f'Loaded {len(spotify_df)} songs with features')
print(f'Available columns: {spotify_df.columns.tolist()}')

# Define all available features (audio + metadata)
numeric_features = [
    'track_popularity',  # Popularity signal!
    'danceability', 'energy', 'key', 'loudness', 'mode',
    'speechiness', 'acousticness', 'instrumentalness',
    'liveness', 'valence', 'tempo', 'duration_ms', 'release_year'
]

categorical_features = [
    'playlist_genre', 'playlist_subgenre'
]

# Extract numeric features
numeric_data = spotify_df[numeric_features].copy()
print(f'\nUsing {len(numeric_features)} numeric features: {numeric_features}')

# Handle NaN values in numeric features
nan_count = numeric_data.isna().sum().sum()
if nan_count > 0:
    print(f'Filling {nan_count} NaN values in numeric features...')
    numeric_data = numeric_data.fillna(numeric_data.mean())

# Encode categorical features
encoded_features = []
for cat_col in categorical_features:
    if cat_col in spotify_df.columns:
        print(f'Encoding {cat_col}...')
        le = LabelEncoder()
        encoded = le.fit_transform(spotify_df[cat_col].fillna('unknown'))
        encoded_features.append(encoded.reshape(-1, 1))

# Combine all features
if encoded_features:
    categorical_array = np.hstack(encoded_features)
    feature_matrix = np.hstack([numeric_data.values, categorical_array])
    print(f'Combined features: {numeric_data.shape[1]} numeric + {categorical_array.shape[1]} categorical')
else:
    feature_matrix = numeric_data.values
    print(f'Using only numeric features')

# Note: Numeric features are already standardized in spotify_df_cleaned.csv
# Only standardize the categorical features to match the scale
if encoded_features:
    print('Standardizing categorical features to match pre-scaled numeric features...')
    scaler = StandardScaler()
    categorical_scaled = scaler.fit_transform(categorical_array)
    feature_matrix = np.hstack([numeric_data.values, categorical_scaled])

print(f'Final feature matrix shape: {feature_matrix.shape}')

Loading features from spotify_df_cleaned.csv...
Loaded 32828 songs with features
Available columns: ['track_id', 'track_popularity', 'track_album_release_date', 'playlist_name', 'playlist_genre', 'playlist_subgenre', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'release_year']

Using 14 numeric features: ['track_popularity', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'release_year']
Filling 1886 NaN values in numeric features...
Encoding playlist_genre...
Encoding playlist_subgenre...
Combined features: 14 numeric + 2 categorical
Final feature matrix shape: (32828, 16)


In [None]:
# CBF Recommendation Function
def recommend_cbf(seed_song_tuples, n_recs=50):
    """Content-based filtering using audio features"""
    seed_idxs = []
    for title, artist in seed_song_tuples:
        key = (normalize_text(title), normalize_text(artist))
        idx = rec_lookup.get(key)
        if idx is not None:
            seed_idxs.append(idx)
    
    if not seed_idxs:
        return 'No valid seeds found in recommendation dataset'
    
    # Average the feature vectors of seed songs
    seed_features = feature_matrix[seed_idxs].mean(axis=0).reshape(1, -1)
    
    # Compute cosine similarity to all songs
    similarities = cosine_similarity(seed_features, feature_matrix).flatten()
    
    # Get top recommendations excluding seeds
    ranking = np.argsort(-similarities)
    recs = [idx for idx in ranking if idx not in seed_idxs][:n_recs]
    
    return recommendationInfo_df.iloc[recs].reset_index(drop=True)

Hybrid Model: Combine both CF and CBF approaches with weighted averaging

In [None]:
# Hybrid Recommendation Function (CF + CBF)
def recommend_hybrid(seed_song_tuples, n_recs=50, cf_weight=0.85, cbf_weight=0.15):
    """Hybrid: combines collaborative and content-based filtering"""
    # Get seed indices
    seed_idxs = []
    for title, artist in seed_song_tuples:
        key = (normalize_text(title), normalize_text(artist))
        idx = rec_lookup.get(key)
        if idx is not None:
            seed_idxs.append(idx)
    
    if not seed_idxs:
        return 'No valid seeds found in recommendation dataset'
    
    # Get CF similarities, SVD item factors
    cf_query = item_factors[seed_idxs].mean(axis=0).reshape(1,-1)
    cf_sims = cosine_similarity(cf_query, item_factors).flatten()
    
    # Get CBF similarities, from audio features
    cbf_query = feature_matrix[seed_idxs].mean(axis=0).reshape(1, -1)
    cbf_sims = cosine_similarity(cbf_query, feature_matrix).flatten()
    
    # Normalize
    cf_sims_norm = (cf_sims - cf_sims.min()) / (cf_sims.max() - cf_sims.min() + 1e-10)
    cbf_sims_norm = (cbf_sims - cbf_sims.min()) / (cbf_sims.max() - cbf_sims.min() + 1e-10)
    
    # Weighted combination
    hybrid_scores = (cf_weight * cf_sims_norm) + (cbf_weight * cbf_sims_norm)
    
    # top recommendations
    ranking = np.argsort(-hybrid_scores)
    recs = [idx for idx in ranking if idx not in seed_idxs][:n_recs]
    
    return recommendationInfo_df.iloc[recs].reset_index(drop=True)

Model Comparison: CF, CBF, and Hybrid

In [None]:
# Unified evaluation function for any model
def evaluate_model(recommend_func, playlist_name, n_seed=10, n_recs=50, model_name="Model"):
    """Evaluate any recommendation model (CF, CBF, or Hybrid)"""
    playlist_songs = playlists_df[playlists_df['playlistname'] == playlist_name].copy()
    
    if len(playlist_songs) == 0:
        return None
    
    matched_songs = []
    for idx, row in playlist_songs.iterrows():
        if pd.notna(row.get('dataset_idx')):
            matched_songs.append({
                'track': row['trackname'],
                'artist': row['artistname'],
                'dataset_idx': int(row['dataset_idx'])
            })
    
    if len(matched_songs) < n_seed:
        return None
    
    # Split into seed and holdout
    random.seed(42)
    seed_songs = random.sample(matched_songs, n_seed)
    remaining_songs = [s for s in matched_songs if s not in seed_songs]
    
    # Get recommendations using specified model
    seed_list = [(s['track'], s['artist']) for s in seed_songs]
    recs_df = recommend_func(seed_list, n_recs=n_recs)
    
    if isinstance(recs_df, str):
        return None
    
    # Calculate metrics
    remaining_indices = {s['dataset_idx'] for s in remaining_songs}
    hits = 0
    for i in range(len(recs_df)):
        track = recs_df.iloc[i]['track_name']
        artist = recs_df.iloc[i]['track_artist']
        
        # Find the index in recommendationInfo_df
        mask = (
            recommendationInfo_df["track_name"].str.lower().str.strip() == track.lower().strip()
        ) & (
            recommendationInfo_df["track_artist"].str.lower().str.strip() == artist.lower().strip()
        )
        
        if mask.sum() > 0:
            rec_idx = recommendationInfo_df[mask].index[0]
            if rec_idx in remaining_indices:
                hits += 1
    
    precision = hits / n_recs if n_recs > 0 else 0
    recall = hits / len(remaining_songs) if len(remaining_songs) > 0 else 0
    hit_rate = 1 if hits > 0 else 0
    
    return {
        'model': model_name,
        'playlist': playlist_name,
        'precision': precision,
        'recall': recall,
        'hit_rate': hit_rate,
        'hits': hits,
        'total_holdout': len(remaining_songs)
    }

In [None]:
# Compare CBF, CF, and Hybrid models on chosen playlists
def compare_models(playlist_names, n_seed=10, n_recs=50):
    results = []
    
    for name in playlist_names:
        print(f'Evaluating {name}...')
        
        # CF
        res_cf = evaluate_model(recommend_collaborative, name, n_seed, n_recs, "CF")
        if res_cf: 
            results.append(res_cf)
            print(f"  CF: Precision={res_cf['precision']:.4f}")
        
        # CBF
        res_cbf = evaluate_model(recommend_cbf, name, n_seed, n_recs, "CBF")
        if res_cbf: 
            results.append(res_cbf)
            print(f"  CBF: Precision={res_cbf['precision']:.4f}")
        
        # Hybrid
        res_hybrid = evaluate_model(recommend_hybrid, name, n_seed, n_recs, "Hybrid")
        if res_hybrid: 
            results.append(res_hybrid)
            print(f"  Hybrid: Precision={res_hybrid['precision']:.4f}")
        
        print()
    
    # results
    df_results = pd.DataFrame(results)
    
    print("\n" + "="*70)
    print("MODEL COMPARISON SUMMARY")
    print("="*70)
    
    summary = df_results.groupby('model').agg({
        'precision': ['mean', 'std', 'min', 'max'],
        'recall': ['mean', 'std'],
        'hit_rate': 'mean'
    }).round(4)
    
    print(summary)
    
    best_model = df_results.groupby('model')['precision'].mean().idxmax()
    best_precision = df_results.groupby('model')['precision'].mean().max()
    print(f"\n Best Model: {best_model} (Mean Precision: {best_precision:.4f})")
    
    return df_results

print("Comparing models on 5 playlists...\n")
comparison_results = compare_models(chosen[:5], n_seed=10, n_recs=50)

Comparing models on 5 playlists...

Evaluating Birgit Party...
  CF: Precision=0.3200
  CBF: Precision=0.0000
  Hybrid: Precision=0.3600

Evaluating ZLX...
  CF: Precision=0.2800
  CBF: Precision=0.0000
  Hybrid: Precision=0.3800

Evaluating 80's (full)...
  CF: Precision=0.5800
  CBF: Precision=0.0000
  Hybrid: Precision=0.6200

Evaluating Afternoon Delight...
  CF: Precision=0.4000
  CBF: Precision=0.0000
  Hybrid: Precision=0.4000

Evaluating hippity hoppity...
  CF: Precision=0.3600
  CBF: Precision=0.0600
  Hybrid: Precision=0.3600


MODEL COMPARISON SUMMARY
       precision                      recall         hit_rate
            mean     std   min   max    mean     std     mean
model                                                        
CBF        0.012  0.0268  0.00  0.06  0.0067  0.0149      0.2
CF         0.388  0.1163  0.28  0.58  0.1763  0.0661      1.0
Hybrid     0.424  0.1108  0.36  0.62  0.1909  0.0615      1.0

 Best Model: Hybrid (Mean Precision: 0.4240)
