# Popularity Baseline Evaluation with Real Features

This notebook evaluates a popularity-based recommendation system using comprehensive evaluation metrics at k values of 5, 10, 15, and 20, with real music content features.

## Overview
- **Baseline**: Popularity-based recommendations (every user receives the same top-k tracks, ranked by total play count in the training data)
- **Features**: Real audio features, metadata, and embeddings from music data
- **Evaluation Metrics**: NDCG, Novelty, Diversity, Serendipity, Coverage
- **K Values**: 5, 10, 15, 20


## 1. Setup and Imports


In [1]:
# Import required libraries
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction import FeatureHasher
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.utils import shuffle
# Suppress every warning category so library chatter does not clutter cell outputs
warnings.simplefilter('ignore')

# Seed NumPy's legacy global RNG so repeated runs draw the same random numbers
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

print("✅ Libraries imported successfully!")


  from .autonotebook import tqdm as notebook_tqdm


✅ Libraries imported successfully!


In [2]:
# Import the standalone evaluation metrics
# Make sure standalone_evaluation_metrics.py is in the same directory
from standalone_evaluation_metrics import (
    quick_evaluate,
    evaluate_recommendations,
    print_evaluation_summary,
    ndcg_at_k,
    novelty_at_k,
    diversity_ild_at_k,
    serendipity_at_k,
    catalog_coverage_at_k,
    user_coverage_at_k
)

print("✅ Evaluation metrics imported successfully!")


✅ Evaluation metrics imported successfully!


## 2. Data Loading and Preparation


In [3]:
# Load the datasets
# dataset_path is the directory that holds both CSV files; an empty string
# means "the current working directory".
dataset_path = ''  # Update this path to your data directory

# os.path.join inserts the separator when it is missing, so 'data' and 'data/'
# both resolve correctly (plain string concatenation would yield 'datamusic_list.csv').
music_list_path = os.path.join(dataset_path, 'music_list.csv')
user_behavior_path = os.path.join(dataset_path, 'user_behavior_list.csv')

try:
    music_list = pd.read_csv(music_list_path)
    user_behavior_list = pd.read_csv(user_behavior_path)
except FileNotFoundError as e:
    # Give an actionable hint, then re-raise so later cells do not run on missing data
    print(f"❌ Error loading data: {e}")
    print("Please update the dataset_path variable with the correct path to your data files.")
    raise

print("✅ Data loaded successfully!")
print(f"Music list shape: {music_list.shape}")
print(f"User behavior list shape: {user_behavior_list.shape}")

# Display basic info about the datasets
print(f"\nMusic list columns: {list(music_list.columns)}")
print(f"User behavior list columns: {list(user_behavior_list.columns)}")


✅ Data loaded successfully!
Music list shape: (50683, 21)
User behavior list shape: (9711301, 3)

Music list columns: ['track_id', 'name', 'artist', 'spotify_preview_url', 'spotify_id', 'tags', 'genre', 'year', 'duration_ms', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature']
User behavior list columns: ['track_id', 'user_id', 'playcount']


In [4]:
# Data preprocessing following the popularity baseline approach
print("📊 Preprocessing data...")

# Tunable preprocessing parameters (kept together so the printed messages
# can never drift away from the values actually used)
MIN_USER_INTERACTIONS = 50   # keep users with at least this many interaction rows
MAX_EVAL_USERS = 5000        # cap on the number of users kept for evaluation
TRAIN_FRACTION = 0.8         # per-user share of interactions that goes to train
SPLIT_SEED = 42              # seed for user sampling, shuffling and the split

# Filter users with at least MIN_USER_INTERACTIONS interaction rows.
# NOTE: this cell reassigns user_behavior_list, so re-running it without
# re-loading the data operates on the already-filtered frame.
user_counts = user_behavior_list['user_id'].value_counts()
active_users = user_counts[user_counts >= MIN_USER_INTERACTIONS].index
user_behavior_list = user_behavior_list[user_behavior_list['user_id'].isin(active_users)]

print(f"Active users (≥{MIN_USER_INTERACTIONS} interactions): {len(active_users)}")

# Sample users for faster evaluation (optional)
if len(active_users) > MAX_EVAL_USERS:
    rng = np.random.default_rng(seed=SPLIT_SEED)
    sampled_users = rng.choice(active_users, size=MAX_EVAL_USERS, replace=False)
    user_behavior_list = user_behavior_list[user_behavior_list['user_id'].isin(sampled_users)]
    print(f"Sampled users for evaluation: {len(sampled_users)}")

# Shuffle the data
user_behavior_list = shuffle(user_behavior_list, random_state=SPLIT_SEED)

# Per-user train-test split: TRAIN_FRACTION of each user's rows go to train,
# the remaining rows (selected by index label) form the test set.
train_df = user_behavior_list.groupby('user_id', group_keys=False).apply(
    lambda x: x.sample(frac=TRAIN_FRACTION, random_state=SPLIT_SEED)
)
test_df = user_behavior_list.drop(train_df.index)

# The label-based drop above only yields a clean partition when index labels are unique
assert len(train_df) + len(test_df) == len(user_behavior_list), \
    "train/test split does not partition the interactions (duplicate index labels?)"

print(f"Train interactions: {len(train_df)}")
print(f"Test interactions: {len(test_df)}")
print(f"Unique users in train: {train_df['user_id'].nunique()}")
print(f"Unique users in test: {test_df['user_id'].nunique()}")
print(f"Unique items in train: {train_df['track_id'].nunique()}")
print(f"Unique items in test: {test_df['track_id'].nunique()}")


📊 Preprocessing data...
Active users (≥10 interactions): 23795
Sampled users for evaluation: 5000
Train interactions: 303007
Test interactions: 75709
Unique users in train: 5000
Unique users in test: 5000
Unique items in train: 20008
Unique items in test: 13734


## 3. Real Content Features Processing


In [5]:
# Create real content features from music data
print("🎼 Creating real content features...")

# Get unique tracks and users from training data (sorted, so positions are stable)
all_items = sorted(train_df['track_id'].unique())
all_users = sorted(train_df['user_id'].unique())

# Restrict music_list to the training tracks AND put its rows in all_items order.
# The item index mapping is built by enumerating all_items, so row i of the
# feature matrix must describe all_items[i]; keeping music_list's own row order
# would silently attach the wrong content vector to each item index.
# Tracks without metadata become all-NaN rows instead of shifting later rows.
sampled_music_list = (
    music_list[music_list['track_id'].isin(all_items)]
    .drop_duplicates(subset='track_id')   # reindex requires unique labels
    .set_index('track_id')
    .reindex(all_items)
    .rename_axis('track_id')
    .reset_index()
)
print(f"Filtered music list shape: {sampled_music_list.shape}")

missing_metadata_count = len(set(all_items) - set(music_list['track_id']))
if missing_metadata_count:
    print(f"⚠️ {missing_metadata_count} training tracks have no row in music_list; "
          "their features will be filled from defaults")

# Check available columns
print(f"Available columns in music_list: {list(sampled_music_list.columns)}")

# Define numeric features (audio features)
numeric_features = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
                   'instrumentalness', 'liveness', 'valence', 'tempo', 'year']

# Check which numeric features are available
available_numeric = [col for col in numeric_features if col in sampled_music_list.columns]
print(f"Available numeric features: {available_numeric}")

# Define categorical features
onehot_features = ['genre'] if 'genre' in sampled_music_list.columns else []
hash_features = ['artist', 'tags']

# Check which categorical features are available
available_categorical = [col for col in onehot_features + hash_features if col in sampled_music_list.columns]
print(f"Available categorical features: {available_categorical}")

# Track ids in feature-row order; the frame already has a fresh 0..n-1 index
# from reset_index() above, so positional and label indexing agree.
original_track_ids = sampled_music_list['track_id']


🎼 Creating real content features...
Filtered music list shape: (20008, 21)
Available columns in music_list: ['track_id', 'name', 'artist', 'spotify_preview_url', 'spotify_id', 'tags', 'genre', 'year', 'duration_ms', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature']
Available numeric features: ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'year']
Available categorical features: ['genre', 'artist', 'tags']


In [6]:
# Width of each hashed feature block (artist, and tags in the fallback path)
N_HASH_FEATURES = 500

# Process numeric features
print("Processing numeric features...")
if available_numeric:
    numeric_data = sampled_music_list[available_numeric]

    # Handle missing values with the per-column median
    numeric_data = numeric_data.fillna(numeric_data.median())

    # Scale every numeric feature to [0, 1]
    scaler = MinMaxScaler()
    scaled_numeric_features = scaler.fit_transform(numeric_data)
    scaled_numeric_df = pd.DataFrame(scaled_numeric_features, columns=available_numeric)
    print(f"Processed {len(available_numeric)} numeric features")
else:
    print("No numeric features available, creating empty dataframe")
    scaled_numeric_df = pd.DataFrame(index=sampled_music_list.index)

# Process categorical features
# Start from column-less frames so a missing source column leaves an "empty" block
print("Processing categorical features...")
encoded_genre_df = pd.DataFrame(index=sampled_music_list.index)
hashed_artist_df = pd.DataFrame(index=sampled_music_list.index)
tag_embeddings_df = pd.DataFrame(index=sampled_music_list.index)

# One-hot encode genre if available
if 'genre' in sampled_music_list.columns:
    print("Processing genre features...")
    onehot_data = sampled_music_list[['genre']]
    encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    encoded_genre_features = encoder.fit_transform(onehot_data)
    ohe_feature_names = encoder.get_feature_names_out(['genre'])
    encoded_genre_df = pd.DataFrame(encoded_genre_features, columns=ohe_feature_names)
    print(f"Processed {len(ohe_feature_names)} genre features")

# Feature hash artist if available
if 'artist' in sampled_music_list.columns:
    print("Processing artist features...")
    hash_artist_data = sampled_music_list['artist']
    # One token per track (the artist name); missing artists contribute no token
    hashed_artist_input = [[str(x)] if pd.notna(x) else [] for x in hash_artist_data]
    hasher_artist = FeatureHasher(n_features=N_HASH_FEATURES, input_type='string')
    hashed_artist_features = hasher_artist.fit_transform(hashed_artist_input)
    hashed_artist_df = pd.DataFrame(hashed_artist_features.toarray(),
                                   columns=[f'hashed_artist_{i}' for i in range(N_HASH_FEATURES)])
    print(f"Processed {N_HASH_FEATURES} artist features")

# Process tags (try embeddings first, fallback to hashing)
if 'tags' in sampled_music_list.columns:
    print("Processing tag features...")
    try:
        # Try sentence transformers for tag embeddings
        sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
        tags_text = sampled_music_list['tags'].fillna("").astype(str).tolist()
        tag_embeddings = sentence_model.encode(tags_text, convert_to_numpy=True)
        tag_embeddings_df = pd.DataFrame(tag_embeddings,
                                       columns=[f'tag_embedding_{i}' for i in range(tag_embeddings.shape[1])])
        print(f"Created {tag_embeddings.shape[1]} tag embeddings")
    except Exception as e:
        # Deliberately broad: any failure to load or run the embedding model
        # falls back to hashing, and the reason is reported.
        print(f"Could not create tag embeddings: {e}")
        print("Using feature hashing for tags instead...")
        # Fallback to feature hashing for tags
        hash_tags_data = sampled_music_list['tags']
        # Strip whitespace around each tag and drop empty tokens so that
        # "rock, pop" and "rock,pop" hash to the same features.
        hashed_tags_input = [
            [tag.strip() for tag in str(x).split(',') if tag.strip()] if pd.notna(x) else []
            for x in hash_tags_data
        ]
        hasher_tags = FeatureHasher(n_features=N_HASH_FEATURES, input_type='string')
        hashed_tags_features = hasher_tags.fit_transform(hashed_tags_input)
        tag_embeddings_df = pd.DataFrame(hashed_tags_features.toarray(),
                                       columns=[f'hashed_tags_{i}' for i in range(N_HASH_FEATURES)])
        print(f"Processed {N_HASH_FEATURES} tag features using hashing")


Processing numeric features...
Processed 10 numeric features
Processing categorical features...
Processing genre features...
Processed 16 genre features
Processing artist features...
Processed 500 artist features
Processing tag features...
Created 384 tag embeddings


In [7]:
# Combine all processed features
print("Combining all features...")

# Collect the feature blocks that actually contain columns, in a fixed order
feature_dfs = [
    block
    for block in (scaled_numeric_df, encoded_genre_df, hashed_artist_df, tag_embeddings_df)
    if not block.empty
]

# Fail fast when nothing could be built, instead of nesting the happy path
if not feature_dfs:
    print("❌ No features could be processed!")
    raise ValueError("No features available for processing")

# Concatenate all processed features column-wise (blocks share the same row index)
normalized_song_features = pd.concat(feature_dfs, axis=1)

# Add track_id column (same row order as the frame the features were built from)
normalized_song_features['track_id'] = original_track_ids.values

print(f"Combined features shape: {normalized_song_features.shape}")
print(f"Feature columns: {len(normalized_song_features.columns) - 1}")  # -1 for track_id

# Create item content matrix for evaluation
feature_columns = normalized_song_features.columns.drop('track_id')
item_content_matrix = normalized_song_features[feature_columns].values.astype(np.float32)

# L2 normalize rows for cosine similarity calculations;
# the small epsilon avoids division by zero for all-zero rows
norms = np.linalg.norm(item_content_matrix, axis=1, keepdims=True) + 1e-12
item_content = item_content_matrix / norms

print(f"Final item content matrix shape: {item_content.shape}")
print(f"Features are L2-normalized: {np.allclose(np.linalg.norm(item_content, axis=1), 1.0)}")

# Display feature statistics
print("\nFeature statistics:")
print(f"  Mean: {item_content.mean():.4f}")
print(f"  Std: {item_content.std():.4f}")
print(f"  Min: {item_content.min():.4f}")
print(f"  Max: {item_content.max():.4f}")

# Show feature breakdown; counts are read from the frames themselves so the
# report stays correct if the encoders are reconfigured
print("\nFeature breakdown:")
if not scaled_numeric_df.empty:
    print(f"  Numeric features: {scaled_numeric_df.shape[1]}")
if not encoded_genre_df.empty:
    print(f"  Genre features (one-hot): {encoded_genre_df.shape[1]}")
if not hashed_artist_df.empty:
    print(f"  Artist features (hashed): {hashed_artist_df.shape[1]}")
if not tag_embeddings_df.empty:
    print(f"  Tag features: {tag_embeddings_df.shape[1]}")
print(f"  Total features: {item_content.shape[1]}")


Combining all features...
Combined features shape: (20008, 911)
Feature columns: 910
Final item content matrix shape: (20008, 910)
Features are L2-normalized: True

Feature statistics:
  Mean: 0.0024
  Std: 0.0331
  Min: -0.4940
  Max: 0.5014

Feature breakdown:
  Numeric features: 10
  Genre features (one-hot): 16
  Artist features (hashed): 500
  Tag features: 384
  Total features: 910


## 4. Popularity Baseline Implementation


In [8]:
# Calculate track popularity from training data
print("🎵 Calculating track popularity...")

# Total play count per track, as a two-column frame (track_id, total_playcount)
track_popularity = (
    train_df.groupby('track_id')['playcount']
    .sum()
    .reset_index()
    .rename(columns={'playcount': 'total_playcount'})
)

# Most-played tracks first
popularity_sorted = track_popularity.sort_values(by='total_playcount', ascending=False)

played_mask = track_popularity['total_playcount'] > 0
print(f"Total unique tracks: {len(track_popularity)}")
print(f"Tracks with playcount > 0: {played_mask.sum()}")

# Display top 10 most popular tracks
print("\nTop 10 Most Popular Tracks:")
print(popularity_sorted.head(10))

# Map each track / user id to its position in the sorted id lists
item_to_idx = dict(zip(all_items, range(len(all_items))))
user_to_idx = dict(zip(all_users, range(len(all_users))))

print(f"\nItem mapping created: {len(item_to_idx)} items")
print(f"User mapping created: {len(user_to_idx)} users")


🎵 Calculating track popularity...
Total unique tracks: 20008
Tracks with playcount > 0: 20008

Top 10 Most Popular Tracks:
                 track_id  total_playcount
11376  TRONYHY128F92C9D11             2169
276    TRAFUNV128F92CFEB2             1617
2091   TRCRCBT128F4260DD1             1470
1518   TRBVNWT128F93173BA             1333
11992  TRPFYYL128F92F7144             1323
10649  TRNPKRK128F429831C             1051
7483   TRJGDTG128F421CE22             1028
13586  TRRKXNQ128F9339002             1000
11309  TROMKCG128F9320C09              974
18449  TRXUWEC128F426BE3F              967

Item mapping created: 20008 items
User mapping created: 5000 users


In [9]:
# Create popularity-based recommendations
def create_popularity_recommendations(user_to_idx, item_to_idx, popularity_sorted, k=20):
    """
    Create popularity-based recommendations for all users.

    Every user receives the same list: the first ``k`` tracks of
    ``popularity_sorted`` that have an entry in ``item_to_idx``. Tracks without
    a mapping are skipped and the next most popular track is taken instead, so
    the list only has fewer than ``k`` entries when fewer than ``k`` mappable
    tracks exist.

    Args:
        user_to_idx: User ID to index mapping
        item_to_idx: Item ID to index mapping
        popularity_sorted: DataFrame with a 'track_id' column, sorted by
            popularity (most popular first)
        k: Number of recommendations per user; values <= 0 yield empty lists

    Returns:
        Dictionary mapping user indices to recommendation arrays (item indices
        in descending popularity order). Each user gets an independent array.
    """
    # Walk the popularity ranking until k mappable items have been collected
    top_k_indices = []
    if k > 0:
        for item in popularity_sorted['track_id']:
            item_idx = item_to_idx.get(item)
            if item_idx is None:
                continue  # no index for this track; fall through to the next one
            top_k_indices.append(item_idx)
            if len(top_k_indices) == k:
                break

    # Explicit integer dtype keeps an empty result usable as an index array
    top_k_array = np.array(top_k_indices, dtype=int)

    # Same recommendations for everyone; copy so callers can mutate one user's
    # array without affecting the others
    return {user_idx: top_k_array.copy() for user_idx in user_to_idx.values()}

# Build one recommendation set per cutoff k
k_values = [5, 10, 15, 20]
all_recommendations = {}

for k in k_values:
    all_recommendations[k] = create_popularity_recommendations(
        user_to_idx, item_to_idx, popularity_sorted, k
    )
    recommendations = all_recommendations[k]
    print(f"✅ Generated popularity recommendations for k={k}")

# Report how many users and how many items per user each set contains
print("\nRecommendation summary:")
for k, recs in all_recommendations.items():
    first_user_recs = next(iter(recs.values()))
    print(f"  k={k}: {len(recs)} users, {len(first_user_recs)} items per user")


✅ Generated popularity recommendations for k=5
✅ Generated popularity recommendations for k=10
✅ Generated popularity recommendations for k=15
✅ Generated popularity recommendations for k=20

Recommendation summary:
  k=5: 5000 users, 5 items per user
  k=10: 5000 users, 10 items per user
  k=15: 5000 users, 15 items per user
  k=20: 5000 users, 20 items per user


## 5. Individual Metric Evaluation


In [None]:
# Evaluate individual metrics for each k value
print("🔍 Evaluating individual metrics...")

# (display label, result key) pairs in the order they are reported
reported_metrics = (
    ('NDCG', 'ndcg'),
    ('Novelty', 'novelty'),
    ('Diversity', 'diversity'),
    ('Serendipity', 'serendipity'),
)

# k -> {metric key -> score}
individual_results = {}

for k in k_values:
    print(f"\n--- Evaluating k={k} ---")
    recommendations = all_recommendations[k]

    # Each metric is computed on the recommendation set generated for this k
    scores_at_k = {
        'ndcg': ndcg_at_k(recommendations, test_df, user_to_idx, item_to_idx, k),
        'novelty': novelty_at_k(recommendations, train_df, item_to_idx, k),
        'diversity': diversity_ild_at_k(recommendations, item_content, k),
        'serendipity': serendipity_at_k(
            recommendations, train_df, test_df, user_to_idx, item_to_idx, item_content, k
        ),
    }
    individual_results[k] = scores_at_k

    # Print results
    for label, key in reported_metrics:
        print(f"  {label}@{k}: {scores_at_k[key]:.4f}")

print("\n✅ Individual metric evaluation completed!")


🔍 Evaluating individual metrics...

--- Evaluating k=5 ---
  NDCG@5: 0.0287
  Novelty@5: 3.1843
  Diversity@5: 0.4426
  Serendipity@5: 0.2414
  Catalog Coverage@5: 0.02%
  User Coverage: 100.00%

--- Evaluating k=10 ---
  NDCG@10: 0.0250
  Novelty@10: 3.6094
  Diversity@10: 0.4426
  Serendipity@10: 0.2478
  Catalog Coverage@10: 0.05%
  User Coverage: 100.00%

--- Evaluating k=15 ---
  NDCG@15: 0.0254
  Novelty@15: 3.7483
  Diversity@15: 0.4456
  Serendipity@15: 0.2529
  Catalog Coverage@15: 0.07%
  User Coverage: 100.00%

--- Evaluating k=20 ---
  NDCG@20: 0.0276
  Novelty@20: 3.8239
  Diversity@20: 0.4512
  Serendipity@20: 0.2593
  Catalog Coverage@20: 0.10%
  User Coverage: 100.00%

✅ Individual metric evaluation completed!


## 6. Comprehensive Evaluation


In [None]:
# Use the comprehensive evaluation function
print("📊 Running comprehensive evaluation...")

# Evaluate the longest recommendation lists (largest k) against every cutoff
# in k_values. Looking the key up via max(k_values) instead of a literal 20
# keeps this cell working when k_values is edited.
# NOTE(review): presumably evaluate_recommendations truncates each list to the
# cutoff being scored - confirm in standalone_evaluation_metrics.py.
largest_k = max(k_values)
comprehensive_results = evaluate_recommendations(
    recommendations=all_recommendations[largest_k],
    train_df=train_df,
    test_df=test_df,
    user_to_idx=user_to_idx,
    item_to_idx=item_to_idx,
    item_content=item_content,
    k_values=k_values,
    metrics=['ndcg', 'novelty', 'diversity', 'serendipity']
)

# Print formatted results
print_evaluation_summary(comprehensive_results)


📊 Running comprehensive evaluation...
Evaluating 5000 users across 4 k values...
  Calculating ndcg...
    ndcg@5: 0.0287
    ndcg@10: 0.0250
    ndcg@15: 0.0254
    ndcg@20: 0.0276
  Calculating novelty...
    novelty@5: 3.1843
    novelty@10: 3.6094
    novelty@15: 3.7483
    novelty@20: 3.8239
  Calculating diversity...
    diversity@5: 0.4426
    diversity@10: 0.4426
    diversity@15: 0.4456
    diversity@20: 0.4512
  Calculating serendipity...
    serendipity@5: 0.2414
    serendipity@10: 0.2478
    serendipity@15: 0.2529
    serendipity@20: 0.2593
  Calculating catalog_coverage...
    catalog_coverage@5: 0.0250
    catalog_coverage@10: 0.0500
    catalog_coverage@15: 0.0750
    catalog_coverage@20: 0.1000
  Calculating user_coverage...
    user_coverage@5: 100.0000
    user_coverage@10: 100.0000
    user_coverage@15: 100.0000
    user_coverage@20: 100.0000

RECOMMENDATION EVALUATION SUMMARY

NDCG:
--------------------
  @ 5: 0.0287
  @10: 0.0250
  @15: 0.0254
  @20: 0.0276

NOVEL

## 7. Results Visualization


In [None]:
# Create visualizations of the results
plt.style.use('default')

# (result key, panel title) for every metric this figure knows how to show
metric_panels = [
    ('ndcg', 'NDCG@k'),
    ('novelty', 'Novelty@k'),
    ('diversity', 'Diversity@k'),
    ('serendipity', 'Serendipity@k'),
    ('catalog_coverage', 'Catalog Coverage@k (%)'),
    ('user_coverage', 'User Coverage (%)'),
]

# Plot only metrics that were stored for every k. Indexing individual_results
# with a metric that was never computed (e.g. the coverage metrics) would
# raise KeyError and abort the whole figure.
metrics = [
    metric for metric, _ in metric_panels
    if all(metric in individual_results.get(k, {}) for k in k_values)
]
metric_titles = [title for metric, title in metric_panels if metric in metrics]
skipped_metrics = [metric for metric, _ in metric_panels if metric not in metrics]
if skipped_metrics:
    print(f"⚠️ Not present in individual_results, skipping plots for: {skipped_metrics}")

fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('Popularity Baseline Evaluation Results (Real Features)', fontsize=16, fontweight='bold')
panel_axes = axes.ravel()  # row-major order: same panel placement as (i // 3, i % 3)

for ax, metric, title in zip(panel_axes, metrics, metric_titles):
    # Get scores for this metric
    scores = [individual_results[k][metric] for k in k_values]

    # Plot
    ax.plot(k_values, scores, 'o-', linewidth=2, markersize=8, color='steelblue')
    ax.set_title(title, fontweight='bold')
    ax.set_xlabel('k')
    ax.set_ylabel('Score')
    ax.grid(True, alpha=0.3)
    ax.set_xticks(k_values)

    # Add value labels on points
    for x, y in zip(k_values, scores):
        ax.annotate(f'{y:.3f}', (x, y), textcoords="offset points", xytext=(0, 10), ha='center')

# Hide panels that have no metric to show
for ax in panel_axes[len(metrics):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

# Create a summary table (one row per k, one column per stored metric)
print("\n📋 Summary Table:")
summary_df = pd.DataFrame(individual_results).T
summary_df.index.name = 'k'
print(summary_df.round(4))


## 8. Analysis and Insights


In [None]:
# Analyze the results and provide insights
def _trend_label(scores):
    """Describe the change from the first to the last score.

    Only the two endpoints are compared; intermediate values are not inspected,
    so a non-monotonic series is still labelled by its overall direction.
    """
    first, last = scores[0], scores[-1]
    if last > first:
        return 'Increasing'
    if last < first:
        return 'Decreasing'
    return 'Flat'


# (result key, section heading, what the metric measures)
metric_sections = [
    ('ndcg', 'NDCG@k (Ranking Quality)',
     "Measures how well the ranking matches user preferences."),
    ('novelty', 'Novelty@k (Item Unpopularity)',
     "Measures how 'unpopular' recommended items are."),
    ('diversity', 'Diversity@k (List Variety)',
     "Measures variety within recommendation lists."),
    ('serendipity', 'Serendipity@k (Surprising Relevance)',
     "Measures surprising but relevant recommendations."),
]

print("🔍 Analysis and Insights:")
print("=" * 50)

# One numbered section per metric: range over k, endpoint trend, interpretation
for section_number, (metric, heading, meaning) in enumerate(metric_sections, start=1):
    scores = [individual_results[k][metric] for k in k_values]
    print(f"\n{section_number}. {heading}:")
    print(f"   Range: {min(scores):.4f} - {max(scores):.4f}")
    print(f"   Trend: {_trend_label(scores)}")
    print(f"   Interpretation: Higher is better. {meaning}")

# Overall Assessment
print("\n5. Overall Assessment:")
print("   Popularity baseline characteristics:")
print("   ✅ High user coverage (100%) - all users get recommendations")
print("   ✅ Consistent recommendations across users")
print("   ❌ Low novelty - only recommends popular items")
print("   ❌ Low diversity - same items for all users")
print("   ❌ Low serendipity - no personalization")
print("   ❌ Limited catalog coverage - only top-k popular items")

# Section numbers continue from 5 without a gap
print("\n6. Recommendations for Improvement:")
print("   - Implement collaborative filtering for personalization")
print("   - Add content-based filtering for diversity")
print("   - Use hybrid approaches to balance popularity and personalization")
print("   - Consider user history for serendipity")

print("\n7. Real Features Impact:")
print("   - Using real audio features provides more meaningful diversity calculations")
print("   - Tag embeddings capture semantic similarity better than synthetic features")
print("   - Artist features help distinguish between different musical styles")
print("   - Genre features provide categorical diversity information")
