# Enhanced Movie Data Preprocessing for LSA Recommendation System
## Advanced preprocessing with weighted features and comprehensive text engineering

In [1]:
import pandas as pd
import numpy as np
import ast
import os
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import StandardScaler
import nltk
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# Download required NLTK data
try:
    nltk.download('stopwords', quiet=True)
    nltk.download('wordnet', quiet=True)
    nltk.download('punkt', quiet=True)
except:
    print("NLTK data already downloaded or download failed")

print("Enhanced Movie Data Preprocessing")
print("================================")

Enhanced Movie Data Preprocessing


## 1. Data Loading and Initial Exploration

In [2]:
# Load the raw dataset
try:
    df = pd.read_csv("../data/processed/movies_with_features.csv")
    print(f"Loaded processed dataset: {df.shape}")
except FileNotFoundError:
    # If processed file doesn't exist, load and merge raw files
    print(" Loading raw datasets...")
    movies = pd.read_csv("../data/raw/tmdb_5000_movies.csv")
    credits = pd.read_csv("../data/raw/tmdb_5000_credits.csv")
    
    # Merge datasets
    df = movies.merge(credits, left_on='id', right_on='movie_id', how='inner')
    print(f"Merged raw datasets: {df.shape}")

print(f"\nDataset Overview:")
print(f"   Movies: {len(df)}")
print(f"   Features: {len(df.columns)}")
print(f"   Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.1f} MB")

# Display basic info
df.head()

Loaded processed dataset: (2723, 32)

Dataset Overview:
   Movies: 2723
   Features: 32
   Memory usage: 30.2 MB


Unnamed: 0,original_title,budget,genres,homepage,id,keywords,original_language,overview,popularity,production_companies,...,title,genres_list,num_genres,cast_list,num_cast,director,overview_length,overview_word_count,release_year,release_month
0,Avatar,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,Avatar,"['Action', 'Adventure', 'Fantasy', 'Science Fi...",4,"['Sam Worthington', 'Zoe Saldana', 'Sigourney ...",5,James Cameron,175,28,2009.0,12.0
1,Spectre,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,Spectre,"['Action', 'Adventure', 'Crime']",3,"['Daniel Craig', 'Christoph Waltz', 'Léa Seydo...",5,Sam Mendes,240,41,2015.0,10.0
2,The Dark Knight Rises,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",en,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...",...,The Dark Knight Rises,"['Action', 'Crime', 'Drama', 'Thriller']",4,"['Christian Bale', 'Michael Caine', 'Gary Oldm...",5,Christopher Nolan,428,65,2012.0,7.0
3,John Carter,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]",...,John Carter,"['Action', 'Adventure', 'Science Fiction']",3,"['Taylor Kitsch', 'Lynn Collins', 'Samantha Mo...",5,Andrew Stanton,342,55,2012.0,3.0
4,Spider-Man 3,258000000,"[{""id"": 14, ""name"": ""Fantasy""}, {""id"": 28, ""na...",http://www.sonypictures.com/movies/spider-man3/,559,"[{""id"": 851, ""name"": ""dual identity""}, {""id"": ...",en,The seemingly invincible Spider-Man goes up ag...,115.699814,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,Spider-Man 3,"['Fantasy', 'Action', 'Adventure']",3,"['Tobey Maguire', 'Kirsten Dunst', 'James Fran...",5,Sam Raimi,287,45,2007.0,5.0


## 2. Enhanced Data Cleaning and Missing Value Handling

In [13]:
# Advanced missing value analysis
def analyze_missing_values(df):
    """
    Comprehensive missing value analysis
    """
    missing_info = []
    for col in df.columns:
        missing_count = df[col].isnull().sum()
        missing_pct = (missing_count / len(df)) * 100
        if missing_count > 0:
            missing_info.append({
                'column': col,
                'missing_count': missing_count,
                'missing_percentage': missing_pct,
                'dtype': str(df[col].dtype)
            })

    return pd.DataFrame(missing_info).sort_values(
        'missing_percentage', ascending=False
    )

print("Missing Value Analysis:")
missing_analysis = analyze_missing_values(df)
if not missing_analysis.empty:
    print(missing_analysis.head(10))
else:
    print("No missing values found.")

# Enhanced missing value handling
print("\nEnhanced Missing Value Handling:")

# Text columns → fill with empty string
text_cols = ['overview', 'tagline', 'homepage']
for col in text_cols:
    if col in df.columns:
        before_count = df[col].isnull().sum()
        df[col] = df[col].fillna('')
        print(f"{col}: filled {before_count} missing values with empty string")

# Numeric columns → intelligent filling
numeric_cols = [
    'budget', 'popularity', 'revenue',
    'runtime', 'vote_average', 'vote_count'
]
for col in numeric_cols:
    if col in df.columns:
        before_count = df[col].isnull().sum()
        if before_count > 0:
            if col in ['budget', 'revenue', 'popularity']:
                fill_value = df[col].median()
            else:
                fill_value = df[col].mean()

            df[col] = df[col].fillna(fill_value)
            print(
                f"{col}: filled {before_count} missing values "
                f"with {fill_value:.2f}"
            )

# List columns → convert from string JSON to list
list_cols = [
    'genres', 'keywords', 'cast', 'crew',
    'production_companies', 'production_countries',
    'spoken_languages'
]
for col in list_cols:
    if col in df.columns:
        before_count = df[col].isnull().sum()
        df[col] = df[col].apply(
            lambda x: [] if pd.isna(x) or x == '[]'
            else ast.literal_eval(x) if isinstance(x, str)
            else x
        )
        print(f"{col}: processed {len(df) - before_count} JSON lists")


Missing Value Analysis:
          column  missing_count  missing_percentage    dtype
0       homepage           3091           64.369013   object
4        tagline            844           17.576010   object
5       director             30            0.624740   object
1       overview              3            0.062474   object
3        runtime              2            0.041649  float64
2   release_date              1            0.020825   object
6   release_year              1            0.020825  float64
7  release_month              1            0.020825  float64

Enhanced Missing Value Handling:
overview: filled 3 missing values with empty string
tagline: filled 844 missing values with empty string
homepage: filled 3091 missing values with empty string
runtime: filled 2 missing values with 106.86
genres: processed 4802 JSON lists
keywords: processed 4802 JSON lists
cast: processed 4802 JSON lists
crew: processed 4802 JSON lists
production_companies: processed 4802 JSON lists
produc

## 3. Advanced Feature Extraction

In [21]:
# Enhanced feature extraction with better error handling
def extract_names_advanced(obj_list, key='name', top_n=None, fallback_key=None):
    """
    Advanced name extraction with fallback options
    """
    if not obj_list or not isinstance(obj_list, list):
        return []
    
    names = []
    for obj in obj_list:
        if isinstance(obj, dict):
            if key in obj and obj[key]:
                names.append(str(obj[key]).strip())
            elif fallback_key and fallback_key in obj and obj[fallback_key]:
                names.append(str(obj[fallback_key]).strip())
    
    # Remove duplicates while preserving order
    seen = set()
    unique_names = []
    for name in names:
        if name.lower() not in seen:
            seen.add(name.lower())
            unique_names.append(name)
    
    if top_n:
        return unique_names[:top_n]
    return unique_names

print("Advanced Feature Extraction:")

# Extract enhanced features
df['genres_list'] = df['genres'].apply(lambda x: extract_names_advanced(x))
df['keywords_list'] = df['keywords'].apply(lambda x: extract_names_advanced(x, top_n=20))  # Limit keywords
df['cast_list'] = df['cast'].apply(lambda x: extract_names_advanced(x, top_n=15))  # Top 15 cast
df['director_list'] = df['crew'].apply(lambda x: [d['name'] for d in x if isinstance(d, dict) and d.get('job')=='Director'] if x else [])
df['companies_list'] = df['production_companies'].apply(lambda x: extract_names_advanced(x, top_n=5))
df['countries_list'] = df['production_countries'].apply(lambda x: extract_names_advanced(x))
df['languages_list'] = df['spoken_languages'].apply(lambda x: extract_names_advanced(x, key='name'))

# Extract additional crew roles for better recommendations
def extract_crew_roles(crew_list, roles=['Producer', 'Writer', 'Screenplay', 'Cinematography']):
    """
    Extract specific crew roles
    """
    if not crew_list:
        return []
    
    crew_names = []
    for person in crew_list:
        if isinstance(person, dict) and person.get('job') in roles:
            crew_names.append(person.get('name', ''))
    
    return crew_names[:5]  # Limit to top 5

df['key_crew_list'] = df['crew'].apply(extract_crew_roles)

print(f"   Genres extracted: {df['genres_list'].apply(len).sum()} total")
print(f"   Keywords extracted: {df['keywords_list'].apply(len).sum()} total")
print(f"   Cast members extracted: {df['cast_list'].apply(len).sum()} total")
print(f"   Directors extracted: {df['director_list'].apply(len).sum()} total")
print(f"   Key crew extracted: {df['key_crew_list'].apply(len).sum()} total")

Advanced Feature Extraction:
   Genres extracted: 7520 total
   Keywords extracted: 22389 total
   Cast members extracted: 37932 total
   Directors extracted: 2920 total
   Key crew extracted: 10155 total


## 4. Enhanced Numerical Feature Engineering

In [15]:
# Create comprehensive numerical features
print("Enhanced Numerical Feature Engineering:")

# Count features
df['num_genres'] = df['genres_list'].apply(len)
df['num_keywords'] = df['keywords_list'].apply(len)
df['num_cast'] = df['cast_list'].apply(len)
df['num_directors'] = df['director_list'].apply(len)
df['num_companies'] = df['companies_list'].apply(len)
df['num_countries'] = df['countries_list'].apply(len)
df['num_languages'] = df['languages_list'].apply(len)
df['num_key_crew'] = df['key_crew_list'].apply(len)

# Text-based features
df['overview_length'] = df['overview'].apply(lambda x: len(str(x)))
df['overview_word_count'] = df['overview'].apply(lambda x: len(str(x).split()))
df['title_length'] = df['original_title'].apply(lambda x: len(str(x)))

# Temporal features
if 'release_date' in df.columns:
    df['release_date'] = pd.to_datetime(df['release_date'], errors='coerce')
    df['release_year'] = df['release_date'].dt.year
    df['release_month'] = df['release_date'].dt.month
    df['release_day_of_year'] = df['release_date'].dt.dayofyear
    
    # Create decade feature
    df['release_decade'] = (df['release_year'] // 10) * 10
    
    print(f"   Temporal features created from release_date")

# Financial ratios and derived features
if 'budget' in df.columns and 'revenue' in df.columns:
    df['profit'] = df['revenue'] - df['budget']
    df['roi'] = np.where(df['budget'] > 0, df['profit'] / df['budget'], 0)
    df['budget_per_minute'] = np.where(df['runtime'] > 0, df['budget'] / df['runtime'], 0)
    
    print(f"   Financial features created")

# Rating and popularity features
if 'vote_average' in df.columns and 'vote_count' in df.columns:
    # Weighted rating (IMDB formula)
    C = df['vote_average'].mean()  # Mean vote across all movies
    m = df['vote_count'].quantile(0.9)  # Minimum votes required
    df['weighted_rating'] = ((df['vote_count'] / (df['vote_count'] + m)) * df['vote_average'] + 
                            (m / (df['vote_count'] + m)) * C)
    
    # Popularity score combining rating and vote count
    df['popularity_score'] = df['vote_average'] * np.log1p(df['vote_count'])
    
    print(f"   Rating and popularity features created")

print(f"\nFeature Summary:")
print(f"   Total features: {len(df.columns)}")
print(f"   Numerical features: {len(df.select_dtypes(include=[np.number]).columns)}")
print(f"   Text features: {len(df.select_dtypes(include=['object']).columns)}")

Enhanced Numerical Feature Engineering:
   Temporal features created from release_date
   Financial features created
   Rating and popularity features created

Feature Summary:
   Total features: 52
   Numerical features: 28
   Text features: 23


## 5. Advanced Text Preprocessing

In [16]:
# Advanced text preprocessing for LSA
print("Advanced Text Preprocessing:")

# Enhanced stopwords
try:
    stop_words = set(stopwords.words('english'))
    # Add movie-specific stopwords
    movie_stopwords = {'movie', 'film', 'story', 'man', 'woman', 'one', 'two', 'get', 'go', 'come', 'see', 'know', 'time', 'way', 'make', 'take', 'find'}
    stop_words.update(movie_stopwords)
except:
    stop_words = set()
    print("   NLTK stopwords not available, using basic set")

# Initialize lemmatizer
try:
    lemmatizer = WordNetLemmatizer()
except:
    lemmatizer = None
    print("   NLTK lemmatizer not available")

def advanced_text_cleaning(text):
    """
    Advanced text cleaning for movie recommendation
    """
    if not text or pd.isna(text):
        return ''
    
    text = str(text).lower()
    
    # Remove special characters but keep spaces
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    
    # Remove extra whitespace
    text = re.sub(r'\s+', ' ', text).strip()
    
    # Split into words
    words = text.split()
    
    # Filter and lemmatize
    processed_words = []
    for word in words:
        # Skip short words and stopwords
        if len(word) < 2 or word in stop_words:
            continue
        
        # Lemmatize if available
        if lemmatizer:
            word = lemmatizer.lemmatize(word)
        
        processed_words.append(word)
    
    return ' '.join(processed_words)

# Create multiple text feature combinations for LSA
print("   Creating text feature combinations...")

# Standard text features (for comparison)
df['text_features'] = (
    df['overview'] + ' ' +
    df['tagline'] + ' ' +
    df['genres_list'].apply(lambda x: ' '.join(x)) + ' ' +
    df['keywords_list'].apply(lambda x: ' '.join(x)) + ' ' +
    df['cast_list'].apply(lambda x: ' '.join(x)) + ' ' +
    df['director_list'].apply(lambda x: ' '.join(x))
)

# Enhanced weighted text features for better LSA performance
df['enhanced_text_features'] = (
    df['overview'] + ' ' +
    df['tagline'] + ' ' +
    # Weight genres heavily (4x) - most important for similarity
    df['genres_list'].apply(lambda x: ' '.join(x * 4)) + ' ' +
    # Weight keywords moderately (3x) - important for content similarity
    df['keywords_list'].apply(lambda x: ' '.join(x * 3)) + ' ' +
    # Weight directors heavily (3x) - important for style similarity
    df['director_list'].apply(lambda x: ' '.join(x * 3)) + ' ' +
    # Weight main cast moderately (2x)
    df['cast_list'].apply(lambda x: ' '.join(x[:5] * 2)) + ' ' +
    # Add key crew for additional context
    df['key_crew_list'].apply(lambda x: ' '.join(x)) + ' ' +
    # Add production companies for studio similarity
    df['companies_list'].apply(lambda x: ' '.join(x))
)

# Apply advanced cleaning
print("   Applying advanced text cleaning...")
df['text_features'] = df['text_features'].apply(advanced_text_cleaning)
df['enhanced_text_features'] = df['enhanced_text_features'].apply(advanced_text_cleaning)

# Create genre-specific text features
df['genre_text'] = df['genres_list'].apply(lambda x: ' '.join(x * 5))  # Heavy genre weighting
df['genre_text'] = df['genre_text'].apply(advanced_text_cleaning)

print(f"   Text preprocessing complete")
print(f"   Average text feature length: {df['enhanced_text_features'].apply(len).mean():.0f} characters")
print(f"   Average word count: {df['enhanced_text_features'].apply(lambda x: len(x.split())).mean():.0f} words")

Advanced Text Preprocessing:
   Creating text feature combinations...
   Applying advanced text cleaning...
   Text preprocessing complete
   Average text feature length: 804 characters
   Average word count: 112 words


## 6. Numerical Feature Scaling and Transformation

In [17]:
# Advanced numerical preprocessing
print("Advanced Numerical Feature Processing:")

# Identify skewed numerical features for log transformation
skewed_features = ['budget', 'revenue', 'popularity', 'runtime', 'vote_count']
skewed_features = [col for col in skewed_features if col in df.columns]

print(f"   Applying log transformation to skewed features: {skewed_features}")
for col in skewed_features:
    # Apply log1p transformation (handles zeros)
    df[f'{col}_log'] = np.log1p(df[col])
    
    # Also create original scaled version
    df[col] = df[col].apply(lambda x: np.log1p(x))

# Standardize numerical features for hybrid model
numerical_features = ['budget', 'revenue', 'popularity', 'runtime', 'vote_average', 'vote_count']
numerical_features = [col for col in numerical_features if col in df.columns]

if numerical_features:
    print(f"   Standardizing numerical features: {numerical_features}")
    scaler = StandardScaler()
    df[numerical_features] = scaler.fit_transform(df[numerical_features])
    
    # Save scaler for later use
    import joblib
    os.makedirs('../models', exist_ok=True)
    joblib.dump(scaler, '../models/numerical_scaler.pkl')
    print(f"   Scaler saved to ../models/numerical_scaler.pkl")

# Create additional derived features
print("   Creating derived features...")

# Popularity tier (for filtering)
if 'popularity' in df.columns:
    df['popularity_tier'] = pd.qcut(df['popularity'], q=5, labels=['Low', 'Below_Avg', 'Average', 'Above_Avg', 'High'])

# Rating tier
if 'vote_average' in df.columns:
    df['rating_tier'] = pd.cut(df['vote_average'], 
                              bins=[-np.inf, 4, 5.5, 7, 8.5, np.inf], 
                              labels=['Poor', 'Below_Avg', 'Good', 'Very_Good', 'Excellent'])

print(f"   Numerical preprocessing complete")

Advanced Numerical Feature Processing:
   Applying log transformation to skewed features: ['budget', 'revenue', 'popularity', 'runtime', 'vote_count']
   Standardizing numerical features: ['budget', 'revenue', 'popularity', 'runtime', 'vote_average', 'vote_count']
   Scaler saved to ../models/numerical_scaler.pkl
   Creating derived features...
   Numerical preprocessing complete


## 7. Data Quality Assessment

In [18]:
# Comprehensive data quality assessment
print("Data Quality Assessment:")

# Check for duplicates
duplicates = df.duplicated(subset=['original_title', 'release_year']).sum()
print(f"   Duplicate movies: {duplicates}")

if duplicates > 0:
    print(f"   Removing {duplicates} duplicates...")
    df = df.drop_duplicates(subset=['original_title', 'release_year'], keep='first')

# Check text feature quality
empty_text = (df['enhanced_text_features'].str.len() < 10).sum()
print(f"   Movies with insufficient text features: {empty_text}")

# Check for outliers in numerical features
outlier_summary = []
for col in numerical_features:
    if col in df.columns:
        Q1 = df[col].quantile(0.25)
        Q3 = df[col].quantile(0.75)
        IQR = Q3 - Q1
        outliers = ((df[col] < (Q1 - 1.5 * IQR)) | (df[col] > (Q3 + 1.5 * IQR))).sum()
        outlier_summary.append(f"{col}: {outliers}")

print(f"   Outliers detected: {', '.join(outlier_summary)}")

# Final dataset statistics
print(f"\nFinal Dataset Statistics:")
print(f"   Movies: {len(df)}")
print(f"   Features: {len(df.columns)}")
print(f"   Year range: {df['release_year'].min():.0f} - {df['release_year'].max():.0f}")
print(f"   Average rating: {df['vote_average'].mean():.2f}")
print(f"   Unique genres: {len(set([g for sublist in df['genres_list'] for g in sublist]))}")
print(f"   Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.1f} MB")

Data Quality Assessment:
   Duplicate movies: 0
   Movies with insufficient text features: 238
   Outliers detected: budget: 283, revenue: 433, popularity: 33, runtime: 44, vote_average: 40, vote_count: 24

Final Dataset Statistics:
   Movies: 2723
   Features: 62
   Year range: 1927 - 2016
   Average rating: -0.00
   Unique genres: 19
   Memory usage: 12.6 MB


## 8. Save Enhanced Processed Dataset

In [None]:
# Save the enhanced processed dataset
print("Saving Enhanced Processed Dataset:")

# Ensure processed folder exists
os.makedirs('../data/processed', exist_ok=True)

# Save main processed dataset
output_path = '../data/processed/movies_enhanced_processed.csv'
df.to_csv(output_path, index=False)
print(f"   Enhanced dataset saved: {output_path}")

# Also save a backup of the original processed version
backup_path = '../data/processed/movies_final_processed.csv'
df.to_csv(backup_path, index=False)
print(f"   Backup saved: {backup_path}")

# Create a feature summary file
feature_summary = {
    'total_movies': len(df),
    'total_features': len(df.columns),
    'text_features': ['text_features', 'enhanced_text_features', 'genre_text'],
    'numerical_features': numerical_features,
    'list_features': ['genres_list', 'keywords_list', 'cast_list', 'director_list', 'key_crew_list'],
    'derived_features': ['popularity_tier', 'rating_tier', 'weighted_rating', 'popularity_score'],
    'preprocessing_applied': {
        'text_cleaning': True,
        'log_transformation': True,
        'standardization': True,
        'feature_weighting': True,
        'duplicate_removal': True
    }
}

import json
with open('../data/processed/feature_summary.json', 'w') as f:
    json.dump(feature_summary, f, indent=2)

print(f"   Feature summary saved: ../data/processed/feature_summary.json")

print("\nENHANCED PREPROCESSING COMPLETE!")
print("Dataset ready for advanced LSA movie recommendation system")
print("\n Key Enhancements:")
print("   Weighted text features for better LSA performance")
print("   Advanced text cleaning and preprocessing")
print("   Comprehensive numerical feature engineering")
print("   Multiple text feature combinations")
print("   Quality assessment and outlier detection")
print("   Derived features for enhanced recommendations")

# Display sample of enhanced features
print("\nSample Enhanced Features:")
sample_movie = df.iloc[0]
print(f"   Movie: {sample_movie['original_title']}")
print(f"   Enhanced text (first 100 chars): {sample_movie['enhanced_text_features'][:100]}...")
print(f"   Genres: {sample_movie['genres_list']}")
print(f"   Rating tier: {sample_movie.get('rating_tier', 'N/A')}")
print(f"   Popularity tier: {sample_movie.get('popularity_tier', 'N/A')}")

Saving Enhanced Processed Dataset:
   Enhanced dataset saved: ../data/processed/movies_enhanced_processed.csv
   Backup saved: ../data/processed/movies_final_processed.csv
   Feature summary saved: ../data/processed/feature_summary.json

ENHANCED PREPROCESSING COMPLETE!
Dataset ready for advanced LSA movie recommendation system

 Key Enhancements:
   ✓ Weighted text features for better LSA performance
   ✓ Advanced text cleaning and preprocessing
   ✓ Comprehensive numerical feature engineering
   ✓ Multiple text feature combinations
   ✓ Quality assessment and outlier detection
   ✓ Derived features for enhanced recommendations

Sample Enhanced Features:
   Movie: Avatar
   Enhanced text (first 100 chars): 22nd century paraplegic marine dispatched moon pandora unique mission becomes torn following order p...
   Genres: ['Action', 'Adventure', 'Fantasy', 'Science Fiction']
   Rating tier: Poor
   Popularity tier: High
