# Part 6: Recommender System Demo and Evaluation

This notebook demonstrates the complete sentiment-based recommender system in action. Users can input reviews, get sentiment predictions, and receive business recommendations based on sentiment patterns and business similarity.

## Importing Dependencies

In [None]:
import json
import warnings
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from transformers import AutoModel, AutoTokenizer
# Suppress every warning category for the rest of the session so demo output stays readable.
warnings.filterwarnings(action='ignore')

# Global plot styling: matplotlib style sheet first, then the seaborn colour palette.
# Keep this order: the palette is applied after the style sheet.
plt.style.use('seaborn-v0_8')
sns.set_palette(palette="husl")

## Load Pre-trained Components

In [None]:
# Execute the previous notebook in this kernel so that everything it defines is available here.
# The cells below read `recommender`, `business_df`, `merged_df` and `sentiment_profile_df`
# without defining them, so they are presumably created by that notebook — TODO confirm.
# The path is relative, so this cell must be run from the directory containing that notebook.
%run 05_sentiment_based_recommender_system.ipynb

## Enhanced Recommendation Function

In [None]:
def recommend_businesses_from_review(user_review, user_location=None, category_preference=None, top_k=5):
    """
    Run the complete recommendation pipeline for one user review and print a report.

    Steps:
    1. Predict the sentiment of ``user_review`` with the notebook-level ``recommender``.
    2. Pre-select businesses from ``recommender.business_features_df`` using
       thresholds that depend on the predicted sentiment.
    3. Optionally keep only businesses whose city/state or categories contain the
       given text (case-insensitive, literal substring match — not a regex).
    4. Rank the survivors by a weighted composite score and keep the best ``top_k``.

    Args:
        user_review: Free-text review written by the user.
        user_location: Optional text matched against the ``city`` and ``state`` columns.
        category_preference: Optional text matched against the ``categories`` column.
        top_k: Maximum number of recommendations to return.

    Returns:
        A list of dicts, best first, with the keys ``rank``, ``business_id``, ``name``,
        ``city``, ``state``, ``categories``, ``stars``, ``positive_ratio``,
        ``total_reviews`` and ``composite_score``. An empty list is returned (after
        printing a hint) when no business passes the filters.
    """
    print("=" * 60)
    print("SENTIMENT-BASED BUSINESS RECOMMENDER SYSTEM")
    print("=" * 60)

    # Step 1: Analyze sentiment
    print(f"\n📝 User Review: '{user_review[:100]}{'...' if len(user_review) > 100 else ''}'")

    predicted_sentiment, confidence = recommender.predict_user_sentiment(user_review)
    print(f"\n🎯 Predicted Sentiment: {predicted_sentiment.upper()} (Confidence: {confidence:.3f})")

    # Step 2: Pre-select businesses; the thresholds depend on the predicted sentiment
    features = recommender.business_features_df
    if predicted_sentiment == 'positive':
        # User likes positive experiences, recommend highly-rated businesses
        filtered_businesses = features[
            (features['positive_ratio'] > 0.6) &
            (features['total_reviews'] >= 10)
        ]
        print("\n✨ Recommendation Strategy: Finding businesses with consistently positive reviews")
    elif predicted_sentiment == 'negative':
        # User had a negative experience: apply the strictest thresholds
        filtered_businesses = features[
            (features['positive_ratio'] > 0.7) &
            (features['stars'] >= 4.0) &
            (features['total_reviews'] >= 20)
        ]
        print("\n🔄 Recommendation Strategy: Finding highly-rated alternatives to improve your experience")
    else:
        # Any other label (e.g. neutral): only require a minimum number of reviews
        filtered_businesses = features[features['total_reviews'] >= 5]
        print("\n🎲 Recommendation Strategy: Finding diverse business options")

    # Step 3: Apply location filter.
    # regex=False: the text comes from the user, so characters such as "+" or "("
    # must be matched literally instead of being parsed as a regular expression.
    if user_location:
        location_mask = (
            filtered_businesses['city'].str.contains(user_location, case=False, na=False, regex=False) |
            filtered_businesses['state'].str.contains(user_location, case=False, na=False, regex=False)
        )
        filtered_businesses = filtered_businesses[location_mask]
        print(f"\n📍 Location Filter: {user_location}")

    # Step 4: Apply category filter (same literal matching as the location filter)
    if category_preference:
        category_mask = filtered_businesses['categories'].str.contains(
            category_preference, case=False, na=False, regex=False
        )
        filtered_businesses = filtered_businesses[category_mask]
        print(f"\n🏷️ Category Filter: {category_preference}")

    if len(filtered_businesses) == 0:
        print("\n❌ No businesses found matching your criteria. Try broader filters.")
        return []

    # Step 5: Rank by composite score.
    # Work on a copy so the new column never leaks into recommender.business_features_df.
    filtered_businesses = filtered_businesses.copy()
    filtered_businesses['composite_score'] = (
        0.4 * filtered_businesses['positive_ratio'] +
        0.3 * (filtered_businesses['stars'] / 5.0) +
        # log-damped review volume; log1p(n) / 10 stays below 1 for n < ~22,000
        0.2 * np.log1p(filtered_businesses['total_reviews']) / 10 +
        0.1 * (1 - filtered_businesses['negative_ratio'])
    )

    # Get top recommendations
    top_recommendations = filtered_businesses.nlargest(top_k, 'composite_score')

    print(f"\n🎯 Top {len(top_recommendations)} Recommendations:")
    print("=" * 60)

    recommendations = []
    for idx, (_, business) in enumerate(top_recommendations.iterrows(), 1):
        total_reviews = int(business['total_reviews'])
        rec = {
            'rank': idx,
            'business_id': business['business_id'],
            'name': business['name'],
            'city': business['city'],
            'state': business['state'],
            'categories': business['categories'],
            'stars': business['stars'],
            'positive_ratio': business['positive_ratio'],
            'total_reviews': total_reviews,
            'composite_score': business['composite_score']
        }

        # Categories can be missing (None/NaN) when no category filter was applied;
        # slicing such a value would raise, so build a display string first.
        raw_categories = business['categories']
        categories_text = 'N/A' if pd.isna(raw_categories) else str(raw_categories)

        print(f"\n{idx}. {business['name']}")
        print(f"   📍 {business['city']}, {business['state']}")
        print(f"   ⭐ {business['stars']:.1f}/5.0 stars")
        print(f"   😊 {business['positive_ratio']:.1%} positive reviews ({total_reviews} total)")
        print(f"   🏷️ {categories_text[:80]}{'...' if len(categories_text) > 80 else ''}")
        print(f"   📊 Score: {business['composite_score']:.3f}")

        recommendations.append(rec)

    return recommendations

## Demo: Restaurant Recommendation

In [None]:
# Example 1: an enthusiastic restaurant review, restricted to Las Vegas restaurants.
# The review text is passed as the first positional argument (user_review).
positive_review = """
Amazing dining experience! The food was absolutely delicious, 
service was outstanding, and the atmosphere was perfect for a date night. 
The pasta was cooked to perfection and the dessert was heavenly. 
Will definitely come back!
"""

recommendations_1 = recommend_businesses_from_review(
    positive_review,
    user_location="Las Vegas",
    category_preference="Restaurants",
    top_k=5,
)

## Demo: Hotel Recommendation

In [None]:
# Example 2: a complaint about a hotel, restricted to Phoenix hotels.
# The review text is passed as the first positional argument (user_review).
negative_review = """
Terrible experience at this hotel. The room was dirty, 
staff was rude, and the facilities were outdated. 
The bed was uncomfortable and there was noise all night. 
Would not recommend to anyone.
"""

recommendations_2 = recommend_businesses_from_review(
    negative_review,
    user_location="Phoenix",
    category_preference="Hotels",
    top_k=3,
)

## Demo: General Business Recommendation

In [None]:
# Example 3: a lukewarm shopping review, restricted to Toronto shopping businesses.
# The review text is passed as the first positional argument (user_review).
neutral_review = """
The store was okay, nothing special but not bad either. 
They had what I needed and the prices were reasonable. 
Staff was helpful when I asked questions.
"""

recommendations_3 = recommend_businesses_from_review(
    neutral_review,
    user_location="Toronto",
    category_preference="Shopping",
    top_k=4,
)

## Visualization: Sentiment Distribution Analysis

In [None]:
# Analyze sentiment distribution across different business categories
def plot_sentiment_analysis(max_businesses=100):
    """
    Draw a 2x2 dashboard summarising review sentiment and show it.

    Panels:
        top-left     pie chart of the overall sentiment label counts
        top-right    stacked bars: sentiment share (%) per business star rating
        bottom-left  ten categories with the highest mean positive-review ratio
        bottom-right histograms of per-business positive and negative ratios

    Reads the notebook-level globals ``merged_df``, ``business_df`` and
    ``sentiment_profile_df``.

    Args:
        max_businesses: How many leading rows of ``business_df`` feed the category
            panel. Defaults to 100 (the original sample size); ``None`` uses all rows.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # 1. Overall sentiment distribution
    sentiment_counts = merged_df['sentiment'].value_counts()
    axes[0, 0].pie(sentiment_counts.values, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90)
    axes[0, 0].set_title('Overall Sentiment Distribution', fontsize=14, fontweight='bold')

    # 2. Business rating vs sentiment (each row normalised to 100%)
    sentiment_by_rating = merged_df.groupby(['stars_business', 'sentiment']).size().unstack(fill_value=0)
    sentiment_by_rating_pct = sentiment_by_rating.div(sentiment_by_rating.sum(axis=1), axis=0) * 100
    # Colours are assigned to the sentiment columns in their column order
    # (presumably negative / neutral / positive — TODO confirm the label set).
    sentiment_by_rating_pct.plot(kind='bar', stacked=True, ax=axes[0, 1],
                                 color=['#ff6b6b', '#ffd93d', '#6bcf7f'])
    axes[0, 1].set_title('Sentiment Distribution by Business Rating', fontsize=14, fontweight='bold')
    axes[0, 1].set_xlabel('Business Stars')
    axes[0, 1].set_ylabel('Percentage')
    axes[0, 1].legend(title='Sentiment')
    axes[0, 1].tick_params(axis='x', rotation=0)

    # 3. Top categories by positive sentiment ratio.
    # The share of positive reviews per business is computed once for the whole
    # review table instead of re-filtering merged_df for every business/category.
    positive_ratio_by_business = (
        merged_df['sentiment'].eq('positive').groupby(merged_df['business_id']).mean().to_dict()
    )
    sampled_businesses = business_df if max_businesses is None else business_df.head(max_businesses)

    category_sentiment = []
    for business_id, categories_str in zip(sampled_businesses['business_id'], sampled_businesses['categories']):
        if pd.isna(categories_str):
            continue
        positive_ratio = positive_ratio_by_business.get(business_id)
        if positive_ratio is None:
            continue  # business has no reviews in merged_df
        for category in categories_str.split(',')[:3]:  # first 3 listed categories per business
            category_sentiment.append({'category': category.strip(), 'positive_ratio': positive_ratio})

    if category_sentiment:
        category_df = pd.DataFrame(category_sentiment)
        top_categories = category_df.groupby('category')['positive_ratio'].mean().nlargest(10)
        top_categories.plot(kind='barh', ax=axes[1, 0], color='skyblue')
        axes[1, 0].set_title('Top 10 Categories by Positive Sentiment Ratio', fontsize=14, fontweight='bold')
        axes[1, 0].set_xlabel('Average Positive Sentiment Ratio')

    # 4. Business sentiment profile distribution
    axes[1, 1].hist(sentiment_profile_df['positive_ratio'], bins=30, alpha=0.7, color='green', label='Positive')
    axes[1, 1].hist(sentiment_profile_df['negative_ratio'], bins=30, alpha=0.7, color='red', label='Negative')
    axes[1, 1].set_title('Distribution of Business Sentiment Ratios', fontsize=14, fontweight='bold')
    axes[1, 1].set_xlabel('Sentiment Ratio')
    axes[1, 1].set_ylabel('Number of Businesses')
    axes[1, 1].legend()

    plt.tight_layout()
    plt.show()

plot_sentiment_analysis()

## System Performance Evaluation

In [None]:
# Evaluate recommendation system performance
def evaluate_recommendation_system():
    """
    Print a text report describing the dataset and the recommender's coverage.

    Reads the notebook-level globals ``business_df``, ``merged_df`` and
    ``sentiment_profile_df``. Takes no arguments and returns ``None``.

    The "Sentiment Model Performance" figures are fixed strings written into
    this function; they are not computed here.
    """
    print("=" * 60)
    print("RECOMMENDATION SYSTEM EVALUATION")
    print("=" * 60)

    total_businesses = len(business_df)

    # 1. Dataset Statistics
    print(f"\n📊 Dataset Statistics:")
    print(f"   • Total Businesses: {total_businesses:,}")
    print(f"   • Total Reviews: {len(merged_df):,}")
    print(f"   • Businesses with Sentiment Profiles: {len(sentiment_profile_df):,}")
    print(f"   • Average Reviews per Business: {merged_df.groupby('business_id').size().mean():.1f}")

    # 2. Sentiment Model Performance (from previous notebook; hard-coded values)
    print(f"\n🎯 Sentiment Model Performance:")
    print(f"   • Model: RoBERTa-LSTM")
    print(f"   • Test Accuracy: 86.0%")
    print(f"   • F1-Score: 86.0%")
    print(f"   • Classes: Negative (0), Neutral (1), Positive (2)")

    # 3. Business Coverage Analysis
    businesses_with_reviews = sentiment_profile_df[sentiment_profile_df['total_reviews'] > 0]
    high_confidence_businesses = businesses_with_reviews[businesses_with_reviews['total_reviews'] >= 10]
    # Guard the percentage so an empty business table reports 0.0% instead of raising.
    coverage_pct = (len(businesses_with_reviews) / total_businesses * 100) if total_businesses else 0.0

    print(f"\n🏢 Business Coverage:")
    print(f"   • Businesses with Reviews: {len(businesses_with_reviews):,} ({coverage_pct:.1f}%)")
    print(f"   • High Confidence Businesses (≥10 reviews): {len(high_confidence_businesses):,}")
    print(f"   • Average Positive Sentiment Ratio: {businesses_with_reviews['positive_ratio'].mean():.3f}")

    # 4. Category Distribution: count how many businesses list each category.
    # Blank tokens (from stray or trailing commas) are skipped so they never
    # show up as an unnamed category.
    category_counts = Counter(
        category
        for categories_str in business_df['categories'].dropna()
        for category in (token.strip() for token in categories_str.split(','))
        if category
    )

    top_categories = category_counts.most_common(10)
    print(f"\n🏷️ Top Business Categories:")
    for i, (category, count) in enumerate(top_categories, 1):
        print(f"   {i:2d}. {category}: {count:,} businesses")

    # 5. Descriptive capability list (static text, not measured metrics)
    print(f"\n⚡ System Capabilities:")
    print(f"   • Real-time sentiment analysis of user reviews")
    print(f"   • Content-based filtering using business features")
    print(f"   • Location and category filtering")
    print(f"   • Composite scoring with multiple factors")
    print(f"   • Scalable to {total_businesses:,}+ businesses")

evaluate_recommendation_system()

## Interactive Recommendation Interface

In [None]:
def interactive_recommendation():
    """
    Print a menu of sample inputs and run the recommender on the first of each.

    Despite the name, nothing is read from the user: the function lists canned
    reviews, locations and categories, then calls
    ``recommend_businesses_from_review`` with the first review, first location
    and first category, asking for three results.

    Returns:
        Whatever ``recommend_businesses_from_review`` returns for that sample.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    # Canned inputs shown to the reader; index 0 of each list drives the demo run.
    sample_reviews = [
        "The coffee was amazing and the barista was so friendly! Great atmosphere for working.",
        "Food was cold and service was terrible. Very disappointed with this place.",
        "Decent place, nothing extraordinary but good value for money.",
        "Outstanding service! The staff went above and beyond to make our experience special.",
        "The gym equipment is outdated and the facility needs better maintenance."
    ]
    sample_locations = ["Las Vegas", "Phoenix", "Toronto", "Charlotte", "Pittsburgh"]
    sample_categories = ["Coffee & Tea", "Restaurants", "Shopping", "Hotels & Travel", "Active Life"]

    # Banner
    print("\n" + heavy_rule)
    print("🎯 INTERACTIVE BUSINESS RECOMMENDER")
    print(heavy_rule)
    print("\nShare your experience and get personalized recommendations!")

    # Menu of sample inputs
    print("\n📝 Sample Reviews to Try:")
    numbered_reviews = (
        f"   {number}. {text}" for number, text in enumerate(sample_reviews, start=1)
    )
    for menu_line in numbered_reviews:
        print(menu_line)

    print("\n📍 Sample Locations: ", ", ".join(sample_locations))
    print("🏷️ Sample Categories: ", ", ".join(sample_categories))

    # Scripted run with the first entry of every list
    print("\n" + light_rule)
    print("DEMO: Using Sample Review #1")
    print(light_rule)

    demo_review = sample_reviews[0]
    demo_location = sample_locations[0]
    demo_category = sample_categories[0]
    return recommend_businesses_from_review(
        user_review=demo_review,
        user_location=demo_location,
        category_preference=demo_category,
        top_k=3
    )

# Run interactive demo
demo_recommendations = interactive_recommendation()

## Summary and Conclusions

In [None]:
# Closing report, emitted as one newline-joined block of text.
# The dataset figures are read from the notebook-level frames business_df,
# merged_df and sentiment_profile_df; every other line is static text.
# NOTE(review): the model is called "BERT-LSTM" here but "RoBERTa-LSTM" in the
# evaluation cell — confirm which name is correct.
print("\n".join([
    "=" * 70,
    "🎉 SENTIMENT-BASED RECOMMENDER SYSTEM - SUMMARY",
    "=" * 70,

    "\n🔬 TECHNICAL ACHIEVEMENTS:",
    "   ✅ Integrated BERT-LSTM sentiment analysis (86% F1-score)",
    "   ✅ Built content-based recommender system",
    "   ✅ Combined sentiment patterns with business features",
    "   ✅ Implemented location and category filtering",
    "   ✅ Created composite scoring algorithm",

    "\n🎯 SYSTEM CAPABILITIES:",
    "   • Real-time sentiment analysis of user reviews",
    "   • Personalized business recommendations",
    "   • Multi-factor recommendation scoring",
    "   • Scalable to large business datasets",
    "   • Interactive user interface",

    "\n📊 DATASET COVERAGE:",
    f"   • {len(business_df):,} businesses analyzed",
    f"   • {len(merged_df):,} reviews processed",
    f"   • {len(sentiment_profile_df):,} sentiment profiles created",
    "   • Multiple cities and business categories",

    "\n🚀 FUTURE ENHANCEMENTS:",
    "   • Collaborative filtering integration",
    "   • Deep learning embeddings for businesses",
    "   • Real-time review stream processing",
    "   • Advanced user preference learning",
    "   • Mobile app deployment",

    "\n" + "=" * 70,
    "✨ System successfully combines NLP and recommendation algorithms!",
    "=" * 70,
]))