# Main Recommender System Pipeline

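This notebook orchestrates the complete recommender system workflow, from data loading to generating final recommendations. Most stages are provided as commented-out templates: uncomment the relevant lines in each section (either the `%run` of the stage notebook or the load of its precomputed results) before running the sections that depend on them.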

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence only the upgrade-notice categories that library version bumps tend
# to emit. A blanket filterwarnings('ignore') would also hide RuntimeWarning
# (e.g. divide-by-zero / invalid-value results) and UserWarning, which are
# the warnings most likely to point at a real data or numeric problem.
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

# Set display options: show every column, cap printed rows at 100
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
sns.set_style('whitegrid')

print("Environment setup complete!")

## 1. Data Loading and Preprocessing

Run the data preprocessing pipeline.

In [None]:
# NOTE: every statement in this cell is commented out, so running it as-is
# defines nothing. Enable ONE of the two options below; the template cells
# further down reference `user_item_matrix`.

# Option A - import preprocessing functions by executing the preprocessing notebook
# (presumably this also leaves `user_item_matrix` in the namespace - TODO confirm)
# %run data_preprocessing.ipynb

# Option B - manually load processed data
# user_item_matrix = pd.read_csv('../results/user_item_matrix.csv', index_col=0)
# print(f"Loaded user-item matrix: {user_item_matrix.shape}")

## 2. Build Collaborative Filtering Model

In [None]:
# NOTE: every statement in this cell is commented out, so running it as-is
# defines nothing. Enable ONE of the two options below; the recommendation
# template in section 4 references `user_similarity`.

# Option A - load or compute collaborative filtering by executing its notebook
# (presumably this defines the similarity matrices and the recommendation
# helpers - TODO confirm)
# %run collaborative.ipynb

# Option B - load precomputed similarities
# NOTE(review): `item_similarity_cf` is not referenced by any other visible cell.
# user_similarity = pd.read_csv('../results/user_similarity.csv', index_col=0)
# item_similarity_cf = pd.read_csv('../results/item_similarity.csv', index_col=0)
# print("Collaborative filtering models loaded")

## 3. Build Content-Based Model

In [None]:
# NOTE: every statement in this cell is commented out, so running it as-is
# defines nothing. Enable ONE of the two options below; the recommendation
# template in section 4 references `item_similarity_content`.

# Option A - load or compute content-based filtering by executing its notebook
# (presumably this also defines the content-based helpers - TODO confirm)
# %run content_based.ipynb

# Option B - load precomputed models
# item_similarity_content = pd.read_csv('../results/content_item_similarity.csv', index_col=0)
# print("Content-based model loaded")

## 4. Generate Recommendations for Target Users

In [None]:
# Disabled template: uncomment once `user_item_matrix`, `user_similarity` and
# `item_similarity_content` exist.
# Names used below that no visible cell of this notebook defines:
# `get_user_recommendations`, `get_user_profile_recommendations`,
# `weighted_hybrid_recommendations` and `items` - presumably supplied by the
# %run'd stage notebooks; TODO confirm before enabling.

# Define target users
# target_users = user_item_matrix.index[:10]  # First 10 users as example

# Generate recommendations using different methods
# all_recommendations = {}

# for user_id in target_users:
#     # Compute the two base lists first: the hybrid call takes them as inputs,
#     # and a dict literal cannot refer to its own earlier entries.
#     collaborative_recs = get_user_recommendations(user_id, user_item_matrix, user_similarity)
#     content_recs = get_user_profile_recommendations(user_id, user_item_matrix, item_similarity_content, items)
#     all_recommendations[user_id] = {
#         'collaborative': collaborative_recs,
#         'content_based': content_recs,
#         'hybrid': weighted_hybrid_recommendations(user_id, collaborative_recs, content_recs)
#     }

# print(f"Generated recommendations for {len(target_users)} users")

## 5. Display Sample Recommendations

In [None]:
# Disabled template: requires `target_users` and `all_recommendations` from
# section 4. Each stored result is expected to support `.head()`
# (presumably a DataFrame or Series - TODO confirm against the helpers).

# Display recommendations for a sample user (the first target user)
# sample_user = target_users[0]

# Banner: a rule of 80 '=' characters above and below the user id
# print(f"\n{'='*80}")
# print(f"RECOMMENDATIONS FOR USER: {sample_user}")
# print(f"{'='*80}\n")

# print("Collaborative Filtering:")
# print(all_recommendations[sample_user]['collaborative'].head())

# print("\nContent-Based:")
# print(all_recommendations[sample_user]['content_based'].head())

# print("\nHybrid:")
# print(all_recommendations[sample_user]['hybrid'].head())

## 6. Evaluate Recommendation Quality

In [None]:
# NOTE(review): these imports live outside the notebook's first import cell;
# within the visible notebook they are referenced only by the commented-out
# template below.
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Disabled template for the evaluation stage.

# Split data for evaluation (80/20 split, fixed seed for reproducibility)
# NOTE(review): `ratings_clean` is not defined in any visible cell - presumably
# it comes from data_preprocessing.ipynb; TODO confirm. `train_data` and
# `test_data` are not yet consumed by anything below.
# train_data, test_data = train_test_split(ratings_clean, test_size=0.2, random_state=42)

# Calculate metrics
# Returns a dict with the mean absolute error ('MAE') and the root mean
# squared error ('RMSE') of `predictions` against `actuals`.
# def calculate_metrics(predictions, actuals):
#     mae = mean_absolute_error(actuals, predictions)
#     rmse = np.sqrt(mean_squared_error(actuals, predictions))
#     return {'MAE': mae, 'RMSE': rmse}

# The `...` arguments are placeholders: replace each with that method's
# (predictions, actuals) pair before uncommenting.
# evaluation_results = {
#     'collaborative': calculate_metrics(...),
#     'content_based': calculate_metrics(...),
#     'hybrid': calculate_metrics(...)
# }

# One row per method, one column per metric
# results_df = pd.DataFrame(evaluation_results).T
# print("\nEvaluation Results:")
# print(results_df)

## 7. Visualize System Performance

In [None]:
# Disabled template: requires `results_df` (columns 'MAE' and 'RMSE', one row
# per method) from section 6. The three bar colours correspond to the three
# methods in `evaluation_results`.
# NOTE(review): the figure is written to ../plots/ - confirm that directory
# exists before enabling the savefig line.

# Compare performance of different methods (two side-by-side bar charts)
# fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# # Plot MAE comparison (left panel)
# results_df['MAE'].plot(kind='bar', ax=axes[0], color=['#3498db', '#e74c3c', '#2ecc71'])
# axes[0].set_title('Mean Absolute Error Comparison')
# axes[0].set_ylabel('MAE')
# axes[0].set_xlabel('Method')
# axes[0].grid(True, alpha=0.3)

# # Plot RMSE comparison (right panel)
# results_df['RMSE'].plot(kind='bar', ax=axes[1], color=['#3498db', '#e74c3c', '#2ecc71'])
# axes[1].set_title('Root Mean Square Error Comparison')
# axes[1].set_ylabel('RMSE')
# axes[1].set_xlabel('Method')
# axes[1].grid(True, alpha=0.3)

# plt.tight_layout()
# plt.savefig('../plots/performance_comparison.png', dpi=300, bbox_inches='tight')
# plt.show()

## 8. Generate Final Recommendation Report

In [None]:
# Disabled template: requires `target_users`, `user_item_matrix` and
# `all_recommendations` from the earlier sections.

# Create comprehensive recommendation report (one record per target user)
# final_report = []

# for user_id in target_users:
#     # Keep only the entries the user actually rated. This assumes 0 encodes
#     # "not rated", consistent with the `> 0` test used for num_ratings -
#     # TODO confirm. Averaging the whole row would count those zeros and
#     # drag avg_rating toward 0.
#     user_ratings = user_item_matrix.loc[user_id]
#     rated = user_ratings[user_ratings > 0]
#     # NOTE(review): `.iloc[0]` raises IndexError if a method returned no
#     # recommendations for this user - guard if that can happen.
#     user_report = {
#         'user_id': user_id,
#         'num_ratings': (user_ratings > 0).sum(),
#         'avg_rating': rated.mean(),
#         'top_collaborative_rec': all_recommendations[user_id]['collaborative'].iloc[0]['item_id'],
#         'top_content_rec': all_recommendations[user_id]['content_based'].iloc[0]['item_id'],
#         'top_hybrid_rec': all_recommendations[user_id]['hybrid'].iloc[0]['item_id']
#     }
#     final_report.append(user_report)

# report_df = pd.DataFrame(final_report)
# print("\nFinal Recommendation Report:")
# print(report_df)

# # Save report
# report_df.to_csv('../results/final_recommendation_report.csv', index=False)
# print("\nReport saved to: ../results/final_recommendation_report.csv")

## 9. Summary and Conclusions

In [None]:
# Final summary cell: prints a plain-text recap of the pipeline.
# The dataset statistics and the metrics table are printed only when the
# variables that hold them exist in the notebook namespace. On a fresh kernel
# where the earlier sections have not produced them, the cell still runs and
# says so explicitly instead of printing an empty section heading.
print("\n" + "="*80)
print("RECOMMENDER SYSTEM PIPELINE - SUMMARY")
print("="*80)

print("\n1. Data Processing:")
if 'user_item_matrix' in globals():
    print(f"   - Total users: {user_item_matrix.shape[0]}")
    print(f"   - Total items: {user_item_matrix.shape[1]}")
    # Sparsity = share of matrix cells equal to 0, as a percentage
    print(f"   - Sparsity: {(user_item_matrix == 0).sum().sum() / user_item_matrix.size * 100:.2f}%")
else:
    print("   (not available: user_item_matrix has not been loaded)")

print("\n2. Methods Implemented:")
print("   ✓ Collaborative Filtering (User-based & Item-based)")
print("   ✓ Content-Based Filtering")
print("   ✓ Hybrid Recommender")

print("\n3. Performance:")
if 'results_df' in globals():
    print(results_df)
else:
    print("   (not available: evaluation has not been run)")

print("\n4. Output Files:")
print("   - User-item matrix: results/user_item_matrix.csv")
print("   - Similarity matrices: results/")
print("   - Recommendations: results/final_recommendation_report.csv")
print("   - Plots: plots/performance_comparison.png")

print("\n" + "="*80)
print("Pipeline execution complete!")
print("="*80)