# ReelSense: Advanced Hybrid Recommender System
**Hackathon Submission**

This notebook demonstrates the end-to-end ReelSense pipeline: data loading, exploratory data analysis (EDA), model training (Popularity and User-User CF baselines, SVD, NeuralCF, LightGCN), evaluation on a held-out test split, and an explainability demo.


## 1. Setup and Initialization


In [None]:
!git clone https://github.com/Anubhab-Rakshit/brainded-hackathon.git
%cd brainded-hackathon
!pip install -r requirements.txt


In [None]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Add project to path
sys.path.append(os.getcwd())

from proj1.src.data import DataLoader
from proj1.src.recommenders import *
from proj1.src.evaluation import *
from proj1.src.explainability import Explainer

%matplotlib inline


## 2. Data Loading & Preprocessing
We use the MovieLens Small dataset.


In [None]:
# Build the loader for the 'ml-latest-small' dataset, then read and preprocess it.
loader = DataLoader('ml-latest-small')
loader.load_data()
loader.preprocess()

# Hold-out split using the loader's 'leave_last_n' strategy with n=1.
# NOTE(review): the original comment describes this split as time-based; that
# depends on DataLoader's implementation and cannot be confirmed from this cell.
split_options = {'method': 'leave_last_n', 'n': 1}
train_df, test_df = loader.get_train_test_split(**split_options)

# Report both split sizes, then preview the training frame as the cell output.
print(f"Train Samples: {len(train_df)}\nTest Samples: {len(test_df)}")
train_df.head()


## 3. Exploratory Data Analysis (EDA)
Visualizing the long-tail distribution of ratings per movie and the distribution of rating values in the training set.


In [None]:
# 1. Long-tail plot: number of ratings per movie, ordered from the most-rated
#    movie to the least-rated one (value_counts sorts descending by default),
#    drawn on a log-scaled y axis.
item_counts = train_df['movieId'].value_counts()
fig_tail, ax_tail = plt.subplots(figsize=(10, 6))
ax_tail.plot(item_counts.values)
ax_tail.set_title('Long-Tail Distribution of Movie Ratings')
ax_tail.set_xlabel('Movie Rank')
ax_tail.set_ylabel('Number of Ratings')
ax_tail.set_yscale('log')
plt.show()

# 2. Ratings distribution: how many training rows carry each rating value.
fig_ratings, ax_ratings = plt.subplots(figsize=(8, 4))
sns.countplot(x='rating', data=train_df, ax=ax_ratings)
ax_ratings.set_title('Distribution of Ratings')
plt.show()


## 4. Model Training & Evaluation
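We train and compare two baselines (Popularity and User-User CF) against three model architectures (SVD, NeuralCF, and LightGCN), evaluating each one on the held-out test split at K = 10.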


In [None]:
# Fixed seed and ranking cut-off, named so a reader can tune them in one place.
SEED = 42
TOP_K = 10


def _reset_seeds(seed):
    """Seed the global random number generators the recommenders may draw from.

    Seeds Python's ``random`` module and NumPy's global RNG. If ``torch`` has
    already been imported by some other module it is seeded as well; it is
    never imported here, so no new dependency is introduced.

    NOTE(review): whether the recommenders actually use these generators is not
    visible from this notebook — confirm against proj1.src.recommenders.
    """
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch_module = sys.modules.get("torch")
    if torch_module is not None:
        torch_module.manual_seed(seed)


# Seed before construction in case a model initialises its weights in __init__.
_reset_seeds(SEED)

# Initialize Models
models = {
    "Popularity": PopularityRecommender(),
    "User-User CF": CollaborativeRecommender(method='user_user', n_neighbors=50),
    "SVD (Matrix Factorization)": SVDRecommender(n_components=20),
    "Neural CF (Deep Learning)": NeuralCFRecommender(embedding_dim=32, n_epochs=5),
    "LightGCN (Graph NN)": LightGCNRecommender(n_epochs=10)
}

# Metrics per model name, filled in by the loop below.
results = {}
# Every movieId in the movies table, passed to evaluate_model as the item set.
all_items = loader.movies['movieId'].unique()

for name, model in models.items():
    print(f"Training {name}...")
    # Re-seed per model so each result does not depend on which models ran before it.
    _reset_seeds(SEED)
    model.fit(train_df)
    # evaluate_model returns a pair; only the metrics are used here. The second
    # element is bound to `_extra` rather than `_`, because assigning to `_`
    # shadows IPython's "last output" variable for the rest of the session.
    metrics, _extra = evaluate_model(model, train_df, test_df, all_items, k=TOP_K)
    results[name] = metrics
    print(f"{name}: {metrics}")


## 5. Final Results Calculation


In [None]:
# Columns shown on the leaderboard, in display order.
leaderboard_columns = ['Precision@K', 'Recall@K', 'NDCG@K', 'Coverage']

# `results` maps model name -> metrics; transposing puts one model per row.
results_df = pd.DataFrame(results).transpose().loc[:, leaderboard_columns]

# Display Leaderboard (best NDCG@K first) as the cell output.
results_df.sort_values(by='NDCG@K', ascending=False)


## 6. Explainability Demo
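Why did we recommend this? We pick a sample user from the test set, generate their top-3 recommendations with the SVD model, and print the explainer's reason for each recommended movie.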


In [None]:
# Explain a recommendation for a sample user
explainer = Explainer(loader, train_df)
# Use the user of the first test row as the demo subject.
sample_user = test_df['userId'].iloc[0]

recs = models['SVD (Matrix Factorization)'].recommend(sample_user, n=3)
print(f"User Profile: {explainer.user_profiles.get(sample_user, 'Unknown')}")

# Build the movieId -> title lookup once instead of scanning the whole movies
# table for every recommendation. drop_duplicates keeps the first row per
# movieId, which matches the previous `.iloc[0]` choice.
title_by_id = (
    loader.movies
    .drop_duplicates(subset='movieId')
    .set_index('movieId')['title']
)

for movie_id in recs:
    # Fall back to a placeholder rather than raising IndexError when a
    # recommended id is missing from the movies table.
    title = title_by_id.get(movie_id, f"<unknown title, movieId={movie_id}>")
    explanation = explainer.explain(sample_user, movie_id)
    print(f"\n🎥 Movie: {title}")
    print(f"💡 Reason: {explanation}")
