# Recommender Systems for Comrade Platform

This notebook develops lightweight recommender systems for various platform content.

## Models
1. **Collaborative Filtering** - Matrix factorization for user-item interactions
2. **Content-Based** - TF-IDF + cosine similarity
3. **Hybrid** - LightGBM ranking model over collaborative and content features (training is still a placeholder in this notebook)

## Content Types
- Opinions (posts)
- Articles
- Research
- Events
- Rooms
- People (connections)
- Organizations
- Products
- Payment groups
- Tasks
- Resources

In [None]:
# Install dependencies
!pip install pandas numpy scikit-learn lightgbm scipy

In [None]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from collections import defaultdict

## 1. Collaborative Filtering with SVD

In [None]:
class CollaborativeFilter:
    """
    Simple SVD-based collaborative filtering.
    Efficient for medium-sized datasets.

    Interactions are mean-centered, placed in a sparse user x item matrix and
    factorized with a truncated SVD. A user's score for an item is the dot
    product of their latent factors plus the global mean interaction value.
    """

    def __init__(self, n_factors=50):
        self.n_factors = n_factors
        self.user_factors = None
        self.item_factors = None
        self.user_mapping = {}
        self.item_mapping = {}
        self.reverse_item_mapping = {}
        self.mean_rating = 0
        # Binary user x item matrix of observed pairs; used by `recommend`
        # to honour `exclude_seen`.
        self._seen_matrix = None

    def fit(self, interactions_df):
        """
        Fit the model on user-item interactions.

        Args:
            interactions_df: DataFrame with columns [user_id, item_id, interaction].
                Repeated (user_id, item_id) rows are averaged into one value.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: if `interactions_df` has no rows or `n_factors` < 1.
        """
        if len(interactions_df) == 0:
            raise ValueError("interactions_df must contain at least one interaction")
        if self.n_factors < 1:
            raise ValueError("n_factors must be >= 1")

        # Create mappings (index order follows first appearance in the data)
        users = interactions_df['user_id'].unique()
        items = interactions_df['item_id'].unique()

        self.user_mapping = {u: i for i, u in enumerate(users)}
        self.item_mapping = {i: j for j, i in enumerate(items)}
        self.reverse_item_mapping = {j: i for i, j in self.item_mapping.items()}

        # Collapse repeated pairs first: the sparse constructor would otherwise
        # add their (mean-centered) values together.
        pairs = (
            interactions_df
            .groupby(['user_id', 'item_id'], sort=False, as_index=False)['interaction']
            .mean()
        )
        rows = pairs['user_id'].map(self.user_mapping).to_numpy()
        cols = pairs['item_id'].map(self.item_mapping).to_numpy()
        ratings = pairs['interaction'].to_numpy(dtype=float)

        shape = (len(users), len(items))
        self.mean_rating = float(ratings.mean())
        centered = ratings - self.mean_rating

        self._seen_matrix = csr_matrix(
            (np.ones(len(ratings), dtype=np.int8), (rows, cols)), shape=shape
        )

        if not np.any(centered):
            # Every interaction equals the mean: nothing is left to factorize,
            # so all scores collapse to the mean rating.
            U = np.zeros((shape[0], 1))
            sigma = np.zeros(1)
            Vt = np.zeros((1, shape[1]))
        else:
            matrix = csr_matrix((centered, (rows, cols)), shape=shape)
            # svds needs 1 <= k < min(shape)
            k = min(self.n_factors, min(shape) - 1)
            if k >= 1:
                U, sigma, Vt = svds(matrix, k=k)
            else:
                # Only one user or one item: fall back to a dense SVD, which
                # yields the single available component.
                U, sigma, Vt = np.linalg.svd(matrix.toarray(), full_matrices=False)

        # Split the singular values evenly between both factor matrices
        root_sigma = np.sqrt(sigma)
        self.user_factors = U * root_sigma
        self.item_factors = Vt.T * root_sigma

        return self

    def recommend(self, user_id, n=10, exclude_seen=True):
        """
        Get top-N recommendations for a user.

        Args:
            user_id: id of the user as it appeared in the training data.
            n: maximum number of items to return.
            exclude_seen: when True, items the user interacted with in the
                training data are left out.

        Returns:
            List of {'item_id', 'score'} dicts ordered by descending score.
            Empty for users not present in the training data or when n <= 0.
        """
        if n <= 0 or user_id not in self.user_mapping:
            return []  # Cold start: no popularity fallback is implemented

        user_idx = self.user_mapping[user_id]
        scores = np.dot(self.user_factors[user_idx], self.item_factors.T) + self.mean_rating

        if exclude_seen and self._seen_matrix is not None:
            start = self._seen_matrix.indptr[user_idx]
            stop = self._seen_matrix.indptr[user_idx + 1]
            unseen = np.ones(len(scores), dtype=bool)
            unseen[self._seen_matrix.indices[start:stop]] = False
            candidates = np.flatnonzero(unseen)
        else:
            candidates = np.arange(len(scores))

        # Rank only the eligible items so filtering never shortens the list
        # below n while candidates remain.
        ranked = candidates[np.argsort(scores[candidates])[::-1][:n]]

        return [
            {
                'item_id': self.reverse_item_mapping[int(idx)],
                'score': float(scores[idx])
            }
            for idx in ranked
        ]

## 2. Content-Based Filtering

In [None]:
class ContentBasedFilter:
    """
    TF-IDF based content similarity.
    Works well for text content (articles, opinions).

    Items are embedded as TF-IDF vectors (English stop words removed,
    unigrams and bigrams) and compared with cosine similarity.
    """

    def __init__(self, max_features=5000):
        self.vectorizer = TfidfVectorizer(
            max_features=max_features,
            stop_words='english',
            ngram_range=(1, 2)
        )
        self.item_vectors = None
        self.item_ids = None
        # item_id -> row position in `item_vectors` (first occurrence wins)
        self._index_by_id = {}

    def fit(self, items_df):
        """
        Fit on item content.

        Args:
            items_df: DataFrame with columns [item_id, content].
                Missing content is treated as an empty document.

        Returns:
            self, so calls can be chained.
        """
        self.item_ids = items_df['item_id'].values
        # The vectorizer rejects NaN documents, so blank them out first
        self.item_vectors = self.vectorizer.fit_transform(items_df['content'].fillna(''))

        index_by_id = {}
        for position, item_id in enumerate(self.item_ids):
            index_by_id.setdefault(item_id, position)
        self._index_by_id = index_by_id
        return self

    def _position_of(self, item_id):
        """Return the row position of `item_id`, or None when it is unknown."""
        try:
            return self._index_by_id.get(item_id)
        except TypeError:
            # Unhashable ids cannot have been indexed
            return None

    def _top_items(self, similarities, excluded, n):
        """Return up to `n` best-scoring items, skipping the `excluded` row positions."""
        if n <= 0:
            return []

        eligible = np.ones(len(similarities), dtype=bool)
        eligible[np.asarray(excluded, dtype=int)] = False
        candidates = np.flatnonzero(eligible)
        ranked = candidates[np.argsort(similarities[candidates])[::-1][:n]]

        return [
            {'item_id': self.item_ids[i], 'score': float(similarities[i])}
            for i in ranked
        ]

    def similar_items(self, item_id, n=10):
        """
        Find similar items based on content.

        Args:
            item_id: id of the reference item.
            n: maximum number of neighbours to return.

        Returns:
            List of {'item_id', 'score'} dicts ordered by descending cosine
            similarity; the reference item itself is never included. Empty
            when `item_id` is unknown.
        """
        idx = self._position_of(item_id)
        if idx is None:
            return []

        item_vector = self.item_vectors[idx]
        similarities = cosine_similarity(item_vector, self.item_vectors).ravel()

        # Exclude the query row by position: relying on it sorting first
        # breaks when another item ties with it (e.g. duplicated content).
        return self._top_items(similarities, [idx], n)

    def recommend_for_user(self, user_history_ids, n=10):
        """
        Recommend based on user's interaction history.

        Args:
            user_history_ids: iterable of item ids the user interacted with;
                ids that were not part of the fitted data are ignored.
            n: maximum number of items to return.

        Returns:
            List of {'item_id', 'score'} dicts ordered by descending cosine
            similarity to the average of the user's item vectors. Items from
            the history are never returned. Empty when no history item is known.
        """
        # Materialize once so lists, sets, arrays and Series all behave alike
        history = [] if user_history_ids is None else list(user_history_ids)

        # Get indices of user's items
        indices = []
        for item_id in history:
            position = self._position_of(item_id)
            if position is not None:
                indices.append(position)

        if not indices:
            return []

        # Average user's item vectors. The sparse mean comes back as
        # np.matrix, which scikit-learn no longer accepts, so convert it.
        user_profile = np.asarray(self.item_vectors[indices].mean(axis=0))
        similarities = cosine_similarity(user_profile, self.item_vectors).ravel()

        # Exclude already seen
        return self._top_items(similarities, indices, n)

## 3. Hybrid Recommender with LightGBM

In [None]:
class HybridRecommender:
    """
    LightGBM-based ranking model.
    Combines collaborative and content features.

    Only the two base models are fitted at the moment; `model` stays None
    because the ranking stage is still a placeholder.
    """

    def __init__(self):
        self.model = None
        self.collab_filter = CollaborativeFilter()
        self.content_filter = ContentBasedFilter()

    def prepare_features(self, user_id, item_id, items_df):
        """
        Create feature vector for user-item pair.

        Args:
            user_id: id of the user.
            item_id: id of the candidate item.
            items_df: item table; currently unused, kept for the placeholders.

        Returns:
            List of four values: [collaborative score, popularity, user
            activity, recency]. The last three are placeholder zeros. The
            collaborative score is 0 when the user or the item is unknown to
            the collaborative filter.
        """
        collab = self.collab_filter

        # Score the pair directly from the latent factors. Going through a
        # top-N recommendation list would report 0 for every item outside
        # that list and would sort all items for each feature row.
        collab_score = 0.0
        user_idx = collab.user_mapping.get(user_id)
        item_idx = collab.item_mapping.get(item_id)
        if user_idx is not None and item_idx is not None:
            collab_score = float(
                np.dot(collab.user_factors[user_idx], collab.item_factors[item_idx])
                + collab.mean_rating
            )

        return [
            collab_score,
            0,  # Placeholder for popularity (could be pre-computed)
            0,  # Placeholder for user activity
            0,  # Placeholder for recency (days since item created)
        ]

    def fit(self, interactions_df, items_df):
        """
        Train the hybrid model.

        Args:
            interactions_df: DataFrame with columns [user_id, item_id, interaction].
            items_df: DataFrame with columns [item_id, content].

        Returns:
            self, so calls can be chained.
        """
        # First fit base models
        self.collab_filter.fit(interactions_df)
        self.content_filter.fit(items_df)

        # Prepare training data
        # This would normally use actual interaction data
        print("Hybrid model training placeholder")

        return self

## 4. Platform-Specific Recommenders

In [None]:
class PlatformRecommender:
    """
    Single entry point holding one trained recommender per content type.
    """

    # Content types a recommender may be trained for
    CONTENT_TYPES = [
        'opinions',
        'articles',
        'research',
        'events',
        'rooms',
        'people',
        'organizations',
        'products',
        'payment_groups',
        'tasks',
        'resources',
    ]

    def __init__(self):
        # content type -> fitted HybridRecommender
        self.recommenders = {}

    def train_for_content_type(self, content_type, interactions_df, items_df):
        """
        Fit a HybridRecommender and register it under `content_type`.

        Raises ValueError when `content_type` is not listed in CONTENT_TYPES.
        Returns self.
        """
        is_supported = content_type in self.CONTENT_TYPES
        if not is_supported:
            raise ValueError(f"Unknown content type: {content_type}")

        model = HybridRecommender()
        model.fit(interactions_df, items_df)
        self.recommenders[content_type] = model

        return self

    def get_recommendations(self, user_id, content_type, n=10):
        """
        Return the collaborative-filter recommendations for `user_id`.

        When nothing has been trained for `content_type`, a dict with an
        'error' message is returned instead of a list.
        """
        try:
            model = self.recommenders[content_type]
        except KeyError:
            return {'error': f'No recommender trained for {content_type}'}

        return model.collab_filter.recommend(user_id, n=n)

## 5. Quick Demo

In [None]:
# Build a small synthetic dataset; the fixed seed keeps the demo reproducible
np.random.seed(42)

# Dataset dimensions
n_users, n_items, n_interactions = 100, 50, 500

# Random interactions. The columns are drawn in this order (users, items,
# ratings) so the seeded random stream yields the same values on every run.
interaction_columns = {}
interaction_columns['user_id'] = np.random.randint(0, n_users, n_interactions)
interaction_columns['item_id'] = np.random.randint(0, n_items, n_interactions)
interaction_columns['interaction'] = np.random.randint(1, 6, n_interactions)  # 1-5 rating
sample_interactions = pd.DataFrame(interaction_columns)

# One placeholder text document per item
item_columns = {
    'item_id': range(n_items),
    'content': [f'Sample content for item {i}. This is placeholder text.' for i in range(n_items)],
}
sample_items = pd.DataFrame(item_columns)

print(f"Sample data: {len(sample_interactions)} interactions, {n_items} items")

In [None]:
# Fit a 20-factor collaborative filter on the sample interactions
cf = CollaborativeFilter(n_factors=20)
cf.fit(sample_interactions)

# Ask for the five best items for user 0 and list them with their scores
recs = cf.recommend(user_id=0, n=5)

print("Collaborative Filter Recommendations for User 0:")
for rec in recs:
    print(f"  Item {rec['item_id']}: {rec['score']:.3f}")

In [None]:
# Fit the TF-IDF content filter on the sample item texts
cbf = ContentBasedFilter()
cbf.fit(sample_items)

# Look up the five items whose content is closest to item 0
similar = cbf.similar_items(item_id=0, n=5)

print("\nSimilar Items to Item 0:")
for item in similar:
    print(f"  Item {item['item_id']}: {item['score']:.3f}")

## 6. Export for Production

In [None]:
import joblib

# Persisting the fitted models is left disabled; uncomment to write them out.
# joblib.dump(cf, '../models/recommenders/collab_filter.pkl')
# joblib.dump(cbf, '../models/recommenders/content_filter.pkl')

# Closing status messages for the notebook run
for status_line in (
    "\nModel export code ready!",
    "Recommender system development complete.",
):
    print(status_line)