## Import Libraries

In [10]:
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics.pairwise import cosine_similarity

## Load Data

In [11]:
# Load raw data
users = pd.read_csv("../data/raw/users.csv")
programs = pd.read_csv("../data/raw/programs.csv")
interactions = pd.read_csv("../data/raw/interactions.csv")

print(f"Users: {len(users)}, Programs: {len(programs)}, Interactions: {len(interactions)}")

Users: 500, Programs: 50, Interactions: 8004


## Load Pre-trained Models

**Note:** These models were trained in notebooks 02 and 03 using only the 80% training data split.

In [12]:
# Load content-based model (trained on all programs)
tfidf_vectorizer = joblib.load("../models/tfidf.pkl")
tfidf_matrix = joblib.load("../models/program_tfidf.pkl")

# Load CF model (trained on 80% training interactions)
cf_model = joblib.load("../models/cf_svd.pkl")
predicted_scores = cf_model["predicted_scores"]
user_id_map = cf_model["user_id_map"]
item_id_map = cf_model["item_id_map"]
reverse_item_map = cf_model["reverse_item_map"]
interaction_matrix = cf_model["interaction_matrix"]

print("âœ“ Models loaded successfully")


âœ“ Models loaded successfully


## Content-Based Recommendation Function

In [13]:
def recommend_content_based(user_interests, k=5, return_scores=True):
    """
    Recommend programs based on user interests using content-based filtering.
    
    Args:
        user_interests: String of comma-separated interests (e.g., "art, design, technology")
        k: Number of recommendations
        return_scores: If True, return normalized scores
    """
    # Vectorize user interests
    user_vector = tfidf_vectorizer.transform([user_interests])
    
    # Compute similarity with all programs
    similarities = cosine_similarity(user_vector, tfidf_matrix).flatten()
    
    # Get top-k programs
    top_indices = np.argsort(similarities)[::-1][:k]
    
    if return_scores:
        # Normalize scores to 0-1 range
        max_score = similarities.max() if similarities.max() > 0 else 1
        normalized_scores = similarities / max_score
        return [(programs.iloc[i]["program_id"], normalized_scores[i]) for i in top_indices]
    else:
        return [programs.iloc[i]["program_id"] for i in top_indices]

## Collaborative Filtering Recommendation Function

In [14]:
def recommend_cf(user_id, k=5, return_scores=True):
    """
    Recommend programs using collaborative filtering.
    
    Args:
        user_id: User ID (e.g., "u_0")
        k: Number of recommendations
        return_scores: If True, return normalized scores
    """
    # Check if user exists in training data
    if user_id not in user_id_map:
        return None  # Cold-start case
    
    user_idx = user_id_map[user_id]
    scores = predicted_scores[user_idx]
    
    # Filter out already-interacted programs
    interacted_items = interaction_matrix[user_idx].nonzero()[1]
    scores_copy = scores.copy()
    scores_copy[interacted_items] = -np.inf
    
    # Get top-k programs
    top_items = np.argsort(scores_copy)[::-1][:k]
    
    if return_scores:
        # Normalize scores to 0-1 range
        valid_scores = scores[scores > -np.inf]
        if len(valid_scores) > 0:
            min_score, max_score = valid_scores.min(), valid_scores.max()
            score_range = max_score - min_score if max_score > min_score else 1
            normalized_scores = (scores - min_score) / score_range
        else:
            normalized_scores = scores
        
        return [(reverse_item_map[i], normalized_scores[i]) for i in top_items]
    else:
        return [reverse_item_map[i] for i in top_items]

## Hybrid Recommendation Function

In [15]:
def recommend_hybrid(user_id=None, user_interests=None, k=3, content_weight=0.6, cf_weight=0.4):
    """
    Hybrid recommender combining content-based and collaborative filtering.
    
    Args:
        user_id: User ID for CF (optional if new user)
        user_interests: User interests string for content-based (required)
        k: Number of recommendations
        content_weight: Weight for content-based scores (default: 0.6)
        cf_weight: Weight for CF scores (default: 0.4)
    """
    if user_interests is None:
        raise ValueError("user_interests is required")
    
    # Get content-based recommendations (always available)
    content_recs = recommend_content_based(user_interests, k=10, return_scores=True)
    
    # Try to get CF recommendations
    cf_recs = None
    if user_id and user_id in user_id_map:
        cf_recs = recommend_cf(user_id, k=10, return_scores=True)
    
    # Combine scores
    combined_scores = {}
    
    if cf_recs is None:
        # Cold-start: use only content-based
        print(f"Cold-start user - using 100% content-based")
        for program_id, score in content_recs:
            combined_scores[program_id] = score
    else:
        # Hybrid: weighted combination
        # Add content-based scores
        for program_id, score in content_recs:
            combined_scores[program_id] = content_weight * score
        
        # Add CF scores
        for program_id, score in cf_recs:
            if program_id in combined_scores:
                combined_scores[program_id] += cf_weight * score
            else:
                combined_scores[program_id] = cf_weight * score
    
    # Sort by combined score and return top-k
    sorted_recs = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)[:k]
    
    return sorted_recs

## Explanation Generation

In [16]:
def generate_explanation(program_id, user_interests, user_id=None):
    """
    Generate a human-readable explanation for why a program was recommended.
    
    Args:
        program_id: Program ID (e.g., "p_1")
        user_interests: User's interests
        user_id: User ID (optional, for CF explanations)
    """
    # Get program details
    program = programs[programs["program_id"] == program_id].iloc[0]
    
    # Extract key skills from program description
    skills = program["tags_text"].split()[:3]  # Top 3 skills
    
    # Content-based explanation
    user_interest_list = [i.strip().title() for i in user_interests.split(",")[:3]]
    content_explanation = f"Matches your interests in {', '.join(user_interest_list)}"
    
    # CF explanation (if user has history)
    cf_explanation = None
    if user_id and user_id in user_id_map:
        cf_explanation = "Users with similar preferences also liked this program"
    
    # Combine explanations
    explanations = [content_explanation]
    if cf_explanation:
        explanations.append(cf_explanation)
    
    # Add program highlights
    highlights = f"Key skills: {', '.join(skills)}"
    
    return {
        "main_reason": explanations[0],
        "additional_reasons": explanations[1:],
        "highlights": highlights
    }

## Complete Recommendation with Explanations

In [17]:
def get_recommendations_with_explanations(user_id=None, user_interests=None, k=3):
    """
    Get recommendations with detailed explanations.
    
    Returns a list of recommendations with:
    - Program details
    - Score
    - Explanations
    """
    # Get hybrid recommendations
    recommendations = recommend_hybrid(user_id, user_interests, k)
    
    # Enrich with program details and explanations
    results = []
    for program_id, score in recommendations:
        program = programs[programs["program_id"] == program_id].iloc[0]
        explanation = generate_explanation(program_id, user_interests, user_id)
        
        results.append({
            "program_id": program_id,
            "program_name": program["name"],
            "score": round(score, 3),
            "explanation": explanation,
            "description": program["description"][:150] + "...",  # Truncated
            "skills": program["tags_text"]
        })
    
    return results

## Test Hybrid Recommender

### Test 1: Existing user (with interaction history)

In [18]:
# Get an existing user
test_user = users.iloc[0]
print(f"Testing with User: {test_user['user_id']}")
print(f"Interests: {test_user['interests_text']}")
print(f"Grades - Math: {test_user['math_grade']}, Science: {test_user['science_grade']}, Language: {test_user['language_grade']}")
print("\n" + "="*80 + "\n")

# Get recommendations
recommendations = get_recommendations_with_explanations(
    user_id=test_user["user_id"],
    user_interests=test_user["interests_text"],
    k=3
)

# Display results
for i, rec in enumerate(recommendations, 1):
    print(f"\n{i}. {rec['program_name']} (Score: {rec['score']})")
    print(f"   Program ID: {rec['program_id']}")
    print(f"\n   ðŸ“Œ Main Reason: {rec['explanation']['main_reason']}")
    if rec['explanation']['additional_reasons']:
        for reason in rec['explanation']['additional_reasons']:
            print(f"   ðŸ“Œ {reason}")
    print(f"   ðŸ’¡ {rec['explanation']['highlights']}")
    print(f"\n   Description: {rec['description']}")
    print("\n" + "-"*80)

Testing with User: u_0000
Interests: technology politics sustainability genetics
Grades - Math: 80, Science: 73, Language: 82



1. Environmental Engineering (Score: 0.6)
   Program ID: p_021

   ðŸ“Œ Main Reason: Matches your interests in Technology Politics Sustainability Genetics
   ðŸ“Œ Users with similar preferences also liked this program
   ðŸ’¡ Key skills: engineering, environment, ecology

   Description: Sustainability engineering solutions...

--------------------------------------------------------------------------------

2. Environmental Science (Score: 0.51)
   Program ID: p_013

   ðŸ“Œ Main Reason: Matches your interests in Technology Politics Sustainability Genetics
   ðŸ“Œ Users with similar preferences also liked this program
   ðŸ’¡ Key skills: environment, ecology, science

   Description: Ecology and sustainability...

--------------------------------------------------------------------------------

3. Molecular Biology (Score: 0.456)
   Program ID: p_012

   ðŸ“

### Test 2: New user (cold-start scenario)

In [19]:
# Simulate a new user with no interaction history
new_user_interests = "mathematics, physics, problem-solving, research"

print(f"Testing with NEW USER (Cold-start)")
print(f"Interests: {new_user_interests}")
print("\n" + "="*80 + "\n")

# Get recommendations (no user_id)
recommendations = get_recommendations_with_explanations(
    user_id=None,  # No user ID = new user
    user_interests=new_user_interests,
    k=3
)

# Display results
for i, rec in enumerate(recommendations, 1):
    print(f"\n{i}. {rec['program_name']} (Score: {rec['score']})")
    print(f"   Program ID: {rec['program_id']}")
    print(f"\n   ðŸ“Œ Main Reason: {rec['explanation']['main_reason']}")
    print(f"   ðŸ’¡ {rec['explanation']['highlights']}")
    print(f"\n   Description: {rec['description']}")
    print("\n" + "-"*80)

Testing with NEW USER (Cold-start)
Interests: mathematics, physics, problem-solving, research


Cold-start user - using 100% content-based

1. Mathematics (Score: 1.0)
   Program ID: p_006

   ðŸ“Œ Main Reason: Matches your interests in Mathematics, Physics, Problem-Solving
   ðŸ’¡ Key skills: math, calculus, algebra

   Description: Pure and applied mathematics...

--------------------------------------------------------------------------------

2. Physics (Score: 0.446)
   Program ID: p_008

   ðŸ“Œ Main Reason: Matches your interests in Mathematics, Physics, Problem-Solving
   ðŸ’¡ Key skills: physics, math, science

   Description: Physical sciences and research...

--------------------------------------------------------------------------------

3. Mechanical Engineering (Score: 0.227)
   Program ID: p_015

   ðŸ“Œ Main Reason: Matches your interests in Mathematics, Physics, Problem-Solving
   ðŸ’¡ Key skills: engineering, mechanics, math

   Description: Machines and systems desi

### Test 3: User with artistic interests

In [20]:
# Test with artistic profile
artistic_interests = "drawing, design, creativity, visual arts, aesthetics"

print(f"Testing with ARTISTIC PROFILE")
print(f"Interests: {artistic_interests}")
print("\n" + "="*80 + "\n")

recommendations = get_recommendations_with_explanations(
    user_id=None,
    user_interests=artistic_interests,
    k=3
)

for i, rec in enumerate(recommendations, 1):
    print(f"\n{i}. {rec['program_name']} (Score: {rec['score']})")
    print(f"   ðŸ“Œ {rec['explanation']['main_reason']}")
    print(f"   ðŸ’¡ {rec['explanation']['highlights']}")
    print("\n" + "-"*80)

Testing with ARTISTIC PROFILE
Interests: drawing, design, creativity, visual arts, aesthetics


Cold-start user - using 100% content-based

1. Interior Design (Score: 1.0)
   ðŸ“Œ Matches your interests in Drawing, Design, Creativity
   ðŸ’¡ Key skills: design, creativity, art

--------------------------------------------------------------------------------

2. Fine Arts (Score: 0.96)
   ðŸ“Œ Matches your interests in Drawing, Design, Creativity
   ðŸ’¡ Key skills: art, painting, drawing

--------------------------------------------------------------------------------

3. Graphic Design (Score: 0.897)
   ðŸ“Œ Matches your interests in Drawing, Design, Creativity
   ðŸ’¡ Key skills: design, art, creativity

--------------------------------------------------------------------------------


## Save Hybrid Model Functions

Save the hybrid recommendation pipeline for API deployment.

In [21]:
# Package hybrid model with helper functions
hybrid_model = {
    "tfidf_vectorizer": tfidf_vectorizer,
    "tfidf_matrix": tfidf_matrix,
    "cf_model": cf_model,
    "programs": programs,
    "functions": {
        "recommend_hybrid": recommend_hybrid,
        "get_recommendations_with_explanations": get_recommendations_with_explanations,
        "generate_explanation": generate_explanation
    }
}

joblib.dump(hybrid_model, "../models/hybrid_recommender.pkl")
print("âœ“ Hybrid model saved to ../models/hybrid_recommender.pkl")


âœ“ Hybrid model saved to ../models/hybrid_recommender.pkl


## Summary

**Hybrid Recommender Features:**
- âœ… Combines content-based (60%) and collaborative filtering (40%)
- âœ… Handles cold-start users with fallback to content-based
- âœ… Generates human-readable explanations
- âœ… Filters out already-seen programs
- âœ… Returns enriched recommendations with program details

**Next Steps:**
1. Evaluation notebook with NDCG@k, Precision@k metrics
2. Build FastAPI application
3. Create simple UI for user interaction