# Recommendation System

In [13]:
from datetime import datetime, timedelta
import random
import math
from typing import List, Dict, Optional

In [14]:
# Mock database functions (to be implemented based on actual data storage)
def fetch_user_errors(user_id: str, time_window: int) -> List[Dict]:
    """Return a mocked list of 20 error records for a user.

    Stand-in for a real data-store query: ``user_id`` is ignored and every
    record is randomly generated.

    Args:
        user_id: Identifier of the user (unused by the mock).
        time_window: Maximum age, in days, of a generated error.

    Returns:
        A list of 20 dicts, each with the keys ``timestamp`` (a datetime
        between ``time_window`` days ago and now), ``category``,
        ``subcategory`` and ``severity`` (an int from 1 to 5).
    """
    category_pool = ["Grammar", "Vocabulary", "Pronunciation", "Fluency"]
    subcategory_pool = ["prepositions", "verb tense", "word choice", "intonation"]

    mock_errors = []
    for _ in range(20):
        # Draws happen in the order age, category, subcategory, severity so a
        # seeded run yields the same sequence of records every time.
        occurred_at = datetime.now() - timedelta(days=random.randint(0, time_window))
        picked_category = random.choice(category_pool)
        picked_subcategory = random.choice(subcategory_pool)
        picked_severity = random.randint(1, 5)
        mock_errors.append(
            {
                "timestamp": occurred_at,
                "category": picked_category,
                "subcategory": picked_subcategory,
                "severity": picked_severity,
            }
        )
    return mock_errors

In [15]:
def get_user_profile(user_id: str) -> Dict:
    """Return a mocked profile for a user.

    ``user_id`` is ignored; every field is drawn at random from a fixed pool.

    Returns:
        A dict with ``country``, ``age_band``, ``interests`` (two distinct
        entries) and ``proficiency_level``.
    """
    profile = {}
    # Fields are filled in a fixed order so a seeded run is reproducible.
    profile["country"] = random.choice(["Japan", "India", "USA", "Germany"])
    profile["age_band"] = random.choice(["18-25", "26-35", "36-45"])
    profile["interests"] = random.sample(["Anime", "Movies", "Sports", "Technology"], 2)
    profile["proficiency_level"] = random.choice(["beginner", "intermediate", "advanced"])
    return profile

In [16]:
def find_exercises(category: str, subcategory: str, difficulty: int) -> List[Dict]:
    """Return five mocked exercises for the requested criteria.

    The category, subcategory and difficulty are copied onto every exercise;
    the id, themes and format are randomly generated.

    Returns:
        A list of five dicts with the keys ``id``, ``category``,
        ``subcategory``, ``difficulty``, ``themes`` (two distinct entries)
        and ``format``.
    """
    theme_pool = ["Anime", "Movies", "Sports", "Technology"]
    format_pool = ["fill-in-blanks", "multiple-choice", "role-play"]

    matches = []
    for _ in range(5):
        # Draw order: id number, then themes, then format.
        exercise_id = f"ex{random.randint(100, 999)}"
        exercise_themes = random.sample(theme_pool, 2)
        exercise_format = random.choice(format_pool)
        matches.append(
            {
                "id": exercise_id,
                "category": category,
                "subcategory": subcategory,
                "difficulty": difficulty,
                "themes": exercise_themes,
                "format": exercise_format,
            }
        )
    return matches

In [17]:
def is_new_user(user_id: str) -> bool:
    """Report whether the user is new (mock: an even coin flip).

    ``user_id`` is ignored by this placeholder.
    """
    possible_answers = [True, False]
    return random.choice(possible_answers)

In [18]:
def get_last_practice_time(user_id: str, category: str) -> datetime:
    """Return when the user last practiced ``category`` (mocked).

    Both arguments are ignored; the result is the current time minus a
    random whole number of days between 1 and 30.
    """
    current_time = datetime.now()
    elapsed = timedelta(days=random.randint(1, 30))
    return current_time - elapsed

In [19]:
def get_improvement_rate(user_id: str, category: str) -> float:
    """Return the user's improvement rate for ``category`` (mocked).

    Both arguments are ignored; the value is drawn uniformly from the range
    0.5 to 1.5.
    """
    lowest_rate, highest_rate = 0.5, 1.5
    return random.uniform(lowest_rate, highest_rate)

In [20]:
def map_country_to_themes(country: str) -> List[str]:
    """Return the cultural themes associated with ``country``.

    Countries without an entry in the table fall back to ``["General"]``.
    """
    themes_by_country = {
        "Japan": ["Anime", "Sushi", "Samurai"],
        "India": ["Bollywood", "Cricket", "Yoga"],
        "USA": ["Hollywood", "Basketball", "Technology"],
        "Germany": ["Oktoberfest", "Cars", "Classical Music"],
    }
    if country in themes_by_country:
        return themes_by_country[country]
    return ["General"]

In [21]:
def is_age_appropriate(exercise: Dict, age_band: str) -> float:
    """Return an age-appropriateness multiplier for ``exercise``.

    Placeholder: both arguments are ignored and every exercise is treated
    as fully appropriate, so the result is always 1.0.
    """
    fully_appropriate = 1.0
    return fully_appropriate

In [22]:
def get_format_diversity_score(user_id: str, format: str) -> float:
    """Return a score multiplier reflecting exercise-format variety.

    Placeholder: both arguments are ignored and no format repetition is
    assumed, so the result is always 1.0.
    """
    no_repetition_score = 1.0
    return no_repetition_score


In [23]:
def generate_new_exercise(category: str, subcategory: str, user_profile: Dict) -> Dict:
    """Build a randomly generated exercise for the given category pair.

    ``user_profile`` is accepted but not consulted by this mock.

    Returns:
        A dict with the keys ``id``, ``category``, ``subcategory``,
        ``themes`` (two distinct entries), ``format``, ``difficulty`` (1-5)
        and ``generated`` (always ``True``).
    """
    # Draw order: id number, themes, format, difficulty.
    id_number = random.randint(1000, 9999)
    chosen_themes = random.sample(["Anime", "Movies", "Sports", "Technology"], 2)
    chosen_format = random.choice(["fill-in-blanks", "multiple-choice", "role-play"])
    chosen_difficulty = random.randint(1, 5)

    new_exercise = {"id": f"ex{id_number}"}
    new_exercise["category"] = category
    new_exercise["subcategory"] = subcategory
    new_exercise["themes"] = chosen_themes
    new_exercise["format"] = chosen_format
    new_exercise["difficulty"] = chosen_difficulty
    new_exercise["generated"] = True
    return new_exercise

In [24]:
# Core Functions
def analyze_error_patterns(user_id: str, time_window: int = 30) -> Dict:
    """Summarize a user's recent errors per category.

    Args:
        user_id: Identifier passed through to ``fetch_user_errors``.
        time_window: Look-back window in days passed through to
            ``fetch_user_errors``.

    Returns:
        A dict keyed by category ("Grammar", "Vocabulary", "Pronunciation",
        "Fluency"). Each value is a dict with:

        * ``frequency``: share of all fetched errors in this category.
        * ``severity``: mean severity of the category's errors.
        * ``recency_factor``: mean of ``1 / (age_in_days + 1)`` over the
          category's errors, so newer errors weigh more.
        * ``subcategories``: per-subcategory ``count``, mean ``severity``
          and ``frequency`` (share of the category's errors).

        A category with no errors gets zeros and an empty ``subcategories``.
    """
    user_errors = fetch_user_errors(user_id, time_window)
    total_errors = len(user_errors)
    # One reference instant, taken after the fetch, so every error is aged
    # against the same "now".
    now = datetime.now()
    error_profile = {}

    for category in ["Grammar", "Vocabulary", "Pronunciation", "Fluency"]:
        category_errors = [e for e in user_errors if e["category"] == category]
        category_count = len(category_errors)

        frequency = category_count / total_errors if total_errors else 0
        avg_severity = (
            sum(e["severity"] for e in category_errors) / category_count
            if category_count
            else 0
        )

        # Clamp ages at zero: a timestamp slightly in the future (clock skew)
        # has ``.days == -1``, which would make ``1 / (age + 1)`` divide by
        # zero, and older future timestamps would yield negative weights.
        ages_in_days = [max(0, (now - e["timestamp"]).days) for e in category_errors]
        recency_factor = (
            sum(1 / (age + 1) for age in ages_in_days) / len(ages_in_days)
            if ages_in_days
            else 0
        )

        # Accumulate raw counts and severity totals per subcategory.
        subcategories = {}
        for error in category_errors:
            stats = subcategories.setdefault(
                error["subcategory"], {"count": 0, "severity": 0}
            )
            stats["count"] += 1
            stats["severity"] += error["severity"]

        # Turn severity totals into means and add the in-category share.
        for stats in subcategories.values():
            stats["severity"] /= stats["count"]
            stats["frequency"] = stats["count"] / category_count

        error_profile[category] = {
            "frequency": frequency,
            "severity": avg_severity,
            "recency_factor": recency_factor,
            "subcategories": subcategories,
        }

    return error_profile

In [25]:
def select_category(user_id: str) -> str:
    """Select the category for the user's next exercise.

    New users are delegated to ``handle_cold_start``. For everyone else
    there is a 10% chance of returning a uniformly random category
    (exploration); otherwise the category with the highest score wins, where

        score = frequency * severity * recency_factor
                * (1 + ln(1 + days_since_last_practice))
                * (2 - improvement_rate)

    Args:
        user_id: Identifier of the user to recommend for.

    Returns:
        The name of the chosen category.
    """
    # Check for a new user first: the error analysis is not used on the
    # cold-start path, so there is no reason to compute it.
    if is_new_user(user_id):
        return handle_cold_start(user_id)

    error_profile = analyze_error_patterns(user_id)

    # Exploration (10% chance). Decided before scoring so the per-category
    # lookups below are skipped when their result would be discarded.
    if random.random() < 0.1:
        return random.choice(list(error_profile))

    category_scores = {}
    for category, data in error_profile.items():
        priority = data["frequency"] * data["severity"] * data["recency_factor"]

        last_practice = get_last_practice_time(user_id, category)
        # Clamp at zero: a last-practice time in the future would give a
        # negative day count, and math.log(1 + days) raises ValueError once
        # days <= -1.
        days_since_practice = max(0, (datetime.now() - last_practice).days)
        spaced_repetition_factor = 1 + math.log(1 + days_since_practice)

        # Categories the user improves in more slowly get a larger factor.
        improvement_factor = 2 - get_improvement_rate(user_id, category)

        category_scores[category] = (
            priority * spaced_repetition_factor * improvement_factor
        )

    return max(category_scores, key=category_scores.get)


In [26]:
def handle_cold_start(user_id: str, session_count: Optional[int] = None) -> str:
    """Pick a category for a new user who has little or no error history.

    During the first four sessions the categories are visited in a fixed
    rotation (session 1 -> Grammar, 2 -> Vocabulary, 3 -> Pronunciation,
    4 -> Fluency). The previous indexing, ``categories[session_count % 4]``
    under ``session_count < 4``, never produced "Grammar" for a 1-based
    session count and covered only three of the four categories.

    Args:
        user_id: Identifier of the user (not used by the current mock logic).
        session_count: 1-based number of the user's current session. When
            omitted, a mock value between 1 and 10 is drawn at random.

    Returns:
        The name of the chosen category.
    """
    categories = ["Grammar", "Vocabulary", "Pronunciation", "Fluency"]

    if session_count is None:
        session_count = random.randint(1, 10)  # Mock session count

    if session_count <= len(categories):
        # Counts below 1 are treated as the first session.
        return categories[max(session_count, 1) - 1]

    # Use collaborative filtering (mock implementation)
    return random.choice(categories)

In [27]:
def personalize_content(user_id: str, category: str, subcategory: str) -> Dict:
    """Choose the exercise that best fits the user's profile.

    Candidate exercises from ``find_exercises`` start with a score of 1.0,
    which is multiplied by ``1 + interest_match``, ``1 + cultural_match``,
    the age-appropriateness factor and the format-diversity factor. The
    match values are the share of the exercise's themes found in the user's
    interests and in the themes mapped from the user's country.

    Args:
        user_id: Identifier of the user.
        category: Error category the exercise should target.
        subcategory: Error subcategory the exercise should target.

    Returns:
        The highest-scoring candidate (the earliest one on ties), or a
        freshly generated exercise when there are no candidates or the best
        score is below 0.5.
    """
    user_profile = get_user_profile(user_id)
    age_band = user_profile.get("age_band")
    interests = set(user_profile.get("interests", []))
    cultural_themes = set(map_country_to_themes(user_profile.get("country")))

    # Mock difficulty mapping: the profile's proficiency_level is not
    # consulted yet, so difficulty is drawn at random.
    difficulty = random.randint(1, 5)

    scored_exercises = []
    for exercise in find_exercises(category, subcategory, difficulty):
        themes = exercise["themes"]
        theme_set = set(themes)
        if themes:
            interest_match = len(theme_set & interests) / len(themes)
            cultural_match = len(theme_set & cultural_themes) / len(themes)
        else:
            # An exercise without themes cannot match anything; avoid
            # dividing by zero.
            interest_match = cultural_match = 0.0

        score = 1.0
        score *= 1 + interest_match
        score *= 1 + cultural_match
        score *= is_age_appropriate(exercise, age_band)
        score *= get_format_diversity_score(user_id, exercise["format"])
        scored_exercises.append((exercise, score))

    if not scored_exercises:
        return generate_new_exercise(category, subcategory, user_profile)

    # max() returns the first of several equal maxima, the same candidate a
    # stable descending sort would place first.
    best_exercise, best_score = max(scored_exercises, key=lambda pair: pair[1])
    if best_score < 0.5:
        return generate_new_exercise(category, subcategory, user_profile)

    return best_exercise

In [28]:
# Example Usage
if __name__ == "__main__":
    # Demo run: pick a category, pick a random subcategory, then ask for a
    # personalized exercise and report all three.
    user_id = "user123"
    category = select_category(user_id)
    subcategory = random.choice(["prepositions", "verb tense", "word choice", "intonation"])
    exercise = personalize_content(user_id, category, subcategory)

    print(
        f"Selected Category: {category}",
        f"Selected Subcategory: {subcategory}",
        f"Recommended Exercise: {exercise}",
        sep="\n",
    )

Selected Category: Grammar
Selected Subcategory: prepositions
Recommended Exercise: {'id': 'ex634', 'category': 'Grammar', 'subcategory': 'prepositions', 'difficulty': 2, 'themes': ['Sports', 'Anime'], 'format': 'multiple-choice'}
