# In [2]:
"""
Enhanced Personalized Interview System - Cold Start with Profile Building
Collects comprehensive session data and builds detailed user profiles
"""

from typing import List, Dict, Set, Optional, Tuple
from datetime import datetime
from dataclasses import dataclass, field
from collections import defaultdict
import json


@dataclass
class UserProfile:
    """User profile record.

    The fields without defaults describe the user and must be supplied by
    the caller. The defaulted fields hold skill assessments: in this module,
    ``InterviewSystem.build_user_profile_from_session`` writes the global
    fields (``weaker_skills``, ``stronger_skills``, ``skill_metadata``) when
    called without a domain, and the ``domain_specific_*`` fields otherwise.
    """
    _id: str  # user identifier; InterviewSystem keys its per-user stores by it
    name: str
    email: str
    skills: List[str]  # skill names; compared with Question.tags when selecting questions
    skill_levels: Dict[str, int]  # presumably skill name -> level; not read in this section
    years_experience: float
    education: str
    target_domains: List[str]  # domains used for filtering when no domain is chosen explicitly
    target_companies: List[str]
    created_at: str  # timestamp string; format is not established in this section
    last_active: str  # timestamp string; format is not established in this section
    # New fields for enhanced profiling
    weaker_skills: List[str] = field(default_factory=list)  # skills categorised 'weaker'
    stronger_skills: List[str] = field(default_factory=list)  # skills categorised 'stronger'
    # skill -> {'avg_score', 'total_questions', 'avg_time_sec', 'skip_rate', 'category'}
    skill_metadata: Dict[str, Dict] = field(default_factory=dict)
    # Domain-specific tracking (same data as above, keyed by domain first)
    domain_specific_weaker_skills: Dict[str, List[str]] = field(default_factory=dict)
    domain_specific_stronger_skills: Dict[str, List[str]] = field(default_factory=dict)
    domain_specific_skill_metadata: Dict[str, Dict[str, Dict]] = field(default_factory=dict)


@dataclass
class Question:
    """One entry of the question bank.

    ``InterviewSystem`` selects questions by ``domain``, ``tags`` and
    ``difficulty`` and identifies them by ``_id``. The remaining fields are
    carried along but not read by the code in this section.
    """
    _id: str  # question identifier; responses refer to it via question_id
    text: str
    tags: List[str]  # skill names this question exercises; matched against user skills
    domain: str  # compared with the chosen / target domains when filtering
    difficulty: int  # 1-5 scale
    estimated_time_sec: int
    embedding: List[float] = field(default_factory=list)
    created_by: str = "editor"
    popularity: int = 0
    company_specific: List[str] = field(default_factory=list)
    last_updated: str = ""


@dataclass
class QuestionResponse:
    """A user's answer to a single question, with scoring metadata.

    ``InterviewSystem.log_response`` stores instances per user and, when
    ``domain`` is non-empty, additionally per user and domain.
    """
    _id: str
    session_id: str  # used to select the responses belonging to one session
    user_id: str
    question_id: str  # resolved against Question._id during skill analysis
    response_text: str
    response_audio_features: Dict[str, float]
    content_metrics: Dict[str, float]
    final_score: float  # 0-1 normalized
    timestamp: str  # responses are ordered by comparing this string to find the latest one
    # Enhanced fields
    time_taken_sec: int = 0
    skipped: bool = False
    confidence_rating: Optional[int] = None  # 1-5 if available
    keyword_matches: List[str] = field(default_factory=list)
    # Domain tracking
    domain: str = ""  # empty string means the response is stored globally only


@dataclass
class Session:
    """One interview session of a user.

    ``InterviewSystem.log_session`` stores instances per user and, when
    ``domain`` is non-empty, additionally per user and domain. The number of
    stored sessions decides whether a user is treated as a first-time user.
    """
    _id: str
    user_id: str
    context: Dict[str, str]  # not read in this section
    questions: List[Dict[str, str]]  # not read in this section
    created_at: str
    metrics_snapshot: Dict[str, float]  # not read in this section
    # Enhanced fields for profile building
    skill_performance: Dict[str, List[float]] = field(default_factory=dict)
    weaker_skills_identified: List[str] = field(default_factory=list)
    stronger_skills_identified: List[str] = field(default_factory=list)
    total_time_sec: int = 0
    questions_skipped: int = 0
    # Domain tracking
    domain: str = ""  # empty string means the session is stored globally only


@dataclass
class SkillAnalysis:
    """Aggregated performance of a user on one skill.

    Produced by ``InterviewSystem._analyze_user_skills`` from logged responses.
    """
    skill: str  # the question tag the figures were aggregated for
    avg_score: float  # mean of final_score over the responses for this skill
    total_questions: int  # number of responses that contributed
    avg_time_sec: float  # mean of time_taken_sec
    skip_rate: float  # fraction of contributing responses flagged as skipped
    avg_confidence: Optional[float]  # mean confidence rating; None when no rating was recorded
    # Mean count of keyword matches over the responses that had at least one
    # match (despite the name, this is an average count, not a ratio).
    keyword_match_rate: float
    # 'weaker' when avg_score < 0.5 or skip_rate > 0.3; 'stronger' when
    # avg_score > 0.7 and skip_rate < 0.1; otherwise 'moderate'.
    category: str  # 'weaker', 'stronger', 'moderate'


@dataclass
class RecommendedQuestion:
    """A recommended question together with its ranking score and rationale.

    The recommendation methods of ``InterviewSystem`` build these as an
    intermediate form and then flatten them into plain dicts for their output.
    """
    q_id: str  # Question._id of the recommended question
    score: float  # ranking score; callers in this module round it to two decimals
    reasoning: str = ""  # human-readable explanation of why it was picked


class InterviewSystem:
    """Enhanced Interview System with Comprehensive Cold Start"""
    
    def __init__(self, question_bank: List[Question]):
        self.question_bank = question_bank
        self.sessions_db: Dict[str, List[Session]] = defaultdict(list)
        self.responses_db: Dict[str, List[QuestionResponse]] = defaultdict(list)
        self.skill_vectors: Dict[str, Dict[str, float]] = {}
        self.user_profiles: Dict[str, UserProfile] = {}
        # Domain-specific storage
        self.domain_sessions_db: Dict[str, Dict[str, List[Session]]] = defaultdict(lambda: defaultdict(list))
        self.domain_responses_db: Dict[str, Dict[str, List[QuestionResponse]]] = defaultdict(lambda: defaultdict(list))
        print(f"\n{'='*70}")
        print(f"[INIT] Enhanced Interview System Initialized")
        print(f"[INIT] Total Questions in Bank: {len(question_bank)}")
        print(f"{'='*70}\n")
    
    # ==================== ENHANCED COLD START FLOW ====================
    
    def enhanced_cold_start_flow(self, user: UserProfile, k: int = 10, 
                                   is_first_session: bool = True, chosen_domain: str = None) -> Dict:
        """
        Enhanced Cold Start Flow with Profile Building
        
        Phase 1 (First Session): Collect comprehensive data
        Phase 2+ (Subsequent Sessions): Use built profile for recommendations
        """
        print(f"\n{'='*70}")
        print(f"[ENHANCED COLD START] Profile-Building Flow")
        print(f"{'='*70}")
        print(f"[0.1] User: {user._id}")
        print(f"[0.2] Is First Session: {is_first_session}")
        print(f"[0.3] Chosen Domain: {chosen_domain}")
        
        if is_first_session:
            return self._first_session_recommendations(user, k, chosen_domain)
        else:
            return self._profile_based_recommendations(user, k, chosen_domain)
    
    def _first_session_recommendations(self, user: UserProfile, k: int, chosen_domain: str = None) -> Dict:
        """
        First Session: Diverse question set to assess all skills
        Focus on gathering comprehensive data
        """
        print(f"\n[PHASE 1] First Session - Comprehensive Assessment")
        print(f"{'─'*70}")
        print(f"[1.1] Goal: Assess all user skills comprehensively")
        print(f"[1.2] User Skills to Assess: {user.skills}")
        
        # Filter by domain
        target_domains = [chosen_domain] if chosen_domain else user.target_domains
        print(f"\n[1.3] Filtering by domain: {target_domains}")
        domain_questions = self._filter_by_domain(target_domains)
        print(f"      ✓ {len(domain_questions)} questions in target domains")
        
        # Create diverse question set covering all skills
        print(f"\n[1.4] Creating diverse question set...")
        diverse_questions = self._create_diverse_assessment_set(
            domain_questions, 
            user.skills, 
            k
        )
        
        # Score and rank
        print(f"\n[1.5] Scoring questions for initial assessment...")
        scored_questions = []
        for i, question in enumerate(diverse_questions):
            print(f"\n      Question {i+1}/{len(diverse_questions)}: {question._id}")
            print(f"      - Tags: {question.tags}")
            print(f"      - Difficulty: {question.difficulty}")
            
            # Prioritize skill coverage over difficulty match
            skill_overlap = self._calculate_skill_overlap(
                question.tags, 
                user.skills
            )
            
            # Slight penalty for extreme difficulties in first session
            difficulty_factor = 1.0
            if question.difficulty == 1 or question.difficulty == 5:
                difficulty_factor = 0.9
            
            score = skill_overlap * difficulty_factor
            print(f"      - Skill Overlap: {skill_overlap:.2f}")
            print(f"      - Difficulty Factor: {difficulty_factor:.2f}")
            print(f"      - Final Score: {score:.2f}")
            
            scored_questions.append((question, score))
        
        # Rank and select
        ranked = sorted(scored_questions, key=lambda x: x[1], reverse=True)
        
        recommendations = [
            RecommendedQuestion(
                q_id=q._id, 
                score=round(score, 2),
                reasoning=f"Initial assessment for {', '.join(q.tags)}"
            )
            for q, score in ranked[:k]
        ]
        
        print(f"\n[1.6] ✓ First Session Recommendations Ready")
        print(f"      - Total Recommended: {len(recommendations)}")
        for i, rec in enumerate(recommendations):
            print(f"      {i+1}. {rec.q_id} (Score: {rec.score}) - {rec.reasoning}")
        
        output = {
            "user_id": user._id,
            "session_type": "cold_start_first_session",
            "phase": "data_collection",
            "domain": chosen_domain if chosen_domain else user.target_domains[0],
            "message": "First session: Comprehensive skill assessment",
            "questions_recommended": [
                {
                    "q_id": r.q_id, 
                    "score": r.score, 
                    "reasoning": r.reasoning
                }
                for r in recommendations
            ]
        }
        
        print(f"{'='*70}\n")
        return output
    
    def _create_diverse_assessment_set(self, questions: List[Question], 
                                        skills: List[str], k: int) -> List[Question]:
        """
        Create diverse question set ensuring coverage of all skills
        """
        skill_coverage = defaultdict(list)
        
        # Group questions by skills they test
        for question in questions:
            for tag in question.tags:
                if tag in skills:
                    skill_coverage[tag].append(question)
        
        print(f"      - Skill Coverage Analysis:")
        for skill, qs in skill_coverage.items():
            print(f"        {skill}: {len(qs)} questions available")
        
        # Select questions ensuring each skill is tested
        selected = []
        selected_ids = set()
        
        # Round-robin selection from each skill
        max_per_skill = max(1, k // len(skills))
        print(f"      - Target: {max_per_skill} questions per skill")
        
        for skill in skills:
            skill_questions = skill_coverage.get(skill, [])
            for q in skill_questions[:max_per_skill]:
                if q._id not in selected_ids:
                    selected.append(q)
                    selected_ids.add(q._id)
                    if len(selected) >= k:
                        break
            if len(selected) >= k:
                break
        
        # Fill remaining slots if needed
        if len(selected) < k:
            for q in questions:
                if q._id not in selected_ids:
                    selected.append(q)
                    selected_ids.add(q._id)
                    if len(selected) >= k:
                        break
        
        print(f"      ✓ Selected {len(selected)} diverse questions")
        return selected
    
    def _profile_based_recommendations(self, user: UserProfile, k: int, chosen_domain: str = None) -> Dict:
        """
        Subsequent Sessions: Use built profile for targeted recommendations
        Focus on weaker skills with adaptive difficulty
        """
        print(f"\n[PHASE 2] Profile-Based Recommendations")
        print(f"{'─'*70}")
        
        # Analyze user profile for specific domain
        print(f"[2.1] Analyzing User Profile for Domain: {chosen_domain}...")
        skill_analyses = self._analyze_user_skills(user._id, chosen_domain)
        
        if not skill_analyses:
            print(f"      ⚠ No profile data found, falling back to first session flow")
            return self._first_session_recommendations(user, k, chosen_domain)
        
        print(f"\n[2.2] Skill Analysis Summary:")
        for analysis in skill_analyses:
            print(f"      - {analysis.skill}: {analysis.category.upper()}")
            print(f"        Avg Score: {analysis.avg_score:.2f}, "
                  f"Questions: {analysis.total_questions}, "
                  f"Avg Time: {analysis.avg_time_sec:.0f}s")
        
        # Identify weaker and stronger skills
        weaker_skills = [a.skill for a in skill_analyses if a.category == 'weaker']
        stronger_skills = [a.skill for a in skill_analyses if a.category == 'stronger']
        
        print(f"\n[2.3] Skill Categorization:")
        print(f"      - Weaker Skills (need improvement): {weaker_skills}")
        print(f"      - Stronger Skills (validation): {stronger_skills}")
        
        # Get adaptive difficulty for specific domain
        last_score = self._get_last_score(user._id, chosen_domain)
        difficulty_adjustment = self._calculate_difficulty_adjustment(last_score)
        target_difficulty = max(1, min(5, 2 + difficulty_adjustment))
        
        print(f"\n[2.4] Difficulty Adaptation:")
        print(f"      - Last Score: {last_score:.2f}")
        print(f"      - Adjustment: {difficulty_adjustment:+d}")
        print(f"      - Target Difficulty: {target_difficulty}")
        
        # Filter candidates
        attempted = self._get_attempted_questions(user._id, chosen_domain)
        
        print(f"\n[2.5] Filtering Candidate Questions...")
        print(f"      - Previously Attempted: {len(attempted)}")
        
        # Get questions focused on weaker skills (70%)
        target_domains = [chosen_domain] if chosen_domain else user.target_domains
        weaker_candidates = self._filter_candidate_questions(
            target_domains,
            weaker_skills,
            target_difficulty,
            attempted
        )
        
        # Get questions for stronger skills (30%)
        stronger_candidates = self._filter_candidate_questions(
            target_domains,
            stronger_skills,
            target_difficulty,
            attempted
        )
        
        print(f"      - Weaker Skills Candidates: {len(weaker_candidates)}")
        print(f"      - Stronger Skills Candidates: {len(stronger_candidates)}")
        
        # Score and rank
        print(f"\n[2.6] Scoring and Ranking Questions...")
        
        weaker_scored = self._score_profile_based_questions(
            weaker_candidates, 
            skill_analyses, 
            focus_type='weaker'
        )
        
        stronger_scored = self._score_profile_based_questions(
            stronger_candidates,
            skill_analyses,
            focus_type='stronger'
        )
        
        # Allocate 70% to weaker, 30% to stronger
        k_weaker = int(k * 0.7)
        k_stronger = k - k_weaker
        
        weaker_ranked = sorted(weaker_scored, key=lambda x: x[1], reverse=True)[:k_weaker]
        stronger_ranked = sorted(stronger_scored, key=lambda x: x[1], reverse=True)[:k_stronger]
        
        # Combine recommendations
        recommendations = []
        
        for q, score, reasoning in weaker_ranked:
            recommendations.append(RecommendedQuestion(
                q_id=q._id,
                score=round(score, 2),
                reasoning=reasoning
            ))
        
        for q, score, reasoning in stronger_ranked:
            recommendations.append(RecommendedQuestion(
                q_id=q._id,
                score=round(score, 2),
                reasoning=reasoning
            ))
        
        print(f"\n[2.7] ✓ Final Recommendations:")
        print(f"      - Focus on Weaker Skills: {k_weaker} questions")
        print(f"      - Validate Stronger Skills: {k_stronger} questions")
        
        for i, rec in enumerate(recommendations):
            print(f"      {i+1}. {rec.q_id} (Score: {rec.score})")
            print(f"         → {rec.reasoning}")
        
        output = {
            "user_id": user._id,
            "session_type": "cold_start_profile_based",
            "phase": "adaptive_learning",
            "domain": chosen_domain if chosen_domain else user.target_domains[0],
            "weaker_skills_targeted": weaker_skills,
            "stronger_skills_validated": stronger_skills,
            "skill_analyses": [
                {
                    "skill": a.skill,
                    "category": a.category,
                    "avg_score": round(a.avg_score, 2),
                    "total_questions": a.total_questions,
                    "avg_time_sec": round(a.avg_time_sec, 1)
                }
                for a in skill_analyses
            ],
            "questions_recommended": [
                {
                    "q_id": r.q_id,
                    "score": r.score,
                    "reasoning": r.reasoning
                }
                for r in recommendations
            ]
        }
        
        print(f"{'='*70}\n")
        return output
    
    def _analyze_user_skills(self, user_id: str, domain: str = None) -> List[SkillAnalysis]:
        """
        Analyze user skills from response history
        Returns detailed analysis for each skill
        """
        if domain:
            responses = self.domain_responses_db[user_id].get(domain, [])
        else:
            responses = self.responses_db.get(user_id, [])
        
        if not responses:
            return []
        
        skill_data = defaultdict(lambda: {
            'scores': [],
            'times': [],
            'skips': [],
            'confidences': [],
            'keyword_matches': []
        })
        
        # Aggregate data per skill
        for response in responses:
            question = self._get_question_by_id(response.question_id)
            if not question:
                continue
            
            for tag in question.tags:
                skill_data[tag]['scores'].append(response.final_score)
                skill_data[tag]['times'].append(response.time_taken_sec)
                skill_data[tag]['skips'].append(1 if response.skipped else 0)
                if response.confidence_rating:
                    skill_data[tag]['confidences'].append(response.confidence_rating)
                if response.keyword_matches:
                    skill_data[tag]['keyword_matches'].append(
                        len(response.keyword_matches)
                    )
        
        # Create analyses
        analyses = []
        for skill, data in skill_data.items():
            if not data['scores']:
                continue
            
            avg_score = sum(data['scores']) / len(data['scores'])
            avg_time = sum(data['times']) / len(data['times']) if data['times'] else 0
            skip_rate = sum(data['skips']) / len(data['skips'])
            avg_confidence = (
                sum(data['confidences']) / len(data['confidences'])
                if data['confidences'] else None
            )
            keyword_match_rate = (
                sum(data['keyword_matches']) / len(data['keyword_matches'])
                if data['keyword_matches'] else 0
            )
            
            # Categorize skill
            if avg_score < 0.5 or skip_rate > 0.3:
                category = 'weaker'
            elif avg_score > 0.7 and skip_rate < 0.1:
                category = 'stronger'
            else:
                category = 'moderate'
            
            analyses.append(SkillAnalysis(
                skill=skill,
                avg_score=avg_score,
                total_questions=len(data['scores']),
                avg_time_sec=avg_time,
                skip_rate=skip_rate,
                avg_confidence=avg_confidence,
                keyword_match_rate=keyword_match_rate,
                category=category
            ))
        
        return sorted(analyses, key=lambda x: x.avg_score)
    
    def _score_profile_based_questions(self, questions: List[Question],
                                        skill_analyses: List[SkillAnalysis],
                                        focus_type: str) -> List[Tuple[Question, float, str]]:
        """
        Score questions based on user profile analysis
        Returns (question, score, reasoning) tuples
        """
        scored = []
        skill_map = {a.skill: a for a in skill_analyses}
        
        for question in questions:
            # Calculate metrics
            question_skills = [s for s in question.tags if s in skill_map]
            
            if not question_skills:
                continue
            
            # Get average score for question skills
            avg_skill_score = sum(
                skill_map[s].avg_score for s in question_skills
            ) / len(question_skills)
            
            # Get average time taken for these skills
            avg_time = sum(
                skill_map[s].avg_time_sec for s in question_skills
            ) / len(question_skills)
            
            # Calculate priority score
            if focus_type == 'weaker':
                # Lower score = higher priority for weaker skills
                priority = (1 - avg_skill_score)
                time_factor = min(1.0, avg_time / 180)  # Penalize if taking too long
                score = priority + (0.3 * time_factor)
                
                reasoning = (
                    f"Focus on {', '.join(question_skills)} "
                    f"(accuracy: {avg_skill_score:.0%}, "
                    f"avg time: {avg_time:.0f}s) - needs improvement"
                )
            else:
                # Higher score = validate mastery
                priority = avg_skill_score
                score = priority
                
                reasoning = (
                    f"Validate {', '.join(question_skills)} "
                    f"(strong performance: {avg_skill_score:.0%}) - maintain mastery"
                )
            
            scored.append((question, score, reasoning))
        
        return scored
    
    def build_user_profile_from_session(self, user_id: str, session_id: str, domain: str = None):
        """
        Build/Update user profile from completed session data
        """
        print(f"\n{'='*70}")
        print(f"[PROFILE BUILDING] Building User Profile from Session")
        print(f"{'='*70}")
        print(f"[PB.1] User ID: {user_id}")
        print(f"[PB.2] Session ID: {session_id}")
        print(f"[PB.3] Domain: {domain}")
        
        # Get all responses for this session
        if domain:
            session_responses = [
                r for r in self.domain_responses_db[user_id].get(domain, [])
                if r.session_id == session_id
            ]
        else:
            session_responses = [
                r for r in self.responses_db.get(user_id, [])
                if r.session_id == session_id
            ]
        
        if not session_responses:
            print(f"[PB.4] ⚠ No responses found for session")
            return
        
        print(f"[PB.4] Processing {len(session_responses)} responses...")
        
        # Analyze skills
        skill_analyses = self._analyze_user_skills(user_id, domain)
        
        # Update user profile
        if user_id in self.user_profiles:
            user_profile = self.user_profiles[user_id]
        else:
            print(f"[PB.5] ⚠ User profile not found in cache")
            return
        
        # Update profile fields
        weaker_skills = [a.skill for a in skill_analyses if a.category == 'weaker']
        stronger_skills = [a.skill for a in skill_analyses if a.category == 'stronger']
        
        if domain:
            # Store domain-specific data
            user_profile.domain_specific_weaker_skills[domain] = weaker_skills
            user_profile.domain_specific_stronger_skills[domain] = stronger_skills
            
            # Store detailed skill metadata for domain
            if domain not in user_profile.domain_specific_skill_metadata:
                user_profile.domain_specific_skill_metadata[domain] = {}
            
            for analysis in skill_analyses:
                user_profile.domain_specific_skill_metadata[domain][analysis.skill] = {
                    'avg_score': analysis.avg_score,
                    'total_questions': analysis.total_questions,
                    'avg_time_sec': analysis.avg_time_sec,
                    'skip_rate': analysis.skip_rate,
                    'category': analysis.category
                }
        else:
            # Store global data
            user_profile.weaker_skills = weaker_skills
            user_profile.stronger_skills = stronger_skills
            
            for analysis in skill_analyses:
                user_profile.skill_metadata[analysis.skill] = {
                    'avg_score': analysis.avg_score,
                    'total_questions': analysis.total_questions,
                    'avg_time_sec': analysis.avg_time_sec,
                    'skip_rate': analysis.skip_rate,
                    'category': analysis.category
                }
        
        print(f"\n[PB.5] ✓ Profile Updated:")
        print(f"      - Weaker Skills: {weaker_skills}")
        print(f"      - Stronger Skills: {stronger_skills}")
        print(f"      - Total Skills Analyzed: {len(skill_analyses)}")
        
        print(f"\n[PB.6] Detailed Skill Metadata:")
        skill_metadata = (user_profile.domain_specific_skill_metadata.get(domain, {}) 
                         if domain else user_profile.skill_metadata)
        for skill, metadata in skill_metadata.items():
            print(f"      - {skill}:")
            print(f"        Category: {metadata['category']}")
            print(f"        Avg Score: {metadata['avg_score']:.2f}")
            print(f"        Questions Answered: {metadata['total_questions']}")
            print(f"        Avg Time: {metadata['avg_time_sec']:.0f}s")
            print(f"        Skip Rate: {metadata['skip_rate']:.0%}")
        
        print(f"{'='*70}\n")
    
    # ==================== HELPER METHODS ====================
    
    def _filter_by_domain(self, target_domains: List[str]) -> List[Question]:
        """Filter questions by user's target domains"""
        return [
            q for q in self.question_bank 
            if q.domain in target_domains
        ]
    
    def _calculate_skill_overlap(self, question_tags: List[str], 
                                  user_skills: List[str]) -> float:
        """Calculate skill overlap score"""
        if not question_tags:
            return 0.0
        intersection = len(set(question_tags) & set(user_skills))
        return intersection / len(question_tags)
    
    def _get_last_score(self, user_id: str, domain: str = None) -> float:
        """Get the last response score"""
        if domain:
            responses = self.domain_responses_db[user_id].get(domain, [])
        else:
            responses = self.responses_db.get(user_id, [])
        
        if not responses:
            return 0.5
        sorted_responses = sorted(responses, key=lambda x: x.timestamp)
        return sorted_responses[-1].final_score
    
    def _calculate_difficulty_adjustment(self, last_score: float) -> int:
        """Adjust difficulty based on last score"""
        if last_score >= 0.7:
            return 1
        elif last_score < 0.4:
            return -1
        else:
            return 0
    
    def _get_attempted_questions(self, user_id: str, domain: str = None) -> Set[str]:
        """Get set of question IDs already attempted"""
        if domain:
            responses = self.domain_responses_db[user_id].get(domain, [])
        else:
            responses = self.responses_db.get(user_id, [])
        return {response.question_id for response in responses}
    
    def _filter_candidate_questions(self, target_domains: List[str],
                                     skills: List[str],
                                     target_difficulty: int,
                                     attempted_questions: Set[str]) -> List[Question]:
        """Filter questions based on criteria"""
        candidates = []
        for question in self.question_bank:
            if question.domain not in target_domains:
                continue
            if not set(question.tags) & set(skills):
                continue
            if abs(question.difficulty - target_difficulty) > 1:
                continue
            if question._id in attempted_questions:
                continue
            candidates.append(question)
        return candidates
    
    def _get_question_by_id(self, question_id: str) -> Optional[Question]:
        """Retrieve question by ID"""
        for q in self.question_bank:
            if q._id == question_id:
                return q
        return None
    
    # ==================== LOGGING METHODS ====================
    
    def log_response(self, response: QuestionResponse):
        """Log user response with enhanced metadata"""
        print(f"\n{'─'*70}")
        print(f"[RESPONSE LOG] Logging Enhanced Response")
        print(f"{'─'*70}")
        print(f"- User ID: {response.user_id}")
        print(f"- Question ID: {response.question_id}")
        print(f"- Final Score: {response.final_score:.2f}")
        print(f"- Time Taken: {response.time_taken_sec}s")
        print(f"- Skipped: {response.skipped}")
        print(f"- Confidence: {response.confidence_rating}")
        print(f"- Keyword Matches: {response.keyword_matches}")
        print(f"- Domain: {response.domain}")
        
        # Store in both global and domain-specific databases
        self.responses_db[response.user_id].append(response)
        if response.domain:
            self.domain_responses_db[response.user_id][response.domain].append(response)
        
        print(f"✓ Response logged")
        print(f"{'─'*70}\n")
    
    def log_session(self, session: Session):
        """Log session with enhanced tracking"""
        print(f"\n{'─'*70}")
        print(f"[SESSION LOG] Logging Enhanced Session")
        print(f"{'─'*70}")
        print(f"- Session ID: {session._id}")
        print(f"- User ID: {session.user_id}")
        print(f"- Total Time: {session.total_time_sec}s")
        print(f"- Questions Skipped: {session.questions_skipped}")
        print(f"- Weaker Skills: {session.weaker_skills_identified}")
        print(f"- Stronger Skills: {session.stronger_skills_identified}")
        print(f"- Domain: {session.domain}")
        
        # Store in both global and domain-specific databases
        self.sessions_db[session.user_id].append(session)
        if session.domain:
            self.domain_sessions_db[session.user_id][session.domain].append(session)
        
        print(f"✓ Session logged")
        print(f"{'─'*70}\n")
    
    # ==================== MAIN ENTRY POINT ====================
    
    def recommend_questions(self, user: UserProfile, k: int = 10, chosen_domain: str = None) -> Dict:
        """
        Main entry point - Enhanced Cold Start with Profile Building
        """
        # Store user profile
        self.user_profiles[user._id] = user
        
        # Check if first session for this domain
        if chosen_domain:
            sessions = self.domain_sessions_db[user._id].get(chosen_domain, [])
        else:
            sessions = self.sessions_db.get(user._id, [])
        
        is_first_session = len(sessions) == 0
        
        return self.enhanced_cold_start_flow(user, k, is_first_session, chosen_domain)


# ==================== EXAMPLE USAGE ====================

def _show_recommendations(system, user, domain, heading, k=3):
    """Print a demo banner, request ``k`` questions for ``domain`` and dump them as JSON."""
    print(f"\n[DEMO] {heading}")
    print("=" * 70)
    recommendations = system.recommend_questions(user, k=k, chosen_domain=domain)
    print(json.dumps(recommendations, indent=2))
    return recommendations


def _record_session(system, session, responses):
    """Log ``session`` and its ``responses``, then build the user's profile for that domain."""
    system.log_session(session)
    for response in responses:
        system.log_response(response)
    # Derive the identifiers from the session itself so they cannot drift
    # from the literals used to construct it.
    system.build_user_profile_from_session(session.user_id, session._id, session.domain)


def _print_domain_profile(user_profile, domain):
    """Print weak/strong skills and per-skill metrics stored for ``domain``."""
    print(f"\n--- {domain.upper()} PROFILE ---")
    domain_metadata = user_profile.domain_specific_skill_metadata
    if domain not in domain_metadata:
        # Nothing recorded for this domain: only the heading is printed.
        return

    print(f"Weaker Skills: {user_profile.domain_specific_weaker_skills.get(domain, [])}")
    print(f"Stronger Skills: {user_profile.domain_specific_stronger_skills.get(domain, [])}")
    print("\nDetailed Metrics:")
    for skill, metadata in domain_metadata[domain].items():
        print(f"  {skill}:")
        print(f"    Category: {metadata['category']}")
        print(f"    Avg Score: {metadata['avg_score']:.1%}")
        print(f"    Questions: {metadata['total_questions']}")
        print(f"    Avg Time: {metadata['avg_time_sec']:.0f}s")


def main():
    """Demonstration of Enhanced Cold Start System with Domain-Specific Learning

    Builds a small question bank covering the "java", "python" and "ml"
    domains, then for one sample user:

    1. requests first-session recommendations for Java and Python and replays
       a scripted session for each;
    2. requests recommendations again for Java and Python, now that sessions
       are logged for those domains;
    3. requests recommendations for ML, for which no session has been logged;
    4. prints the stored per-domain profiles for Java and Python.
    """
    # (id, text, tags, domain, difficulty 1-5, estimated seconds)
    question_specs = [
        # Java questions
        ("q_java_0001", "Explain Java inheritance", ["java", "oop"], "java", 2, 90),
        ("q_java_0002", "Java collections framework", ["java", "data-structures"], "java", 3, 120),
        ("q_java_0003", "Java threading concepts", ["java", "concurrency"], "java", 4, 150),
        ("q_java_0004", "Exception handling in Java", ["java", "error-handling"], "java", 2, 80),
        # Python questions
        ("q_py_0001", "Python decorators explained", ["python", "advanced"], "python", 3, 100),
        ("q_py_0002", "List comprehensions in Python", ["python", "data-structures"], "python", 2, 70),
        ("q_py_0003", "Python generators and iterators", ["python", "advanced"], "python", 4, 130),
        ("q_py_0004", "Python async/await", ["python", "concurrency"], "python", 4, 140),
        # ML questions
        ("q_ml_0001", "Explain TF-IDF", ["nlp", "information-retrieval"], "ml", 2, 90),
        ("q_ml_0002", "Binary search algorithm", ["data-structures", "algorithms"], "ml", 1, 60),
        ("q_ml_0003", "Backpropagation explained", ["deep-learning", "tensorflow"], "ml", 3, 120),
        ("q_ml_0004", "Word embedding concepts", ["nlp", "deep-learning"], "ml", 2, 100),
        ("q_ml_0005", "Neural network in Python", ["python", "deep-learning"], "ml", 3, 180),
        ("q_ml_0006", "Data preprocessing techniques", ["python", "data-structures"], "ml", 2, 90),
    ]
    questions = [
        Question(
            _id=question_id, text=text, tags=tags, domain=domain,
            difficulty=difficulty, estimated_time_sec=estimated_time_sec,
        )
        for question_id, text, tags, domain, difficulty, estimated_time_sec in question_specs
    ]

    # Create user
    user = UserProfile(
        _id="user_123", name="Pavan", email="pavan@example.com",
        skills=["python", "java", "nlp", "tensorflow", "data-structures", "oop"],
        skill_levels={}, years_experience=2.5, education="B.Tech",
        target_domains=["java", "python", "ml"], target_companies=["CompanyA"],
        created_at="2025-09-01T12:00:00Z",
        last_active="2025-10-04T10:00:00Z"
    )
    user_id = user._id

    # Initialize system
    system = InterviewSystem(questions)

    print("=" * 70)
    print("DOMAIN-SPECIFIC LEARNING SYSTEM - DEMONSTRATION")
    print("=" * 70)

    # ===== JAVA - FIRST SESSION =====
    _show_recommendations(system, user, "java", "JAVA DOMAIN - FIRST SESSION")

    # Simulate Java session responses
    print("\n[DEMO] Simulating Java session responses...")

    java_session_id = "session_java_001"
    java_session = Session(
        _id=java_session_id, user_id=user_id,
        context={"company": "CompanyA", "domain": "java"},
        questions=[],
        created_at="2025-10-04T10:00:00Z",
        metrics_snapshot={},
        total_time_sec=300,
        questions_skipped=1,
        domain="java"
    )

    java_responses = [
        QuestionResponse(
            _id="resp_java_001", session_id=java_session_id, user_id=user_id,
            question_id="q_java_0001", response_text="Java inheritance explanation...",
            response_audio_features={"wpm": 105, "silence_ratio": 0.18},
            content_metrics={"relevance": 0.5, "grammar": 0.6},
            final_score=0.40,  # Weak in Java OOP
            timestamp="2025-10-04T10:02:00Z",
            time_taken_sec=110,
            skipped=False,
            confidence_rating=2,
            keyword_matches=["inheritance", "extends"],
            domain="java"
        ),
        QuestionResponse(
            _id="resp_java_002", session_id=java_session_id, user_id=user_id,
            question_id="q_java_0002", response_text="",
            response_audio_features={},
            content_metrics={},
            final_score=0.0,  # Skipped
            timestamp="2025-10-04T10:04:00Z",
            time_taken_sec=5,
            skipped=True,
            confidence_rating=None,
            keyword_matches=[],
            domain="java"
        ),
        QuestionResponse(
            _id="resp_java_003", session_id=java_session_id, user_id=user_id,
            question_id="q_java_0004", response_text="Exception handling explanation...",
            response_audio_features={"wpm": 115, "silence_ratio": 0.12},
            content_metrics={"relevance": 0.7, "grammar": 0.75},
            final_score=0.65,
            timestamp="2025-10-04T10:06:00Z",
            time_taken_sec=85,
            skipped=False,
            confidence_rating=3,
            keyword_matches=["try", "catch", "finally", "throws"],
            domain="java"
        ),
    ]

    _record_session(system, java_session, java_responses)

    # ===== PYTHON - FIRST SESSION (Independent from Java) =====
    _show_recommendations(
        system, user, "python", "PYTHON DOMAIN - FIRST SESSION (Independent)"
    )

    # Simulate Python session responses
    print("\n[DEMO] Simulating Python session responses...")

    python_session_id = "session_py_001"
    python_session = Session(
        _id=python_session_id, user_id=user_id,
        context={"company": "CompanyA", "domain": "python"},
        questions=[],
        created_at="2025-10-04T11:00:00Z",
        metrics_snapshot={},
        total_time_sec=280,
        questions_skipped=0,
        domain="python"
    )

    python_responses = [
        QuestionResponse(
            _id="resp_py_001", session_id=python_session_id, user_id=user_id,
            question_id="q_py_0001", response_text="Decorators explanation...",
            response_audio_features={"wpm": 120, "silence_ratio": 0.10},
            content_metrics={"relevance": 0.85, "grammar": 0.9},
            final_score=0.80,  # Strong in Python
            timestamp="2025-10-04T11:02:00Z",
            time_taken_sec=95,
            skipped=False,
            confidence_rating=4,
            keyword_matches=["decorator", "wrapper", "@", "function"],
            domain="python"
        ),
        QuestionResponse(
            _id="resp_py_002", session_id=python_session_id, user_id=user_id,
            question_id="q_py_0002", response_text="List comprehension explanation...",
            response_audio_features={"wpm": 125, "silence_ratio": 0.08},
            content_metrics={"relevance": 0.9, "grammar": 0.95},
            final_score=0.88,  # Very strong
            timestamp="2025-10-04T11:04:00Z",
            time_taken_sec=65,
            skipped=False,
            confidence_rating=5,
            keyword_matches=["comprehension", "list", "for", "if", "syntax"],
            domain="python"
        ),
        QuestionResponse(
            _id="resp_py_003", session_id=python_session_id, user_id=user_id,
            question_id="q_py_0003", response_text="Generators explanation...",
            response_audio_features={"wpm": 118, "silence_ratio": 0.12},
            content_metrics={"relevance": 0.75, "grammar": 0.8},
            final_score=0.72,
            timestamp="2025-10-04T11:07:00Z",
            time_taken_sec=120,
            skipped=False,
            confidence_rating=4,
            keyword_matches=["yield", "generator", "iterator", "lazy"],
            domain="python"
        ),
    ]

    _record_session(system, python_session, python_responses)

    # ===== JAVA - SECOND SESSION (Uses only Java history) =====
    _show_recommendations(
        system, user, "java", "JAVA DOMAIN - SECOND SESSION (Profile-Based)"
    )

    # ===== PYTHON - SECOND SESSION (Uses only Python history) =====
    _show_recommendations(
        system, user, "python", "PYTHON DOMAIN - SECOND SESSION (Profile-Based)"
    )

    # ===== ML DOMAIN - FIRST SESSION (Independent) =====
    _show_recommendations(
        system, user, "ml", "ML DOMAIN - FIRST SESSION (New Domain)"
    )

    # Show domain-specific profiles
    print("\n[DEMO] DOMAIN-SPECIFIC USER PROFILES")
    print("=" * 70)
    user_profile = system.user_profiles.get(user_id)
    if user_profile:
        for domain in ("java", "python"):
            _print_domain_profile(user_profile, domain)

    print("\n" + "=" * 70)
    print("DEMONSTRATION COMPLETE")
    print("=" * 70)
    print("\nKEY FEATURES DEMONSTRATED:")
    print("✓ Domain-Specific Tracking: Java and Python tracked separately")
    print("✓ Independent Learning: Java performance doesn't affect Python")
    print("✓ Per-Domain Profiles: Each domain has its own weak/strong skills")
    print("✓ Per-Domain History: Recommendations use only relevant domain data")
    print("✓ Multi-Domain Support: User can learn Java, Python, ML independently")
    print("✓ Domain Isolation: First session in ML is independent of Java/Python")
    print("=" * 70)


# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


[INIT] Enhanced Interview System Initialized
[INIT] Total Questions in Bank: 14

DOMAIN-SPECIFIC LEARNING SYSTEM - DEMONSTRATION

[DEMO] JAVA DOMAIN - FIRST SESSION

[ENHANCED COLD START] Profile-Building Flow
[0.1] User: user_123
[0.2] Is First Session: True
[0.3] Chosen Domain: java

[PHASE 1] First Session - Comprehensive Assessment
──────────────────────────────────────────────────────────────────────
[1.1] Goal: Assess all user skills comprehensively
[1.2] User Skills to Assess: ['python', 'java', 'nlp', 'tensorflow', 'data-structures', 'oop']

[1.3] Filtering by domain: ['java']
      ✓ 4 questions in target domains

[1.4] Creating diverse question set...
      - Skill Coverage Analysis:
        java: 4 questions available
        oop: 1 questions available
        data-structures: 1 questions available
      - Target: 1 questions per skill
      ✓ Selected 3 diverse questions

[1.5] Scoring questions for initial assessment...

      Question 1/3: q_java_0001
      - Tags: ['jav