In [1]:
!nvidia-smi

Tue Aug  5 16:24:14 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100 80GB PCIe          On  | 00000000:40:00.0 Off |                    0 |
| N/A   40C    P0              56W / 300W |      4MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [1]:
!pip install flask flask_cors

Defaulting to user installation because normal site-packages is not writeable


## 1. Setup and Imports

In [2]:
# Core imports
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical
import pickle
import json
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass, asdict
from datetime import datetime, timedelta
import uuid
from collections import defaultdict, deque
import random
from enum import Enum
import logging
from pathlib import Path

# Web framework imports (for API)
from flask import Flask, request, jsonify, Response
from flask_cors import CORS
import threading
import time

# Logging: INFO and above, each record stamped with time, logger name and level
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger('AdaptiveTutorAgent')

# Run on the GPU when PyTorch can see one, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Files this notebook expects to find in the working directory
model_files = dict(
    REINFORCE='reinforce_model.pth',
    Thompson='thompson_sampler.pkl',
    Metrics='training_metrics.json',
)

# Report each file's presence; a missing file is only reported here, not fatal
for name, filepath in model_files.items():
    if not Path(filepath).exists():
        print(f"⚠️ {name} model not found: {filepath}")
        print(f"   Please ensure you've run the training notebook first.")
        continue
    print(f"✅ {name} model found: {filepath}")

Using device: cuda
✅ REINFORCE model found: reinforce_model.pth
✅ Thompson model found: thompson_sampler.pkl
✅ Metrics model found: training_metrics.json


## 2. Core Agent Components
Define the core data structures and neural network architecture.

In [3]:
# Enums for clarity
class DifficultyLevel(Enum):
    """Five ordered difficulty tiers, numbered 0 (easiest) to 4 (hardest).

    NOTE(review): not referenced by any code visible in this notebook chunk;
    presumably the values line up with the 5 policy actions / difficulty
    bins -- confirm before relying on that.
    """
    VERY_EASY = 0
    EASY = 1
    MEDIUM = 2
    HARD = 3
    VERY_HARD = 4

class StudentState(Enum):
    """Coarse learning state of a student; the string values are what gets serialised.

    The state is assigned by AdaptiveTutorialAgent._update_student_state from
    the accuracy over the student's most recent answers.
    """
    NEW = "new"                # no answers recorded yet
    LEARNING = "learning"      # recent accuracy between the struggle and mastery thresholds
    STRUGGLING = "struggling"  # recent accuracy at or below the struggle threshold
    MASTERING = "mastering"    # recent accuracy at or above the mastery threshold
    ADVANCED = "advanced"      # mastering, and skill_level above 0.8

@dataclass
class Question:
    """Represents a question/problem in the system.

    Instances are created by QuestionBank and looked up by ``id`` when an
    answer is submitted.
    """
    id: str            # unique identifier; the sample bank builds it as "<topic>_<index>"
    topic: str         # topic key the question is filed under in the bank
    difficulty: float  # the sample bank draws this from uniform(0.1, 1.0)
    content: str       # text shown to the student
    answer: Any        # expected answer; compared case-insensitively after stripping whitespace
    hints: List[str]   # only the number of hints is exposed by get_next_question
    explanation: str   # returned to the student after every submitted answer
    # Free-form extra data; defaults to None (not an empty dict), so check before indexing.
    metadata: Optional[Dict[str, Any]] = None

@dataclass
class StudentProfile:
    """Complete student profile with learning history.

    Created by AdaptiveTutorialAgent.create_student_profile and mutated in
    place as sessions are started and answers are submitted.
    """
    id: str
    name: str
    created_at: datetime
    last_active: datetime    # refreshed when a session starts and when it ends
    total_sessions: int      # incremented by start_session
    total_questions: int     # incremented once per submitted answer
    overall_accuracy: float  # running mean of correctness over all submitted answers
    skill_level: float       # moving average (0.9 old / 0.1 new) of in-session accuracy
    learning_rate: float     # scales the per-answer gain applied to knowledge_state
    knowledge_state: Dict[str, float]  # topic -> mastery
    # One dict per submitted answer with keys: timestamp, question_id, topic,
    # difficulty, correct, time_taken, expected_prob.
    performance_history: List[Dict]
    current_state: StudentState
    preferences: Dict[str, Any]  # created as {'preferred_session_length': 15}; not read in the visible code

@dataclass
class SessionData:
    """Data for a single learning session.

    Created by AdaptiveTutorialAgent.start_session and kept in
    ``active_sessions`` until end_session removes it.
    """
    session_id: str                # UUID4 string generated at session start
    student_id: str
    start_time: datetime
    end_time: Optional[datetime]   # None while the session is active; set by end_session
    questions_attempted: int       # incremented when a question is served, not when it is answered
    correct_answers: int           # incremented by submit_answer on a correct answer
    topics_covered: List[str]      # distinct topics, in the order they were first served
    difficulty_progression: List[float]  # difficulty of each served question, in order
    rewards_earned: float          # sum of per-answer rewards
    session_state: Dict[str, Any]  # initialised to {}; not otherwise used in the visible code

# Recreate the PolicyNetwork architecture (must match training)
class PolicyNetwork(nn.Module):
    """Three-layer MLP policy for REINFORCE.

    Maps a state vector of size ``state_dim`` to a probability distribution
    over ``action_dim`` actions. The layer attribute names (``fc1``, ``fc2``,
    ``fc3``) are the keys of the saved ``state_dict``, so they must stay as
    they are for trained checkpoints to load.
    """

    def __init__(self, state_dim=10, hidden_dim=32, action_dim=5):
        super().__init__()

        # Layers are created in this order so weight initialisation is unchanged.
        self.fc1 = nn.Linear(state_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, action_dim)
        self.dropout = nn.Dropout(0.1)

    def forward(self, state):
        """Return action probabilities (softmax over the last dimension)."""
        hidden = state
        # Two hidden stages: linear -> ReLU -> dropout
        for layer in (self.fc1, self.fc2):
            hidden = self.dropout(F.relu(layer(hidden)))
        return F.softmax(self.fc3(hidden), dim=-1)

# Question Bank Manager
class QuestionBank:
    """In-memory store of questions, grouped by topic."""

    def __init__(self):
        self.questions = defaultdict(list)  # topic -> list of questions
        self.topics = []
        self._initialize_questions()

    def _initialize_questions(self):
        """Fill the bank with placeholder mathematics questions.

        Each topic gets 20 questions whose difficulty is drawn from
        uniform(0.1, 1.0) using NumPy's global random state.
        """
        math_topics = (
            "algebra_basics", "linear_equations", "quadratic_equations",
            "geometry", "trigonometry", "calculus_intro",
        )
        questions_per_topic = 20

        for topic in math_topics:
            bucket = self.questions[topic]
            for idx in range(questions_per_topic):
                level = np.random.uniform(0.1, 1.0)
                bucket.append(Question(
                    id=f"{topic}_{idx}",
                    topic=topic,
                    difficulty=level,
                    content=f"Question about {topic.replace('_', ' ')} (Level {level:.1f})",
                    answer=f"answer_{idx}",
                    hints=[f"Hint 1 for {topic}", f"Hint 2 for {topic}"],
                    explanation=f"This tests understanding of {topic.replace('_', ' ')}",
                    metadata={"sub_topic": topic, "created": datetime.now()}
                ))

        self.topics = list(self.questions.keys())
        logger.info(f"Question bank initialized with {len(self.topics)} topics")

    def get_questions_by_difficulty(self, topic: str, difficulty: float,
                                   tolerance: float = 0.1) -> List[Question]:
        """Return the questions of ``topic`` within ``tolerance`` of ``difficulty``.

        When none fall inside the tolerance band, the (up to) three closest
        questions are returned instead. An unknown topic yields an empty list.
        """
        # .get() avoids creating an empty entry for unknown topics in the defaultdict
        pool = self.questions.get(topic)
        if pool is None:
            return []

        def gap(question):
            return abs(question.difficulty - difficulty)

        in_band = [question for question in pool if gap(question) <= tolerance]
        if in_band:
            return in_band

        # Nothing in range: fall back to the nearest few
        return sorted(pool, key=gap)[:3]

    def get_random_question(self, topic: str, difficulty: float) -> Optional[Question]:
        """Pick one candidate near ``difficulty`` at random, or None if there are none."""
        candidates = self.get_questions_by_difficulty(topic, difficulty)
        if not candidates:
            return None
        return random.choice(candidates)

## 3. Adaptive Tutorial Agent Implementation
The main agent class that orchestrates learning using your trained models.

In [13]:
class AdaptiveTutorialAgent:
    """
    Production-ready Adaptive Tutorial Agent using trained RL models.
    Implements personalized teaching strategies using Thompson Sampling + REINFORCE.
    """
    
    def __init__(self, model_dir: str = "./") -> None:
        """Build the agent: question bank, in-memory stores, trained models and config.

        Args:
            model_dir: Directory that holds ``reinforce_model.pth`` and
                ``thompson_sampler.pkl`` (see ``_load_models``).

        All student profiles and sessions live in plain dicts on this
        instance, so they are lost when the object is discarded.
        """
        self.model_dir = Path(model_dir)
        self.device = device  # module-level torch device chosen in the setup cell
        
        # Initialize components
        self.question_bank = QuestionBank()
        self.student_profiles = {}  # student_id -> StudentProfile
        self.active_sessions = {}   # session_id -> SessionData
        
        # Load trained models
        self._load_models()
        
        # Performance tracking
        # NOTE(review): PerformanceMonitor is not defined in the visible part of this
        # notebook; it must be defined before this cell runs or this line raises NameError.
        self.performance_monitor = PerformanceMonitor()
        
        # Configuration
        # Only 'mastery_threshold', 'struggle_threshold' and 'thompson_weight' are read by
        # the methods visible here; the remaining keys are not referenced in this chunk.
        self.config = {
            'min_questions_per_session': 5,
            'max_questions_per_session': 30,
            'mastery_threshold': 0.85,
            'struggle_threshold': 0.40,
            'difficulty_adjustment_rate': 0.1,
            'thompson_weight': 0.3,  # Weight for Thompson vs REINFORCE
        }
        
        logger.info("Adaptive Tutorial Agent initialized successfully")
    
    def _load_models(self):
        """Load the trained REINFORCE policy and Thompson Sampler state.

        Reads ``reinforce_model.pth`` and ``thompson_sampler.pkl`` from
        ``self.model_dir``. The two artefacts are loaded independently; when
        one is missing or cannot be read, the problem is logged and that
        component falls back to defaults (randomly initialised policy weights,
        or Beta(1, 1) priors with bins ``linspace(0.1, 1.0, 5)``).

        Sets ``self.policy_network`` (always in eval mode),
        ``self.thompson_alpha``, ``self.thompson_beta`` (defaultdicts keyed by
        student id) and ``self.difficulty_bins`` (1-D float ndarray).
        """
        # ---- REINFORCE policy -------------------------------------------------
        checkpoint_path = self.model_dir / 'reinforce_model.pth'
        try:
            # load_state_dict needs an instantiated module, so build it first.
            policy = PolicyNetwork(10, 32, 5).to(self.device)

            # SECURITY: weights_only=False lets torch.load unpickle arbitrary
            # objects. Only point model_dir at checkpoints you produced yourself.
            checkpoint = torch.load(
                checkpoint_path,
                map_location=self.device,
                weights_only=False
            )
            policy.load_state_dict(checkpoint['policy_state_dict'])
            logger.info("✅ REINFORCE model loaded successfully")

        except FileNotFoundError:
            logger.warning(f"REINFORCE model not found at {checkpoint_path}")
            logger.info("Initializing with random weights")
            policy = PolicyNetwork(10, 32, 5).to(self.device)

        except Exception as e:
            logger.error(f"Failed to load REINFORCE model: {e}")
            logger.info("Initializing with random weights as fallback")
            # Fresh module: a failed load may have left the first one partly overwritten.
            policy = PolicyNetwork(10, 32, 5).to(self.device)

        # The agent only ever runs inference. eval() must be applied on the
        # fallback paths too, otherwise dropout stays active and the same
        # state yields different action probabilities on every call.
        policy.eval()
        self.policy_network = policy

        # ---- Thompson Sampler -------------------------------------------------
        thompson_path = self.model_dir / 'thompson_sampler.pkl'

        # Defaults, replaced as a whole only if the entire file parses cleanly,
        # so a failure midway can never leave a mix of loaded and default state.
        alpha_seed, beta_seed = {}, {}
        bins = np.linspace(0.1, 1.0, 5)
        try:
            # SECURITY: pickle.load can execute arbitrary code; only load
            # sampler files written by the trusted training notebook.
            with open(thompson_path, 'rb') as f:
                thompson_data = pickle.load(f)

            loaded_alpha = dict(thompson_data.get('alpha', {}))
            loaded_beta = dict(thompson_data.get('beta', {}))
            # Coerce to a flat float ndarray: submit_answer subtracts a scalar
            # from the bins, which would fail on a plain list.
            loaded_bins = np.asarray(
                thompson_data.get('difficulty_bins', bins), dtype=float
            ).ravel()

            alpha_seed, beta_seed, bins = loaded_alpha, loaded_beta, loaded_bins
            logger.info("✅ Thompson Sampler loaded successfully")

        except FileNotFoundError:
            logger.warning(f"Thompson Sampler not found at {thompson_path}")
            logger.info("Initializing with default priors")

        except Exception as e:
            logger.error(f"Failed to load Thompson Sampler: {e}")
            logger.info("Initializing with default priors as fallback")

        # Unseen students get one Beta(1, 1) prior per difficulty bin.
        n_arms = len(bins)
        self.thompson_alpha = defaultdict(lambda: np.ones(n_arms), alpha_seed)
        self.thompson_beta = defaultdict(lambda: np.ones(n_arms), beta_seed)
        self.difficulty_bins = bins
    
    def create_student_profile(self, student_id: str, name: str = None) -> StudentProfile:
        """Create a fresh profile for ``student_id``, store it and return it.

        The profile starts at 0.5 for accuracy, skill and every topic's
        mastery, in state NEW. ``name`` defaults to ``Student_<student_id>``.
        Any profile already stored under the same id is replaced.
        """
        display_name = name if name else f"Student_{student_id}"
        created = datetime.now()
        active = datetime.now()
        starting_knowledge = dict.fromkeys(self.question_bank.topics, 0.5)

        profile = StudentProfile(
            id=student_id,
            name=display_name,
            created_at=created,
            last_active=active,
            total_sessions=0,
            total_questions=0,
            overall_accuracy=0.5,
            skill_level=0.5,
            learning_rate=0.1,
            knowledge_state=starting_knowledge,
            performance_history=[],
            current_state=StudentState.NEW,
            preferences={'preferred_session_length': 15},
        )
        self.student_profiles[student_id] = profile

        logger.info(f"Created new profile for student: {student_id}")
        return profile
    
    def start_session(self, student_id: str) -> SessionData:
        """Open a new session for ``student_id`` and register it as active.

        A profile is created on the fly for unknown students. The student's
        ``last_active`` and ``total_sessions`` are updated.
        """
        # Unknown student: build a profile first
        if student_id not in self.student_profiles:
            self.create_student_profile(student_id)
        profile = self.student_profiles[student_id]

        profile.last_active = datetime.now()
        profile.total_sessions += 1

        # Empty session record; counters are filled in as questions are served and answered
        new_id = str(uuid.uuid4())
        session = SessionData(
            session_id=new_id,
            student_id=student_id,
            start_time=datetime.now(),
            end_time=None,
            questions_attempted=0,
            correct_answers=0,
            topics_covered=[],
            difficulty_progression=[],
            rewards_earned=0.0,
            session_state={},
        )
        self.active_sessions[new_id] = session

        logger.info(f"Started session {session.session_id} for student {student_id}")
        return session
    
    def _encode_state(self, student_id: str, session_id: str) -> torch.Tensor:
        """Encode the student/session situation as the policy network's input.

        Args:
            student_id: Key into ``self.student_profiles`` (must exist).
            session_id: Key into ``self.active_sessions``; when it is unknown,
                neutral defaults are used for the session features.

        Returns:
            Float32 tensor of shape (1, 10) on ``self.device`` holding, in order:
            skill level, accuracy over the last 10 answers, mean topic mastery,
            questions attempted / 30, session accuracy, difficulty of the last
            served question, hours since last activity (capped at 1),
            number of correct answers among the last 10 divided by 10,
            a constant 0.0 placeholder, and questions attempted / 20.
        """
        profile = self.student_profiles[student_id]
        session = self.active_sessions.get(session_id)

        # Recent performance (slicing an empty list is safe)
        recent_history = profile.performance_history[-10:]
        recent_accuracy = np.mean([h['correct'] for h in recent_history]) if recent_history else 0.5
        recent_correct_fraction = sum(1 for h in recent_history if h['correct']) / 10

        # Mean mastery; 0.5 matches the default used elsewhere for unknown topics
        # and avoids np.mean([]) producing NaN.
        mastery_values = list(profile.knowledge_state.values())
        avg_knowledge = np.mean(mastery_values) if mastery_values else 0.5

        # Session features, with neutral defaults when no session is given
        if session:
            attempted = session.questions_attempted
            session_accuracy = session.correct_answers / max(1, attempted)
            last_difficulty = (session.difficulty_progression[-1]
                               if session.difficulty_progression else 0.5)
        else:
            attempted = 0
            session_accuracy = 0.5
            last_difficulty = 0.5

        # total_seconds() rather than .seconds: .seconds holds only the
        # sub-day remainder, so an absence of 24h05m would look like 5 minutes.
        idle_seconds = max(0.0, (datetime.now() - profile.last_active).total_seconds())
        hours_idle = min(1.0, idle_seconds / 3600)

        state = np.array([
            profile.skill_level,
            recent_accuracy,
            avg_knowledge,
            attempted / 30,           # normalised question count
            session_accuracy,
            last_difficulty,
            hours_idle,
            recent_correct_fraction,
            0.0,                      # placeholder for hint usage
            attempted / 20,           # session progress
        ])

        return torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(self.device)
    
    def get_next_question(self, session_id: str) -> Dict[str, Any]:
        """Pick the next question for a session using Thompson Sampling + REINFORCE.

        The target difficulty is a weighted blend
        (``config['thompson_weight']``) of the bin chosen by Thompson Sampling
        and the bin the policy network rates highest. The topic is sampled
        with probability proportional to ``1.1 - mastery``, so weaker topics
        come up more often.

        Args:
            session_id: Id of an active session.

        Returns:
            A dict describing the question (ids, text, topic, difficulty,
            number of hints, and a ``metadata`` sub-dict with the selection
            details), or ``{"error": ...}`` when the session is unknown or no
            question can be served. The session is only modified on success:
            ``questions_attempted`` is incremented, the difficulty is appended
            to ``difficulty_progression`` and the topic is recorded.
        """
        session = self.active_sessions.get(session_id)
        if not session:
            return {"error": "Invalid session ID"}

        student_id = session.student_id
        profile = self.student_profiles[student_id]

        topics = list(self.question_bank.topics)
        if not topics:
            # Without this guard the weight normalisation below divides by zero.
            return {"error": "No topics available"}

        # 1. Encode current state
        state_tensor = self._encode_state(student_id, session_id)

        # 2. Thompson Sampling for exploration: one Beta draw per difficulty arm
        alpha = self.thompson_alpha[student_id]
        beta = self.thompson_beta[student_id]
        thompson_samples = np.array([np.random.beta(a, b) for a, b in zip(alpha, beta)])
        thompson_action = int(np.argmax(thompson_samples))
        thompson_difficulty = float(self.difficulty_bins[thompson_action])

        # 3. REINFORCE for exploitation: greedy action of the trained policy
        with torch.no_grad():
            action_probs = self.policy_network(state_tensor)
            rl_action = int(torch.argmax(action_probs).item())
            # Entropy of the action distribution (higher = less certain policy)
            entropy = -torch.sum(action_probs * torch.log(action_probs + 1e-8)).item()
        rl_difficulty = float(self.difficulty_bins[rl_action])

        # 4. Combine both methods
        thompson_weight = self.config['thompson_weight']
        final_difficulty = (thompson_weight * thompson_difficulty
                            + (1 - thompson_weight) * rl_difficulty)

        # 5. Select topic based on knowledge gaps; the +0.1 keeps mastered
        #    topics selectable.
        topic_weights = np.array([
            1.0 - profile.knowledge_state.get(topic, 0.5) + 0.1
            for topic in topics
        ])
        topic_weights = topic_weights / topic_weights.sum()
        # str(): np.random.choice returns a NumPy string scalar
        selected_topic = str(np.random.choice(topics, p=topic_weights))

        # 6. Get appropriate question. get_random_question only returns None
        #    when the topic holds no questions at all, so a second random pick
        #    from the same (empty) list could never succeed.
        question = self.question_bank.get_random_question(selected_topic, final_difficulty)
        if question is None:
            return {"error": f"No questions available for topic '{selected_topic}'"}

        # 7. Update session state
        session.questions_attempted += 1
        session.difficulty_progression.append(question.difficulty)
        if selected_topic not in session.topics_covered:
            session.topics_covered.append(selected_topic)

        # 8. Prepare response (plain Python numbers so it serialises cleanly)
        response = {
            'session_id': session_id,
            'question_id': question.id,
            'question': question.content,
            'topic': question.topic,
            'difficulty': float(question.difficulty),
            'hints_available': len(question.hints),
            'metadata': {
                'thompson_difficulty': thompson_difficulty,
                'rl_difficulty': rl_difficulty,
                'final_difficulty': float(final_difficulty),
                'policy_entropy': entropy,
                'student_skill': float(profile.skill_level),
                'topic_knowledge': float(profile.knowledge_state.get(selected_topic, 0.5))
            }
        }

        logger.info(f"Question selected for session {session_id}: "
                   f"Topic={selected_topic}, Difficulty={question.difficulty:.2f}")

        return response
    
    def submit_answer(self, session_id: str, question_id: str,
                     answer: str, time_taken: float = None) -> Dict[str, Any]:
        """Grade an answer and update the sampler, the profile and the session.

        Args:
            session_id: Id of an active session.
            question_id: Id of the question being answered.
            answer: The student's answer. ``None`` and non-string values are
                accepted and compared via their string form.
            time_taken: Optional time spent on the question; stored as given.

        Returns:
            A dict with the verdict, explanation, the correct answer (only
            when the submission was wrong), the reward, the updated skill
            level and topic mastery, and running session statistics; or
            ``{"error": ...}`` for an unknown session or question.
        """
        session = self.active_sessions.get(session_id)
        if not session:
            return {"error": "Invalid session ID"}

        student_id = session.student_id
        profile = self.student_profiles[student_id]

        # Find the question, stopping at the first match across all topics
        question = next(
            (q
             for topic_questions in self.question_bank.questions.values()
             for q in topic_questions
             if q.id == question_id),
            None,
        )
        if question is None:
            return {"error": "Question not found"}

        # Evaluate answer (simplified - in production, use proper evaluation).
        # Both sides are converted with str() so a missing answer (None) or a
        # non-string expected answer is graded instead of raising AttributeError.
        given = "" if answer is None else str(answer)
        correct = given.strip().lower() == str(question.answer).strip().lower()

        # Success probability implied by the student's skill vs. question difficulty
        expected_prob = float(
            1.0 / (1.0 + np.exp(5 * (question.difficulty - profile.skill_level)))
        )

        # Update Thompson Sampler on the bin nearest to the question's difficulty
        difficulty_bin = int(np.argmin(
            np.abs(np.asarray(self.difficulty_bins) - question.difficulty)
        ))
        if correct:
            self.thompson_alpha[student_id][difficulty_bin] += 1
            session.correct_answers += 1
        else:
            self.thompson_beta[student_id][difficulty_bin] += 1

        # Update student knowledge state: a correct answer closes part of the
        # remaining gap, a wrong one still yields a small fixed gain.
        old_knowledge = profile.knowledge_state.get(question.topic, 0.5)
        if correct:
            improvement = profile.learning_rate * (1 - old_knowledge) * 0.3
        else:
            improvement = profile.learning_rate * 0.05
        profile.knowledge_state[question.topic] = min(1.0, old_knowledge + improvement)

        # questions_attempted is 0 if an answer arrives before any question was
        # served in this session; clamp so the divisions below cannot fail.
        attempted = max(1, session.questions_attempted)
        session_accuracy = session.correct_answers / attempted

        # Update student skill level (moving average)
        profile.skill_level = 0.9 * profile.skill_level + 0.1 * session_accuracy

        # Calculate reward for this interaction
        reward = float(self._calculate_reward(correct, question.difficulty, profile.skill_level))
        session.rewards_earned += reward

        # Update performance history
        profile.performance_history.append({
            'timestamp': datetime.now(),
            'question_id': question_id,
            'topic': question.topic,
            'difficulty': question.difficulty,
            'correct': correct,
            'time_taken': time_taken,
            'expected_prob': expected_prob
        })

        # Running mean of correctness over all answers
        profile.total_questions += 1
        profile.overall_accuracy = (
            profile.overall_accuracy * (profile.total_questions - 1) + int(correct)
        ) / profile.total_questions

        # Update student state
        self._update_student_state(profile)

        # Prepare response
        response = {
            'correct': correct,
            'explanation': question.explanation,
            'correct_answer': question.answer if not correct else None,
            'reward': reward,
            'updated_skill_level': profile.skill_level,
            'topic_mastery': profile.knowledge_state[question.topic],
            'session_stats': {
                'questions_attempted': session.questions_attempted,
                'correct_answers': session.correct_answers,
                'accuracy': session_accuracy,
                'total_rewards': session.rewards_earned
            }
        }

        logger.info(f"Answer submitted for session {session_id}: "
                   f"Correct={correct}, Reward={reward:.2f}")

        return response
    
    def _calculate_reward(self, correct: bool, difficulty: float, skill_level: float) -> float:
        """Score one interaction for the RL system; the result is clipped to [-1, 2].

        The reward is the sum of an outcome term (1.0 correct / -0.3 wrong), a
        bonus of at most 0.3 that decays with the distance between the
        question's difficulty and ``skill_level + 0.1``, and a learning term
        (+0.5 for solving something above skill level, -0.3 for missing
        something more than 0.2 below it).
        """
        # Learning efficiency term
        if correct:
            # Succeeded on a harder problem?
            learning_bonus = 0.5 if difficulty > skill_level else 0
        else:
            # Failed on a much easier problem?
            learning_bonus = -0.3 if difficulty < skill_level - 0.2 else 0

        # Outcome term
        reward = 1.0 if correct else -0.3

        # Appropriateness bonus: the target sits slightly above the skill level
        target_difficulty = skill_level + 0.1
        reward = reward + np.exp(-5 * abs(difficulty - target_difficulty)) * 0.3

        reward = reward + learning_bonus
        return np.clip(reward, -1, 2)
    
    def _update_student_state(self, profile: StudentProfile):
        """Re-derive ``profile.current_state`` from the last (up to) 20 answers."""
        window = profile.performance_history[-20:]

        # No answers at all: the student is still new
        if not window:
            profile.current_state = StudentState.NEW
            return

        accuracy = np.mean([entry['correct'] for entry in window])

        if accuracy >= self.config['mastery_threshold']:
            # High accuracy; a high skill level on top of that means advanced
            new_state = (StudentState.ADVANCED if profile.skill_level > 0.8
                         else StudentState.MASTERING)
        elif accuracy <= self.config['struggle_threshold']:
            new_state = StudentState.STRUGGLING
        else:
            new_state = StudentState.LEARNING

        profile.current_state = new_state
    
    def end_session(self, session_id: str) -> Dict[str, Any]:
        """Close a session, remove it from the active set and return its summary.

        Returns ``{"error": "Invalid session ID"}`` for an unknown session.
        """
        session = self.active_sessions.get(session_id)
        if not session:
            return {"error": "Invalid session ID"}

        session.end_time = datetime.now()
        duration = (session.end_time - session.start_time).total_seconds()

        student_id = session.student_id
        profile = self.student_profiles[student_id]

        # Difficulty of the first/last served question and the change between them
        difficulties = session.difficulty_progression
        first_difficulty = difficulties[0] if difficulties else 0
        last_difficulty = difficulties[-1] if difficulties else 0
        difficulty_change = (difficulties[-1] - difficulties[0]) if len(difficulties) > 1 else 0
        mean_difficulty = np.mean(difficulties) if difficulties else 0

        # Generate session summary
        summary = {
            'session_id': session_id,
            'student_id': student_id,
            'duration_seconds': duration,
            'questions_attempted': session.questions_attempted,
            'correct_answers': session.correct_answers,
            'accuracy': session.correct_answers / max(1, session.questions_attempted),
            'topics_covered': session.topics_covered,
            'avg_difficulty': mean_difficulty,
            'total_rewards': session.rewards_earned,
            'skill_progression': {
                'start': first_difficulty,
                'end': last_difficulty,
                'change': difficulty_change,
            },
            'student_state': profile.current_state.value,
            'recommendations': self._generate_recommendations(profile, session),
        }

        # Store session data (in production, save to database)
        profile.last_active = datetime.now()

        # The session is no longer active
        self.active_sessions.pop(session_id)

        logger.info(f"Session {session_id} ended. Duration: {duration:.1f}s, "
                   f"Accuracy: {summary['accuracy']:.1%}")

        return summary
    
    def _generate_recommendations(self, profile: StudentProfile,
                                 session: SessionData) -> List[str]:
        """Build a list of advice strings from the student's state, weak topics and session accuracy."""
        # Advice keyed by learning state; states without an entry add nothing
        advice_by_state = {
            StudentState.STRUGGLING: [
                "Consider reviewing foundational concepts",
                "Try easier problems to build confidence",
            ],
            StudentState.MASTERING: [
                "Ready for more challenging material",
                "Consider exploring advanced topics",
            ],
            StudentState.ADVANCED: [
                "Excellent progress! Try competition-level problems",
            ],
        }
        recommendations = list(advice_by_state.get(profile.current_state, []))

        # Knowledge gaps: name at most three topics with mastery below 0.4
        weak_topics = [
            topic for topic, knowledge in profile.knowledge_state.items()
            if knowledge < 0.4
        ]
        if weak_topics:
            recommendations.append(f"Focus on improving: {', '.join(weak_topics[:3])}")

        # Session accuracy below 50%
        session_accuracy = session.correct_answers / max(1, session.questions_attempted)
        if session_accuracy < 0.5:
            recommendations.append("Take breaks between difficult problems")

        return recommendations
    
    def get_student_analytics(self, student_id: str) -> Dict[str, Any]:
        """Assemble profile data, accuracy trends and derived metrics for one student.

        Returns ``{"error": "Student not found"}`` for an unknown id. A trend
        entry is None when there are not enough answers to fill its window
        (10 or 50); ``last_100`` covers up to the last 100 answers and is None
        only when there are none.
        """
        profile = self.student_profiles.get(student_id) if student_id in self.student_profiles else None
        if student_id not in self.student_profiles:
            return {"error": "Student not found"}

        # Work on at most the last 100 answers
        recent_history = profile.performance_history[-100:]
        n_recent = len(recent_history)

        def accuracy_of(records):
            return np.mean([record['correct'] for record in records])

        profile_section = {
            'name': profile.name,
            'created_at': profile.created_at.isoformat(),
            'last_active': profile.last_active.isoformat(),
            'total_sessions': profile.total_sessions,
            'total_questions': profile.total_questions,
            'overall_accuracy': profile.overall_accuracy,
            'current_skill_level': profile.skill_level,
            'current_state': profile.current_state.value,
        }
        trends = {
            'last_10': accuracy_of(recent_history[-10:]) if n_recent >= 10 else None,
            'last_50': accuracy_of(recent_history[-50:]) if n_recent >= 50 else None,
            'last_100': accuracy_of(recent_history) if recent_history else None,
        }

        return {
            'student_id': student_id,
            'profile': profile_section,
            'knowledge_state': profile.knowledge_state,
            'performance_trends': trends,
            'topic_performance': self._calculate_topic_performance(profile),
            'difficulty_performance': self._calculate_difficulty_performance(profile),
            'learning_velocity': self._calculate_learning_velocity(profile),
            'strengths': self._identify_strengths(profile),
            'weaknesses': self._identify_weaknesses(profile),
        }
    
    def _calculate_topic_performance(self, profile: StudentProfile) -> Dict[str, float]:
        """Return the fraction of correct answers per topic, over the full history.

        Only topics that appear in the history are included, in the order in
        which they were first answered.
        """
        attempts = {}
        hits = {}
        for entry in profile.performance_history:
            topic = entry['topic']
            attempts[topic] = attempts.get(topic, 0) + 1
            hits[topic] = hits.get(topic, 0) + (1 if entry['correct'] else 0)

        # Every topic listed here has at least one attempt, so the division is safe
        return {topic: hits[topic] / count for topic, count in attempts.items()}
    
    def _calculate_difficulty_performance(self, profile: StudentProfile) -> Dict[str, float]:
        """Return the fraction of correct answers in each of five difficulty bands.

        A question's band is ``int(difficulty * 5)`` capped at 4, i.e. five
        equal-width bands. Bands without any answers report 0.
        """
        labels = ['very_easy', 'easy', 'medium', 'hard', 'very_hard']
        attempts = [0] * len(labels)
        hits = [0] * len(labels)

        for entry in profile.performance_history:
            band = min(4, int(entry['difficulty'] * 5))
            attempts[band] += 1
            if entry['correct']:
                hits[band] += 1

        return {
            label: (hit_count / attempt_count) if attempt_count > 0 else 0
            for label, hit_count, attempt_count in zip(labels, hits, attempts)
        }
    
    def _calculate_learning_velocity(self, profile: StudentProfile) -> float:
        """Return the change in accuracy between an older and a recent window of answers.

        With 50 or more answers the windows are answers [-50:-25] and [-25:].
        With 20 to 49 answers the history is split at its midpoint into an
        older and a newer half. With fewer than 20 answers the result is 0.0.

        Returns:
            Recent accuracy minus older accuracy; positive means improving.
        """
        history = profile.performance_history
        n_answers = len(history)

        if n_answers < 20:
            return 0.0

        if n_answers >= 50:
            older, recent = history[-50:-25], history[-25:]
        else:
            # Split at the midpoint so the windows are disjoint. Taking the
            # last 25 here would re-count answers from the older half and
            # pull the difference toward zero.
            midpoint = n_answers // 2
            older, recent = history[:midpoint], history[midpoint:]

        older_accuracy = np.mean([entry['correct'] for entry in older])
        recent_accuracy = np.mean([entry['correct'] for entry in recent])

        return float(recent_accuracy - older_accuracy)
    
    def _identify_strengths(self, profile: StudentProfile) -> List[str]:
        """Return up to three topics whose mastery is above 0.7, in profile order."""
        mastered = [
            topic
            for topic, knowledge in profile.knowledge_state.items()
            if knowledge > 0.7
        ]
        return mastered[:3]
    
    def _identify_weaknesses(self, profile: StudentProfile) -> List[str]:
        """Return up to three topics whose mastery is below 0.4, in profile order."""
        lagging = [
            topic
            for topic, knowledge in profile.knowledge_state.items()
            if knowledge < 0.4
        ]
        return lagging[:3]

## 4. REST API Implementation
Flask-based API for easy integration with any frontend.

In [14]:
# Initialize the agent
# Runs at cell execution with the default model_dir "./": builds the question
# bank and loads the model files. All routes below share this single instance,
# whose profiles and sessions are held in memory only.
agent = AdaptiveTutorialAgent()

# Create Flask app
app = Flask(__name__)
CORS(app)  # Enable CORS for frontend integration

@app.route('/health', methods=['GET'])
def health_check():
    """Liveness probe.

    Responds with a JSON object holding a fixed ``'healthy'`` status, the
    current ``datetime.now()`` in ISO format, and the summary reported by the
    agent's performance monitor.
    """
    payload = {'status': 'healthy'}
    payload['timestamp'] = datetime.now().isoformat()
    payload['performance'] = agent.performance_monitor.get_summary()
    return jsonify(payload)

@app.route('/student/create', methods=['POST'])
def create_student():
    """Create a new student profile.

    Request body (JSON object, optional):
        student_id: Identifier for the new student. A random UUID4 string is
            generated when it is missing, null, or an empty string.
        name: Name passed through to the agent; ``None`` when omitted.

    Returns:
        JSON with ``success``, the profile's ``student_id`` and a ``profile``
        summary (``name``, ``skill_level``, ``state``).
    """
    # Every field is optional, so a missing, non-JSON or non-object body is
    # treated as "no fields supplied" rather than failing on ``data.get``.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    # An explicit null / "" id must also fall back to a generated one; a
    # plain .get() default only covers the key being absent.
    student_id = data.get('student_id')
    if student_id is None or student_id == '':
        student_id = str(uuid.uuid4())
    name = data.get('name')

    profile = agent.create_student_profile(student_id, name)

    return jsonify({
        'success': True,
        'student_id': profile.id,
        'profile': {
            'name': profile.name,
            'skill_level': profile.skill_level,
            'state': profile.current_state.value
        }
    })

@app.route('/session/start', methods=['POST'])
def start_session():
    """Start a new learning session.

    Request body (JSON object):
        student_id: Required identifier of the student starting the session.

    Returns:
        JSON with ``success``, ``session_id``, ``student_id`` and the
        session start ``timestamp`` (ISO format); or a 400 response with an
        ``error`` message when ``student_id`` is missing.
    """
    # A missing, non-JSON or non-object body is handled like an empty object
    # so the caller gets the 400 below instead of an unhandled exception.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    student_id = data.get('student_id')
    if not student_id:
        return jsonify({'error': 'student_id required'}), 400

    session = agent.start_session(student_id)

    return jsonify({
        'success': True,
        'session_id': session.session_id,
        'student_id': session.student_id,
        'timestamp': session.start_time.isoformat()
    })

@app.route('/question/next', methods=['POST'])
def get_next_question():
    """Get the next question for the session.

    Request body (JSON object):
        session_id: Required identifier of an existing session.

    Returns:
        The dict produced by ``agent.get_next_question`` as JSON. When
        ``session_id`` is missing, or the agent's dict contains an ``error``
        key, the response status is 400.
    """
    # A missing, non-JSON or non-object body is handled like an empty object
    # so the caller gets the 400 below instead of an unhandled exception.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    session_id = data.get('session_id')
    if not session_id:
        return jsonify({'error': 'session_id required'}), 400

    question_data = agent.get_next_question(session_id)

    # The agent signals failure through an 'error' key rather than raising.
    if 'error' in question_data:
        return jsonify(question_data), 400

    return jsonify(question_data)

@app.route('/answer/submit', methods=['POST'])
def submit_answer():
    """Submit an answer to a question.

    Request body (JSON object):
        session_id: Required session identifier.
        question_id: Required identifier of the question being answered.
        answer: Required answer value. Falsy-but-valid values such as ``0``
            or ``false`` are accepted; only null / empty string count as
            missing.
        time_taken: Optional; forwarded to the agent, ``None`` when omitted.

    Returns:
        The dict produced by ``agent.submit_answer`` as JSON. When a required
        field is missing, or the agent's dict contains an ``error`` key, the
        response status is 400.
    """
    # A missing, non-JSON or non-object body is handled like an empty object
    # so the caller gets the 400 below instead of an unhandled exception.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    session_id = data.get('session_id')
    question_id = data.get('question_id')
    answer = data.get('answer')
    time_taken = data.get('time_taken', None)

    # Test for "absent" explicitly: a truthiness check would reject a
    # legitimate answer of 0 or False.
    required = (session_id, question_id, answer)
    if any(value is None or value == '' for value in required):
        return jsonify({'error': 'session_id, question_id, and answer required'}), 400

    result = agent.submit_answer(session_id, question_id, answer, time_taken)

    # The agent signals failure through an 'error' key rather than raising.
    if 'error' in result:
        return jsonify(result), 400

    return jsonify(result)

@app.route('/session/end', methods=['POST'])
def end_session():
    """End a learning session.

    Request body (JSON object):
        session_id: Required identifier of the session to close.

    Returns:
        The summary dict produced by ``agent.end_session`` as JSON. When
        ``session_id`` is missing, or the summary contains an ``error`` key,
        the response status is 400.
    """
    # A missing, non-JSON or non-object body is handled like an empty object
    # so the caller gets the 400 below instead of an unhandled exception.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    session_id = data.get('session_id')
    if not session_id:
        return jsonify({'error': 'session_id required'}), 400

    summary = agent.end_session(session_id)

    # The agent signals failure through an 'error' key rather than raising.
    if 'error' in summary:
        return jsonify(summary), 400

    return jsonify(summary)

# The URL must declare the <student_id> variable: Flask passes path variables
# to the view as keyword arguments, and without it get_analytics() would be
# called with no arguments and fail on every request.
@app.route('/student/analytics/<student_id>', methods=['GET'])
def get_analytics(student_id):
    """Get detailed analytics for a student.

    Args:
        student_id: Student identifier taken from the URL path.

    Returns:
        The dict produced by ``agent.get_student_analytics`` as JSON; status
        404 when that dict contains an ``error`` key.
    """
    analytics = agent.get_student_analytics(student_id)

    if 'error' in analytics:
        return jsonify(analytics), 404

    return jsonify(analytics)

@app.route('/student/list', methods=['GET'])
def list_students():
    """List every student profile currently held by the agent.

    Returns:
        JSON with ``students`` (one summary dict per entry of
        ``agent.student_profiles``) and ``total`` (the number of entries).
    """
    roster = [
        {
            'student_id': sid,
            'name': prof.name,
            'last_active': prof.last_active.isoformat(),
            'total_sessions': prof.total_sessions,
            'skill_level': prof.skill_level,
            'state': prof.current_state.value,
        }
        for sid, prof in agent.student_profiles.items()
    ]

    return jsonify({'students': roster, 'total': len(roster)})

@app.route('/system/config', methods=['GET', 'PUT'])
def system_config():
    """Get or update system configuration.

    GET returns ``agent.config`` as JSON.

    PUT expects a JSON object and merges it into ``agent.config`` via
    ``update``; it responds with ``success`` and the resulting config, or
    with status 400 when the body is missing or is not a JSON object.
    """
    if request.method == 'GET':
        return jsonify(agent.config)

    # NOTE(review): any caller can overwrite agent settings here; there is no
    # authentication or key whitelist in this handler. Confirm that is
    # acceptable before exposing the API.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Guard against update(None) / update(<list>) turning into a 500.
        return jsonify({'error': 'JSON object body required'}), 400

    agent.config.update(data)
    return jsonify({
        'success': True,
        'config': agent.config
    })

# Error handlers
@app.errorhandler(404)
def not_found(error):
    """Render 404 errors as a JSON payload with an ``error`` message."""
    body = jsonify({'error': 'Endpoint not found'})
    return body, 404

@app.errorhandler(500)
def internal_error(error):
    """Render 500 errors as a JSON payload with an ``error`` message."""
    body = jsonify({'error': 'Internal server error'})
    return body, 500

# Printed once every route and error handler above has been registered;
# this cell does not start the server.
print("✅ API endpoints configured successfully")

2025-08-05 16:41:31,786 - AdaptiveTutorAgent - INFO - Question bank initialized with 6 topics
2025-08-05 16:41:31,801 - AdaptiveTutorAgent - INFO - ✅ REINFORCE model loaded successfully
2025-08-05 16:41:32,214 - AdaptiveTutorAgent - INFO - ✅ Thompson Sampler loaded successfully
2025-08-05 16:41:32,241 - AdaptiveTutorAgent - INFO - Adaptive Tutorial Agent initialized successfully


✅ API endpoints configured successfully


## 5. Testing the Integrated System
Run tests to verify everything is working correctly.

In [15]:
def test_agent_integration():
    """Run an end-to-end smoke test against the module-level ``agent``.

    Walks through the same flow the REST API exposes: create a student, start
    a session, answer five questions with simulated correctness, end the
    session and fetch analytics. Progress is printed along the way.

    Agent calls that hand back a dict are checked for an ``'error'`` key (the
    convention the API handlers in this notebook rely on), so a failed step
    stops the run with a descriptive ``AssertionError`` instead of an
    unrelated ``KeyError`` further down.

    Returns:
        True when every step completed and every assertion held.

    Raises:
        AssertionError: If the created profile or session does not belong to
            the test student, or any agent response reports an error.
    """
    banner = "=" * 60

    print(banner)
    print("🧪 TESTING ADAPTIVE TUTORIAL AGENT")
    print(banner)

    # Test 1: Create student
    print("\n📝 Test 1: Creating student profile...")
    test_student_id = "test_student_001"
    profile = agent.create_student_profile(test_student_id, "Test Student")
    assert profile.id == test_student_id
    print(f"✅ Student created: {profile.name} (Skill: {profile.skill_level:.2f})")

    # Test 2: Start session
    print("\n📝 Test 2: Starting learning session...")
    session = agent.start_session(test_student_id)
    assert session.student_id == test_student_id, "session is bound to a different student"
    session_id = session.session_id
    print(f"✅ Session started: {session_id}")

    # Test 3: Get questions and submit answers
    print("\n📝 Test 3: Running learning cycle...")

    for i in range(5):
        # Get question
        question_data = agent.get_next_question(session_id)
        assert 'error' not in question_data, f"get_next_question failed: {question_data}"
        print(f"\n  Question {i+1}:")
        print(f"    Topic: {question_data['topic']}")
        print(f"    Difficulty: {question_data['difficulty']:.2f}")
        print(f"    Content: {question_data['question']}")

        # Simulate answer (correct based on difficulty vs skill): logistic
        # curve, 50% when difficulty equals skill, steepness 5.
        skill = profile.skill_level
        difficulty = question_data['difficulty']
        prob_correct = 1.0 / (1.0 + np.exp(5 * (difficulty - skill)))
        is_correct = np.random.random() < prob_correct

        # NOTE(review): the recorded run of this cell shows all five answers
        # marked incorrect — verify that "answer_0" is really what the agent
        # treats as the correct answer for generated questions.
        answer = "answer_0" if is_correct else "wrong_answer"

        # Submit answer
        result = agent.submit_answer(
            session_id,
            question_data['question_id'],
            answer,
            time_taken=np.random.uniform(10, 60)
        )
        assert 'error' not in result, f"submit_answer failed: {result}"

        print(f"    Answer: {'✅ Correct' if result['correct'] else '❌ Incorrect'}")
        print(f"    Reward: {result['reward']:.2f}")
        print(f"    New skill level: {result['updated_skill_level']:.3f}")

    # Test 4: End session
    print("\n📝 Test 4: Ending session...")
    summary = agent.end_session(session_id)
    assert 'error' not in summary, f"end_session failed: {summary}"
    print("✅ Session ended:")
    print(f"   Duration: {summary['duration_seconds']:.1f} seconds")
    print(f"   Accuracy: {summary['accuracy']:.1%}")
    print(f"   Total rewards: {summary['total_rewards']:.2f}")
    print(f"   Student state: {summary['student_state']}")

    # Test 5: Get analytics
    print("\n📝 Test 5: Getting student analytics...")
    analytics = agent.get_student_analytics(test_student_id)
    assert 'error' not in analytics, f"get_student_analytics failed: {analytics}"
    print("✅ Analytics retrieved:")
    print(f"   Total questions: {analytics['profile']['total_questions']}")
    print(f"   Overall accuracy: {analytics['profile']['overall_accuracy']:.1%}")
    print(f"   Current state: {analytics['profile']['current_state']}")

    if analytics['strengths']:
        print(f"   Strengths: {', '.join(analytics['strengths'])}")
    if analytics['weaknesses']:
        print(f"   Weaknesses: {', '.join(analytics['weaknesses'])}")

    print("\n" + banner)
    print("✅ ALL TESTS PASSED!")
    print(banner)

    return True

# Run tests
# Executes the integration test against the module-level `agent` when this
# cell runs; test_success is True if the function returns without raising.
test_success = test_agent_integration()

2025-08-05 16:42:05,356 - AdaptiveTutorAgent - INFO - Created new profile for student: test_student_001
2025-08-05 16:42:05,356 - AdaptiveTutorAgent - INFO - Started session 1e3dde7c-803e-46a5-9695-4c5c7e2ec5cd for student test_student_001


🧪 TESTING ADAPTIVE TUTORIAL AGENT

📝 Test 1: Creating student profile...
✅ Student created: Test Student (Skill: 0.50)

📝 Test 2: Starting learning session...
✅ Session started: 1e3dde7c-803e-46a5-9695-4c5c7e2ec5cd

📝 Test 3: Running learning cycle...


2025-08-05 16:42:05,802 - AdaptiveTutorAgent - INFO - Question selected for session 1e3dde7c-803e-46a5-9695-4c5c7e2ec5cd: Topic=calculus_intro, Difficulty=0.61
2025-08-05 16:42:05,804 - AdaptiveTutorAgent - INFO - Answer submitted for session 1e3dde7c-803e-46a5-9695-4c5c7e2ec5cd: Correct=False, Reward=-0.07
2025-08-05 16:42:05,804 - AdaptiveTutorAgent - INFO - Question selected for session 1e3dde7c-803e-46a5-9695-4c5c7e2ec5cd: Topic=geometry, Difficulty=0.61
2025-08-05 16:42:05,805 - AdaptiveTutorAgent - INFO - Answer submitted for session 1e3dde7c-803e-46a5-9695-4c5c7e2ec5cd: Correct=False, Reward=-0.12
2025-08-05 16:42:05,806 - AdaptiveTutorAgent - INFO - Question selected for session 1e3dde7c-803e-46a5-9695-4c5c7e2ec5cd: Topic=calculus_intro, Difficulty=0.28
2025-08-05 16:42:05,806 - AdaptiveTutorAgent - INFO - Answer submitted for session 1e3dde7c-803e-46a5-9695-4c5c7e2ec5cd: Correct=False, Reward=-0.18
2025-08-05 16:42:05,807 - AdaptiveTutorAgent - INFO - Question selected for ses


  Question 1:
    Topic: calculus_intro
    Difficulty: 0.61
    Content: Question about calculus intro (Level 0.6)
    Answer: ❌ Incorrect
    Reward: -0.07
    New skill level: 0.450

  Question 2:
    Topic: geometry
    Difficulty: 0.61
    Content: Question about geometry (Level 0.6)
    Answer: ❌ Incorrect
    Reward: -0.12
    New skill level: 0.405

  Question 3:
    Topic: calculus_intro
    Difficulty: 0.28
    Content: Question about calculus intro (Level 0.3)
    Answer: ❌ Incorrect
    Reward: -0.18
    New skill level: 0.365

  Question 4:
    Topic: linear_equations
    Difficulty: 0.45
    Content: Question about linear equations (Level 0.5)
    Answer: ❌ Incorrect
    Reward: -0.03
    New skill level: 0.328

  Question 5:
    Topic: trigonometry
    Difficulty: 0.51
    Content: Question about trigonometry (Level 0.5)
    Answer: ❌ Incorrect
    Reward: -0.13
    New skill level: 0.295

📝 Test 4: Ending session...
✅ Session ended:
   Duration: 0.5 seconds
   Accuracy