# In [None]:
from datetime import datetime
from typing import Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

class ConversationAnalyzer:
    """Store conversation logs and cluster them to surface recurring patterns.

    Conversations are kept in memory by ``log_conversation``. On request,
    ``analyze_conversation_patterns`` flattens each one to text, vectorizes
    the texts with TF-IDF and groups them with K-means.

    NOTE(review): ``_calculate_success_rate`` and ``_extract_common_patterns``
    are called by ``_analyze_clusters`` but are not defined in this class
    body; they must be provided elsewhere (e.g. by a subclass) -- TODO confirm.
    """

    def __init__(self):
        # Refit from scratch on every call to analyze_conversation_patterns.
        self.vectorizer = TfidfVectorizer()
        # One dict per logged conversation, in insertion order.
        self.conversation_logs = []
        # Not read or written by any method defined in this class.
        self.prompt_performance = {}

    def log_conversation(self,
                         conversation_id: str,
                         conversation_data: List[Dict],
                         prompt_used: str,
                         success_metrics: Dict):
        """Record a conversation for later analysis.

        Args:
            conversation_id: Identifier stored under the ``'id'`` key.
            conversation_data: The conversation turns; the analysis step
                expects each turn to be a mapping with ``'user'`` and
                ``'bot'`` keys.
            prompt_used: Prompt associated with the conversation.
            success_metrics: Metrics stored alongside the conversation.
        """
        self.conversation_logs.append({
            'id': conversation_id,
            'timestamp': datetime.now(),
            'conversation': conversation_data,
            'prompt': prompt_used,
            'metrics': success_metrics
        })

    def analyze_conversation_patterns(self, n_clusters: int = 5) -> Dict:
        """Cluster the logged conversations and describe each cluster.

        Args:
            n_clusters: Maximum number of clusters to form. It is clamped to
                the number of logged conversations, because K-means cannot
                form more clusters than there are samples.

        Returns:
            A dict mapping cluster id to its description (see
            ``_analyze_clusters``), or an empty dict when nothing is logged.

        Raises:
            ValueError: If ``n_clusters`` is smaller than 1.
        """
        if n_clusters < 1:
            raise ValueError("n_clusters must be at least 1")

        conversations = [log['conversation'] for log in self.conversation_logs]
        if not conversations:
            # Nothing to vectorize: TF-IDF would fail on an empty corpus.
            return {}

        texts = [self._flatten_conversation(conv) for conv in conversations]

        # Vectorize the conversations.
        X = self.vectorizer.fit_transform(texts)

        # Cluster to identify patterns, never asking for more clusters than
        # there are samples.
        kmeans = KMeans(n_clusters=min(n_clusters, len(texts)), random_state=42)
        clusters = kmeans.fit_predict(X)

        # Describe each cluster.
        return self._analyze_clusters(clusters, texts, conversations)

    def _flatten_conversation(self, conversation: List[Dict]) -> str:
        """Convert a conversation into a single plain-text string.

        Each turn contributes ``"USER: <user>"`` followed by ``"BOT: <bot>"``;
        all pieces are joined with single spaces.

        Raises:
            KeyError: If a turn lacks the ``'user'`` or ``'bot'`` key.
        """
        messages = []
        for turn in conversation:
            messages.extend((f"USER: {turn['user']}", f"BOT: {turn['bot']}"))
        return " ".join(messages)

    def _analyze_clusters(self,
                          clusters: np.ndarray,
                          texts: List[str],
                          conversations: List[List[Dict]]) -> Dict:
        """Summarize the members of every cluster.

        Args:
            clusters: Cluster label for each conversation, aligned with
                ``texts`` and ``conversations``.
            texts: Flattened text of each conversation.
            conversations: The original conversations (sequences of turns).

        Returns:
            A dict keyed by cluster id (``0`` .. ``max(clusters)``) whose
            values hold ``'size'``, ``'avg_turns'``, ``'success_rate'`` and
            ``'common_patterns'``. Empty when ``clusters`` is empty.
        """
        cluster_analysis = {}
        if len(clusters) == 0:
            # max() below would raise on an empty sequence.
            return cluster_analysis

        for cluster_id in range(int(max(clusters)) + 1):
            cluster_texts = [
                text for label, text in zip(clusters, texts)
                if label == cluster_id
            ]
            cluster_convs = [
                conv for label, conv in zip(clusters, conversations)
                if label == cluster_id
            ]

            # A label id may end up with no members; report 0.0 instead of
            # the NaN (and RuntimeWarning) np.mean gives for an empty list.
            if cluster_convs:
                avg_turns = float(np.mean([len(c) for c in cluster_convs]))
            else:
                avg_turns = 0.0

            cluster_analysis[cluster_id] = {
                'size': len(cluster_texts),
                'avg_turns': avg_turns,
                'success_rate': self._calculate_success_rate(cluster_convs),
                'common_patterns': self._extract_common_patterns(cluster_texts)
            }

        return cluster_analysis

class PromptOptimizer:
    """Register prompt templates and track a smoothed performance score.

    Each template keeps an exponentially weighted moving average of its
    composite score plus a usage counter; every update is also appended to
    ``performance_history``.

    NOTE(review): ``_identify_successful_patterns`` and
    ``_generate_optimized_prompt`` are called by ``optimize_prompt`` but are
    not defined in this class body; they must be provided elsewhere (e.g. by
    a subclass) -- TODO confirm.
    """

    # Weight of each recognised metric in the composite score.
    METRIC_WEIGHTS = {
        'completion_rate': 0.4,
        'response_relevance': 0.3,
        'user_satisfaction': 0.3,
    }

    def __init__(self, smoothing_factor: float = 0.3):
        """Create an optimizer with no templates registered.

        Args:
            smoothing_factor: Weight given to the newest score in the moving
                average; must lie in ``(0, 1]``.

        Raises:
            ValueError: If ``smoothing_factor`` is outside ``(0, 1]``.
        """
        if not 0.0 < smoothing_factor <= 1.0:
            raise ValueError("smoothing_factor must be in the interval (0, 1]")
        self.smoothing_factor = smoothing_factor
        self.prompt_templates = {}
        self.performance_history = []

    def register_prompt_template(self,
                                 template_id: str,
                                 template: str,
                                 parameters: List[str]):
        """Register a prompt template, replacing any entry with the same id.

        The new entry starts with a score of ``0.0`` and a usage count of 0.
        """
        self.prompt_templates[template_id] = {
            'template': template,
            'parameters': parameters,
            'performance_score': 0.0,
            'usage_count': 0
        }

    def update_prompt_performance(self,
                                  template_id: str,
                                  performance_metrics: Dict):
        """Fold a new set of metrics into a template's running score.

        Unknown ``template_id`` values are ignored. The first observation
        seeds the moving average directly; later ones are blended in with
        ``smoothing_factor``.

        Args:
            template_id: Id of a previously registered template.
            performance_metrics: Metric name to value; see
                ``_calculate_performance_score`` for the names that count.
        """
        template = self.prompt_templates.get(template_id)
        if template is None:
            return

        # Score first, so a failure here leaves the counters untouched.
        new_score = self._calculate_performance_score(performance_metrics)

        if template['usage_count'] == 0:
            # The initial 0.0 is a placeholder, not a measurement; blending
            # with it would drag the first result down.
            template['performance_score'] = new_score
        else:
            alpha = self.smoothing_factor
            old_score = template['performance_score']
            template['performance_score'] = (alpha * new_score +
                                             (1 - alpha) * old_score)
        template['usage_count'] += 1

        self.performance_history.append({
            'template_id': template_id,
            'timestamp': datetime.now(),
            'metrics': performance_metrics,
            'score': new_score
        })

    def optimize_prompt(self, template_id: str) -> Optional[str]:
        """Build an optimized prompt from a template's performance history.

        Returns:
            The value produced by ``_generate_optimized_prompt``, or ``None``
            when ``template_id`` is not registered.
        """
        template = self.prompt_templates.get(template_id)
        if template is None:
            return None

        # Only the history entries that belong to this template.
        history = [h for h in self.performance_history
                   if h['template_id'] == template_id]

        # Identify what worked, then generate the optimized prompt from it.
        successful_patterns = self._identify_successful_patterns(history)
        return self._generate_optimized_prompt(
            template['template'],
            successful_patterns
        )

    def _calculate_performance_score(self, metrics: Dict) -> float:
        """Return the weighted sum of the recognised metrics.

        Metrics whose name is not in ``METRIC_WEIGHTS`` are ignored, and
        recognised metrics that are absent contribute nothing.
        """
        return sum(
            (weight * metrics[name]
             for name, weight in self.METRIC_WEIGHTS.items()
             if name in metrics),
            0.0,
        )

class ContinuousLearningSystem:
    """Tie conversation logging to prompt-performance tracking.

    Each processed conversation is logged with the analyzer, folded into the
    optimizer's score for the prompt, and may trigger a prompt optimization
    once the prompt has been used enough and scores below the threshold.

    NOTE(review): ``_analyze_performance_trends`` and
    ``_generate_recommendations`` are called by ``generate_insights_report``
    but are not defined in the visible part of this class -- TODO confirm
    they exist further down or in a subclass.
    """

    def __init__(self,
                 min_usage_count: int = 50,
                 performance_threshold: float = 0.7):
        """Create the analyzer and optimizer and store the trigger criteria.

        Args:
            min_usage_count: Minimum number of uses before a prompt can be
                considered for optimization.
            performance_threshold: A prompt is only optimized while its score
                is strictly below this value.
        """
        self.analyzer = ConversationAnalyzer()
        self.optimizer = PromptOptimizer()
        self.min_usage_count = min_usage_count
        self.performance_threshold = performance_threshold

    def process_conversation(self,
                             conversation_data: List[Dict],
                             prompt_used: str,
                             metrics: Dict) -> Optional[str]:
        """Process a new conversation for learning.

        Args:
            conversation_data: The conversation turns, passed to the analyzer.
            prompt_used: Stored with the log entry and also used as the
                template id when updating and optimizing the prompt.
            metrics: Success metrics for this conversation.

        Returns:
            The optimizer's result when the prompt meets the optimization
            criteria, otherwise ``None``.
        """
        # Log the conversation. The id is the current wall-clock timestamp,
        # so two calls within the same clock tick would share an id.
        self.analyzer.log_conversation(
            conversation_id=str(datetime.now().timestamp()),
            conversation_data=conversation_data,
            prompt_used=prompt_used,
            success_metrics=metrics
        )

        # Update the prompt's performance.
        self.optimizer.update_prompt_performance(
            template_id=prompt_used,
            performance_metrics=metrics
        )

        # Optimize only when the criteria are met.
        if not self._should_optimize_prompt(prompt_used):
            return None
        return self.optimizer.optimize_prompt(prompt_used)

    def generate_insights_report(self) -> Dict:
        """Build a report from the analysis results.

        Returns:
            A dict with the keys ``'conversation_patterns'``,
            ``'performance_trends'`` and ``'optimization_recommendations'``.
        """
        patterns = self.analyzer.analyze_conversation_patterns()
        performance_data = self._analyze_performance_trends()

        return {
            'conversation_patterns': patterns,
            'performance_trends': performance_data,
            'optimization_recommendations': self._generate_recommendations()
        }

    def _should_optimize_prompt(self, prompt_id: str) -> bool:
        """Decide whether a prompt should be optimized.

        Returns:
            ``True`` when the prompt is registered, has at least
            ``min_usage_count`` uses and scores below
            ``performance_threshold``; ``False`` otherwise.
        """
        template = self.optimizer.prompt_templates.get(prompt_id)
        if template is None:
            return False

        return (template['usage_count'] >= self.min_usage_count and
                template['performance_score'] < self.performance_threshold)