# In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
import joblib
import os
import re
import json
import logging
import time
from datetime import datetime
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
import requests
from bs4 import BeautifulSoup, Comment
import concurrent.futures
import unicodedata
import hashlib
import html
from urllib.parse import quote_plus

# Set up logging
# Route every log record both to a file on disk and to the console.
logging.basicConfig(
    handlers=[logging.FileHandler("tt_qa_system.log"), logging.StreamHandler()],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Named logger used by all components in this module.
logger = logging.getLogger("TT_QA_System")

# --- Constants and Configuration ---
class Config:
    """Static settings shared by every component of the QA system."""

    # Input data and on-disk locations (relative to the working directory)
    CSV_FILE = 'File.csv'
    MODEL_DIR = 'models'
    CACHE_DIR = 'cache'

    # Similarity cut-offs applied to local knowledge-base matches
    LOCAL_HIGH_CONFIDENCE = 0.78
    LOCAL_MODERATE_CONFIDENCE = 0.48
    LOCAL_LOW_CONFIDENCE = 0.20

    # Web search behaviour
    ENABLE_WEB_SEARCH = True
    MAX_WEB_RESULTS = 3
    MAX_CHARS_PER_PAGE = 3000  # cap on text kept from one fetched page
    REQUEST_TIMEOUT = 10       # passed as the HTTP request timeout

    # Answer shaping
    SHORT_ANSWER_SENTENCES = 2
    MAX_ANSWER_LENGTH = 500

    # Web-content cache
    USE_CACHE = True
    CACHE_EXPIRY = 86400  # lifetime of a cache entry in seconds (24 hours)

    # Tunisie Telecom reference links
    TT_FACEBOOK_URL = "https://www.facebook.com/TunisieTelecom"
    TT_WEBSITE = "https://www.tunisietelecom.tn"

    @staticmethod
    def get_paths():
        """Ensure the model and cache directories exist and return file paths.

        Returns:
            dict: ``'model_path'``, ``'tfidf_path'`` and ``'cache_path'``
            mapped to the corresponding file locations.
        """
        os.makedirs(Config.MODEL_DIR, exist_ok=True)
        os.makedirs(Config.CACHE_DIR, exist_ok=True)

        layout = (
            ('model_path', Config.MODEL_DIR, 'tt_qa_model.joblib'),
            ('tfidf_path', Config.MODEL_DIR, 'tt_tfidf_vectorizer.joblib'),
            ('cache_path', Config.CACHE_DIR, 'web_cache.json'),
        )
        return {key: os.path.join(folder, filename) for key, folder, filename in layout}

# --- NLTK Resource Setup ---
def check_nltk_resources():
    """Ensure the NLTK data packages used by this module are installed.

    Each resource is looked up locally first and downloaded quietly only when
    the lookup fails. ``punkt_tab`` is requested in addition to ``punkt``
    because recent NLTK releases load the Punkt sentence/word tokenizer data
    from ``punkt_tab``; without it ``sent_tokenize``/``word_tokenize`` raise
    ``LookupError`` at call time.
    """
    required = (
        # (path probed with nltk.data.find, package name for nltk.download)
        ('tokenizers/punkt', 'punkt'),
        ('tokenizers/punkt_tab', 'punkt_tab'),
        ('corpora/stopwords', 'stopwords'),
        ('corpora/wordnet', 'wordnet'),
    )
    for resource_path, package in required:
        try:
            nltk.data.find(resource_path)
        except LookupError:
            nltk.download(package, quiet=True)
        
check_nltk_resources()
# Stopword set shared by the text processors: English always, French when available.
nltk_stopwords = set(stopwords.words('english'))
try:
    french_stopwords = set(stopwords.words('french'))
    nltk_stopwords.update(french_stopwords)
except Exception as exc:
    # French stopwords are optional: keep running with English only, but make
    # the degradation visible instead of silently swallowing every error
    # (a bare ``except`` would also trap KeyboardInterrupt/SystemExit).
    logger.warning(f"French stopwords unavailable, using English only: {exc}")

# --- TT Keywords ---
# Terms whose presence marks a query as Tunisie Telecom-related. The list is
# consulted by EnhancedTunisieTelecomAgent._is_query_tt_related and
# _is_query_off_topic; entries are mostly French, grouped by theme below.
TT_KEYWORDS = [
    # Core Company & General Terms
    "tt", "tunisie telecom", "telecom", "mytt", "espace tt", "client", "clients",
    "service", "services", "offre", "offres", "promotionnelle", "promotion", "promo",
    "abonnement", "forfait", "forfaits", "facture", "factures", "paiement", "recharge", "recharges",
    "code", "codes", "ussd", "ligne", "mobile", "fixe", "internet", "data", "voix", "appel", "appels",
    "sms", "message", "solde", "balance", "bonus", "activation", "prix", "tarif", "tarifs",
    "validité", "kelma", "aide", "assistance", "support", "information", "complémentaire",

    # Products and services
    "doubliha", "tourist sim", "jaweknet", "hadra net", "esim", "netbox", "box 5g tt",
    "fibre", "roaming", "oh!mega", "forfait oh!mega", "international", "pass etudiant",
    "sabba", "trankil", "kallemni", "tourist", "ehdia", "transfert",
     
    # USSD codes (dial strings of the form *...#)
    "*140#", "*135#", "*230#", "*211#", "*122*26#", "*152#", "*106#", "*122*1#",
    "*255#", "*122*5#", "*122*2#", "*540#", "*140*7*9#", "*140*7#", "*123#",
    "*133#", "*111#", "*153#", "*124#", "*100#", "*146#", "*43#", "*62*1162#",
    
    # Customer service numbers and their labels
    "1298", "centre d'appel", "1288", "centre d'appel b2b", "1199", "serveur vocal",
    
    # Entertainment and other services
    "play vod by tt", "streaming", "diwan sport", "mouzikti", "gameloft"
]

# Phrases that suggest a query has nothing to do with Tunisie Telecom.
# EnhancedTunisieTelecomAgent._is_query_off_topic tests each one as a plain
# substring of the cleaned query.
OFF_TOPIC_KEYWORDS = [
    "capital of", "weather in", "recipe for", "movie", "song", "history of",
    "sports score", "president of", "king of", "queen of", "what's the stock price of",
    "who won", "how to make", "what is the population of", "pizza", "cake", "cook", "food"
]

# --- Text Processing Utilities ---
class TextProcessor:
    """Text normalisation and cleaning helpers shared by the QA components."""

    def __init__(self):
        """Attach the shared stopword set and create a lemmatizer and a stemmer."""
        self.stopwords = nltk_stopwords
        self.lemmatizer = WordNetLemmatizer()
        self.stemmer = PorterStemmer()

    def normalize_text(self, text):
        """Return ``text`` with HTML entities decoded, accents stripped, lowercased.

        Non-string input yields an empty string.
        """
        if not isinstance(text, str):
            return ""
        # NFKD splits accented letters into base letter + combining mark;
        # dropping the combining marks leaves the unaccented base letters.
        decomposed = unicodedata.normalize('NFKD', html.unescape(text))
        without_marks = ''.join(
            filter(lambda ch: not unicodedata.combining(ch), decomposed)
        )
        return without_marks.lower()

    def basic_clean(self, text):
        """Normalise ``text`` and strip punctuation, keeping ``#``, ``*`` and ``?``.

        Runs of whitespace collapse to single spaces and the ends are trimmed.
        Non-string input yields an empty string.
        """
        if not isinstance(text, str):
            return ""
        normalized = self.normalize_text(text)
        # '#' and '*' survive so USSD codes such as *140# stay intact.
        punctuation_free = re.sub(r'[^\w\s#*?]', '', normalized)
        return re.sub(r'\s+', ' ', punctuation_free).strip()

    def advanced_clean(self, text):
        """Normalise, tokenize, drop stopwords/non-alphanumerics and lemmatize.

        Returns the surviving lemmas joined by single spaces; non-string input
        yields an empty string.
        """
        if not isinstance(text, str):
            return ""
        kept_lemmas = []
        for token in word_tokenize(self.normalize_text(text)):
            if not token.isalnum() or token in self.stopwords:
                continue
            kept_lemmas.append(self.lemmatizer.lemmatize(token))
        return ' '.join(kept_lemmas)

# --- Google Search Web Content Manager ---
class GoogleSearchManager:
    """Manager for Google search and web content processing"""
    
    def __init__(self):
        """Start with empty caches, load any persisted cache, attach a text processor."""
        self.cache, self.cache_expiry = {}, {}
        # Replaces the empty dicts above when a readable cache file exists.
        self.load_cache()
        self.text_processor = TextProcessor()
    
    def load_cache(self):
        """Populate the in-memory cache from the JSON cache file.

        Does nothing when caching is disabled or the file does not exist. Any
        error while reading is logged and both cache dicts are reset to empty.
        """
        cache_file = Config.get_paths()['cache_path']
        if not (Config.USE_CACHE and os.path.exists(cache_file)):
            return

        try:
            with open(cache_file, 'r', encoding='utf-8') as handle:
                stored = json.load(handle)
            self.cache = stored.get('content', {})
            self.cache_expiry = stored.get('expiry', {})
            logger.info(f"Loaded {len(self.cache)} items from web content cache")
        except Exception as e:
            logger.error(f"Failed to load cache: {str(e)}")
            self.cache = {}
            self.cache_expiry = {}
    
    def save_cache(self):
        """Persist the in-memory web cache to the JSON cache file.

        Does nothing when caching is disabled. The payload is first written to
        a temporary sibling file and then moved over the real cache file, so an
        interrupted write cannot leave a truncated, unparseable cache behind.
        Failures are logged and never raised: the cache is best-effort.
        """
        if not Config.USE_CACHE:
            return

        cache_path = Config.get_paths()['cache_path']
        tmp_path = f"{cache_path}.tmp"

        try:
            # Inside the try so a directory problem is logged, not propagated;
            # exist_ok avoids a race with another process creating it.
            os.makedirs(os.path.dirname(cache_path) or '.', exist_ok=True)

            cache_data = {
                'content': self.cache,
                'expiry': self.cache_expiry,
                'timestamp': datetime.now().isoformat()
            }
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(cache_data, f, ensure_ascii=False, indent=2)
            # Swap the finished file into place in a single step.
            os.replace(tmp_path, cache_path)
            logger.info(f"Saved {len(self.cache)} items to web content cache")
        except Exception as e:
            logger.error(f"Failed to save cache: {str(e)}")
            # Best-effort removal of a half-written temporary file.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
    
    def _get_cache_key(self, url):
        """Generate a cache key for a URL"""
        return hashlib.md5(url.encode('utf-8')).hexdigest()
    
    def extract_main_content(self, html_content, query_terms):
        """Extract the text of an HTML page that is relevant to the query.

        Page chrome (scripts, styles, navigation, forms, ...) and HTML comments
        are removed. Paragraphs of at least five words that mention any query
        term are kept; when none qualifies, the beginning of the body text is
        used instead.

        Matching is accent- and case-insensitive: both the query terms and the
        paragraph text go through ``TextProcessor.normalize_text``. Callers
        pass terms that were already accent-stripped, so comparing them with
        raw accented page text would miss French words such as "validité".

        Args:
            html_content: Raw HTML of the page.
            query_terms: Iterable of query words (may be empty or ``None``).

        Returns:
            str: Extracted text, at most ``Config.MAX_CHARS_PER_PAGE`` characters.
        """
        soup = BeautifulSoup(html_content, 'lxml')

        # Remove unnecessary elements
        for tag in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "figcaption"]):
            tag.decompose()

        # Remove HTML comments
        for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
            comment.extract()

        normalize = self.text_processor.normalize_text
        # Normalise the terms once; drop empties, which would match everything.
        wanted_terms = [term for term in (normalize(t) for t in (query_terms or ())) if term]

        relevant_texts = []
        for p in soup.find_all('p'):
            p_text = p.get_text(separator=' ', strip=True)
            if len(p_text.split()) < 5:
                continue

            searchable = normalize(p_text)
            if any(term in searchable for term in wanted_terms):
                # Keep the original (accented) paragraph for the answer text.
                relevant_texts.append(p_text)

        # If no relevant paragraphs, fall back to the start of the body text
        if not relevant_texts and soup.body:
            body_text = soup.body.get_text(separator=' ', strip=True)
            if body_text:
                relevant_texts.append(body_text[:Config.MAX_CHARS_PER_PAGE//2])

        return " ".join(relevant_texts)[:Config.MAX_CHARS_PER_PAGE]
    
    def fetch_web_content(self, url, query_terms):
        """Fetch a page and return its query-relevant text, using the cache.

        The extracted text depends on ``query_terms`` (paragraphs are filtered
        by them), so the cache key is built from the URL *and* the query terms.
        Keying on the URL alone would hand a later, different query the text
        that was filtered for an earlier one.

        Args:
            url: Page address; a falsy value returns ``None`` immediately.
            query_terms: Iterable of query words used to filter paragraphs.

        Returns:
            str | None: Extracted text, or ``None`` when the URL is empty, the
            response is not HTML, or any error occurs (errors are logged).
        """
        if not url:
            return None

        # Order-independent signature of the terms the extraction depends on.
        terms_signature = ' '.join(sorted({str(term) for term in (query_terms or ())}))
        cache_key = self._get_cache_key(f"{url}|{terms_signature}")

        # Check cache first if enabled
        if Config.USE_CACHE:
            now = time.time()
            if cache_key in self.cache and cache_key in self.cache_expiry:
                if now < self.cache_expiry[cache_key]:
                    logger.info(f"Using cached content for: {url}")
                    return self.cache[cache_key]

        try:
            logger.info(f"Fetching content from: {url}")
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5,fr;q=0.3',
                'Cache-Control': 'no-cache',
                'Pragma': 'no-cache',
            }

            response = requests.get(url, headers=headers, timeout=Config.REQUEST_TIMEOUT)
            response.raise_for_status()

            # Only HTML/XHTML responses can be parsed for paragraphs
            content_type = response.headers.get('Content-Type', '').lower()
            if 'text/html' not in content_type and 'application/xhtml+xml' not in content_type:
                logger.warning(f"URL {url} returned non-HTML content: {content_type}")
                return None

            extracted_content = self.extract_main_content(response.text, query_terms)

            # Cache non-empty results; persist to disk every tenth entry
            if Config.USE_CACHE and extracted_content:
                self.cache[cache_key] = extracted_content
                self.cache_expiry[cache_key] = time.time() + Config.CACHE_EXPIRY
                if len(self.cache) % 10 == 0:
                    self.save_cache()

            return extracted_content

        except Exception as e:
            # Best-effort: a failed page must not abort the whole search.
            logger.error(f"Error fetching {url}: {str(e)}")
            return None
    
    def google_search(self, query, site_filter=None, num_results=10):
        """Scrape a Google results page and return the parsed results.

        Args:
            query: Search text.
            site_filter: Optional domain appended as a ``site:`` restriction.
            num_results: Number of results requested and maximum returned.

        Returns:
            list[dict]: Entries with ``'title'``, ``'href'`` and ``'snippet'``.
            Any error is logged and yields an empty list.
        """
        # Local import: the file-level import only brings in quote_plus.
        from urllib.parse import parse_qs, urlsplit

        try:
            # Format the query with site filter if provided
            formatted_query = query
            if site_filter:
                formatted_query = f"{query} site:{site_filter}"

            encoded_query = quote_plus(formatted_query)
            search_url = f"https://www.google.com/search?q={encoded_query}&num={num_results}&hl=en"

            # Browser-like headers
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5,fr;q=0.3'
            }

            logger.info(f"Searching Google for: {formatted_query}")
            # Without a timeout a stalled connection would block indefinitely;
            # use the same limit as page fetches.
            response = requests.get(search_url, headers=headers, timeout=Config.REQUEST_TIMEOUT)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            results = []
            for div in soup.find_all('div', class_=['g', 'tF2Cxc']):
                title_elem = div.find('h3')
                if not title_elem:
                    continue
                title = title_elem.get_text()

                link_elem = div.find('a')
                if not link_elem:
                    continue

                href = link_elem.get('href', '')
                if href.startswith('/url?'):
                    # Redirect link: the real target is the percent-encoded
                    # ``q`` parameter. Skip this one result when it is absent
                    # rather than failing the whole search.
                    targets = parse_qs(urlsplit(href).query).get('q')
                    if not targets:
                        continue
                    href = targets[0]

                if not href.startswith('http'):
                    continue

                snippet_elem = div.find('div', class_=['VwiC3b', 'yXK7lf', 'MUxGbd', 'yDYNvb', 'lyLwlc'])
                snippet = snippet_elem.get_text() if snippet_elem else ""

                results.append({
                    'title': title,
                    'href': href,
                    'snippet': snippet
                })

            logger.info(f"Found {len(results)} Google search results")
            return results[:num_results]

        except Exception as e:
            logger.error(f"Google search error: {str(e)}")
            return []
    
    def _fetch_direct_tt_pages(self, query, query_terms, processed_urls):
        """Fetch well-known Tunisie Telecom pages directly, bypassing the search engine.

        URLs already in ``processed_urls`` are skipped; every attempted URL is
        added to that set. Returns a list of result dicts shaped like the ones
        produced by ``search_web``.
        """
        direct_urls = [
            "https://www.tunisietelecom.tn/particulier/assistance/",
            "https://www.tunisietelecom.tn/particulier/mobile/options-services/",
            f"https://www.tunisietelecom.tn/particulier/recherche/?q={quote_plus(query)}"
        ]

        direct_results = []
        for direct_url in direct_urls:
            if direct_url in processed_urls:
                continue

            processed_urls.add(direct_url)
            content = self.fetch_web_content(direct_url, query_terms)
            if content:
                direct_results.append({
                    'url': direct_url,
                    'title': "Tunisie Telecom",
                    'content': content,
                    'source_type': "Official Site (Direct)",
                    'snippet': ""
                })
        return direct_results

    def search_web(self, query):
        """Search the web for information about the query.

        Three searches are run (official site, general web, Facebook). Each new
        result URL is fetched; when the page yields no text, the search snippet
        is used as content instead.

        ``google_search`` reports failures by returning an empty list rather
        than raising, so the direct-URL fallback for the official site is
        triggered when that search raised *or* contributed nothing. Tying it
        to an exception alone would make the fallback unreachable.

        Args:
            query: The user's question.

        Returns:
            list[dict]: At most ``Config.MAX_WEB_RESULTS`` entries with keys
            ``'url'``, ``'title'``, ``'content'``, ``'source_type'`` and
            ``'snippet'``, official sources first.
        """
        logger.info(f"Performing web search for: {query}")
        query_terms = self.text_processor.basic_clean(query).split()

        # Search variations to get better coverage
        search_configs = [
            {"query": query, "site": "tunisietelecom.tn", "source_type": "Official Site"},
            {"query": f"Tunisie Telecom {query}", "site": None, "source_type": "General Web"},
            {"query": f"{query} Tunisie Telecom facebook", "site": "facebook.com", "source_type": "Social Media"},
        ]

        all_results = []
        processed_urls = set()

        for config in search_configs:
            search_failed = False
            contributed = 0

            try:
                results = self.google_search(
                    query=config["query"],
                    site_filter=config["site"],
                    num_results=Config.MAX_WEB_RESULTS
                )

                for result in results:
                    url = result.get('href')
                    title = result.get('title', '')

                    # Skip already processed URLs
                    if not url or url in processed_urls:
                        continue
                    processed_urls.add(url)

                    content = self.fetch_web_content(url, query_terms)
                    snippet = result.get('snippet', '')
                    if not content and not snippet:
                        continue

                    all_results.append({
                        'url': url,
                        'title': title,
                        # Fall back to the search snippet when the page gave no text
                        'content': content if content else snippet,
                        'source_type': config["source_type"],
                        'snippet': snippet
                    })
                    contributed += 1

            except Exception as e:
                logger.error(f"Search error for {config['query']}: {str(e)}")
                search_failed = True

            # Fallback: try common TT URLs directly when the official search
            # failed or produced nothing usable.
            if config["source_type"] == "Official Site" and (search_failed or contributed == 0):
                all_results.extend(
                    self._fetch_direct_tt_pages(query, query_terms, processed_urls)
                )

        # Sort by source priority, preferring the official TT website
        def sort_key(result):
            source_priority = {"Official Site": 0, "Official Site (Direct)": 0, "General Web": 1, "Social Media": 2}
            url_priority = 0 if "tunisietelecom.tn" in result.get('url', '') else 1
            return (source_priority.get(result['source_type'], 99), url_priority)

        all_results.sort(key=sort_key)

        return all_results[:Config.MAX_WEB_RESULTS]

# --- Enhanced QnA Model ---
class EnhancedQnAModel:
    """Enhanced Question-Answering model"""
    
    def __init__(self, csv_path=Config.CSV_FILE):
        """Set up empty state, then load a saved model or train a new one.

        Args:
            csv_path: Path of the CSV file holding the question/answer pairs.
        """
        self.csv_path = csv_path
        self.paths = Config.get_paths()

        # Processing components; the two models are filled in by load/train.
        self.text_processor = TextProcessor()
        self.vectorizer = self.nn_model = None

        # Knowledge-base data, populated from the CSV or the saved model.
        self.df = None
        self.original_questions, self.cleaned_questions, self.answers = [], [], []

        self.load_or_train_model()
    
    def load_or_train_model(self):
        """Load a saved model if available, otherwise train and save a new one.

        ``_train_model`` returns without fitting anything when the CSV cannot
        be loaded. In that case nothing is saved: persisting ``None`` models
        would make the next start-up "load" an unusable model successfully.
        """
        if self._load_model():
            logger.info("Loaded pre-trained QnA model")
            return

        logger.info("Training new QnA model")
        self._train_model()

        if self.vectorizer is None or self.nn_model is None:
            logger.error("Model training did not produce a model; nothing was saved")
            return

        self._save_model()
    
    def _load_model(self):
        """Load the saved nearest-neighbour model and TF-IDF vectorizer.

        Everything is read into local variables and validated before any
        attribute is assigned, so a missing, incomplete or inconsistent
        artefact leaves the instance untouched and makes the caller retrain.

        Returns:
            bool: ``True`` when a complete model was loaded, ``False`` when the
            files are missing, invalid, or loading raised (logged).
        """
        try:
            model_path = self.paths['model_path']
            tfidf_path = self.paths['tfidf_path']

            if not os.path.exists(model_path) or not os.path.exists(tfidf_path):
                return False

            # Load data from CSV first (populates self.df)
            self._load_csv_data()

            # NOTE(security): joblib.load unpickles arbitrary objects; these
            # files must come from a trusted location.
            nn_data = joblib.load(model_path)
            vectorizer = joblib.load(tfidf_path)

            nn_model = nn_data['nn_model']
            original_questions = nn_data['original_questions']
            cleaned_questions = nn_data['cleaned_questions']
            answers = nn_data['answers']

            if nn_model is None or vectorizer is None:
                logger.error("Saved model files contain no trained model")
                return False
            if len(original_questions) == 0 or len(original_questions) != len(answers):
                logger.error("Saved model data is empty or inconsistent")
                return False

            self.nn_model = nn_model
            self.vectorizer = vectorizer
            self.original_questions = original_questions
            self.cleaned_questions = cleaned_questions
            self.answers = answers

            return True

        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            return False
    
    def _save_model(self):
        """Write the nearest-neighbour model, its data and the vectorizer to disk.

        Errors are logged and not raised.
        """
        try:
            model_file = self.paths['model_path']
            # Make sure the target directory is there before dumping.
            os.makedirs(os.path.dirname(model_file), exist_ok=True)

            # The index is stored together with the data it was fitted on.
            joblib.dump(
                {
                    'nn_model': self.nn_model,
                    'original_questions': self.original_questions,
                    'cleaned_questions': self.cleaned_questions,
                    'answers': self.answers,
                },
                model_file,
            )
            joblib.dump(self.vectorizer, self.paths['tfidf_path'])

            logger.info("Model saved successfully")
        except Exception as e:
            logger.error(f"Error saving model: {str(e)}")
    
    def _load_csv_data(self):
        """Load the question/answer CSV and prepare the cleaned questions.

        Rows with a missing question or answer are dropped, as are rows whose
        question is empty after cleaning. Emptiness is checked again *after*
        that second filter: otherwise a CSV whose questions all clean to
        nothing would be reported as loaded and training would then fail on an
        empty corpus.

        Returns:
            bool: ``True`` when at least one usable QA pair was loaded,
            ``False`` otherwise (the reason is logged).
        """
        try:
            if not os.path.exists(self.csv_path):
                logger.error(f"CSV file not found: {self.csv_path}")
                return False

            self.df = pd.read_csv(self.csv_path)

            if 'question' not in self.df.columns or 'answer' not in self.df.columns:
                logger.error("CSV must have 'question' and 'answer' columns")
                return False

            # Remove rows with missing values
            self.df.dropna(subset=['question', 'answer'], inplace=True)

            if self.df.empty:
                logger.error("CSV contains no valid data after preprocessing")
                return False

            # Clean and preprocess texts
            self.df['cleaned_question'] = self.df['question'].apply(self.text_processor.basic_clean)

            # Remove rows with empty questions after cleaning
            self.df = self.df[self.df['cleaned_question'].str.strip() != '']

            if self.df.empty:
                logger.error("CSV contains no valid data after preprocessing")
                return False

            # Store data in instance variables
            self.original_questions = self.df['question'].tolist()
            self.cleaned_questions = self.df['cleaned_question'].tolist()
            self.answers = self.df['answer'].tolist()

            logger.info(f"Loaded {len(self.original_questions)} QA pairs from CSV")
            return True

        except Exception as e:
            logger.error(f"Error loading CSV data: {str(e)}")
            return False
    
    def _train_model(self):
        """Fit the TF-IDF vectorizer and the nearest-neighbour index on the CSV questions.

        Returns without training when the CSV data cannot be loaded.

        Raises:
            Exception: Any error raised while fitting is logged and re-raised.
        """
        csv_ready = self._load_csv_data()
        if not csv_ready:
            logger.error("Failed to load CSV data. Model training aborted.")
            return

        try:
            logger.info("Training TF-IDF model...")
            self.vectorizer = TfidfVectorizer(
                ngram_range=(1, 2),  # unigrams and bigrams
                min_df=1,
                max_df=0.95,
                analyzer='word',
                use_idf=True,
                smooth_idf=True,
                sublinear_tf=True,
            )
            question_matrix = self.vectorizer.fit_transform(self.cleaned_questions)

            logger.info("Training nearest neighbors model...")
            # Never ask for more neighbours than there are questions.
            neighbour_count = min(5, len(self.cleaned_questions))
            self.nn_model = NearestNeighbors(
                n_neighbors=neighbour_count,
                metric='cosine',
                algorithm='brute',
            )
            self.nn_model.fit(question_matrix)

            logger.info("Model training completed successfully")
        except Exception as e:
            logger.error(f"Error during model training: {str(e)}")
            raise
    
    def get_answer_with_details(self, query):
        """Find the stored question closest to ``query`` and return its answer.

        Args:
            query: The user's question.

        Returns:
            dict: ``'answer'``, ``'matched_question'``, ``'similarity'``
            (1 minus the cosine distance), ``'index'`` and ``'top_matches'``
            (up to three ``{'question', 'similarity', 'index'}`` dicts); a
            successful match also carries ``'method': 'tfidf'``. For an empty
            or non-string query, or when matching fails, the answer fields are
            ``None``, similarity is ``0.0`` and index is ``-1``.
        """
        def no_match():
            # A fresh dict per call so callers can never share a mutable result.
            return {
                'answer': None,
                'matched_question': None,
                'similarity': 0.0,
                'index': -1,
                'top_matches': []
            }

        if not isinstance(query, str) or not query.strip():
            return no_match()

        cleaned_query = self.text_processor.basic_clean(query)

        try:
            query_vector = self.vectorizer.transform([cleaned_query])
            distances, indices = self.nn_model.kneighbors(query_vector)

            if len(indices) > 0 and len(indices[0]) > 0:
                neighbour_ids = indices[0]
                neighbour_distances = distances[0]

                ranked = [
                    {
                        'question': self.original_questions[idx],
                        'similarity': float(1 - dist),
                        'index': int(idx)
                    }
                    for dist, idx in zip(neighbour_distances, neighbour_ids)
                ]

                # Neighbours come back nearest first.
                best_idx = neighbour_ids[0]
                best_sim = 1 - neighbour_distances[0]

                return {
                    'answer': self.answers[best_idx],
                    'matched_question': self.original_questions[best_idx],
                    'similarity': float(best_sim),
                    'index': int(best_idx),
                    'method': 'tfidf',
                    'top_matches': ranked[:3]
                }

        except Exception as e:
            logger.error(f"Error in question matching: {str(e)}")

        return no_match()

# --- Answer Generation ---
class AnswerGenerator:
    """Generate high-quality answers"""
    
    def __init__(self):
        """Create the generator and give it its own text processor."""
        processor = TextProcessor()
        self.text_processor = processor
    
    def summarize_text(self, text, num_sentences=Config.SHORT_ANSWER_SENTENCES):
        """Shorten ``text`` to its first few meaningful sentences.

        Texts of fewer than 20 words are returned unchanged. Otherwise
        sentences shorter than four words or containing website boilerplate
        are dropped, the first ``num_sentences`` survivors are joined, and the
        result is cut to ``Config.MAX_ANSWER_LENGTH`` characters plus "..."
        when it is longer than that.

        Args:
            text: Text to summarise.
            num_sentences: Maximum number of sentences to keep.

        Returns:
            str: The summary, or a French placeholder message when there is no
            text or no usable sentence.
        """
        if not text:
            return "Aucune information disponible."

        if len(text.split()) < 20:
            return text

        sentences = sent_tokenize(text)

        # Fragments typical of cookie banners, legal footers and link teasers.
        boilerplate_markers = (
            "cookie", "javascript", "terms of use", "privacy policy",
            "all rights reserved", "learn more", "click here", "pour plus d'informations"
        )

        def is_meaningful(sentence):
            if len(sentence.split()) < 4:
                return False
            lowered = sentence.lower()
            return not any(marker in lowered for marker in boilerplate_markers)

        selected = [s for s in sentences if is_meaningful(s)]

        # Everything looked like boilerplate: relax to the length filter only.
        if not selected:
            selected = [s for s in sentences if len(s.split()) > 3]

        if not selected:
            return "Information non disponible."

        summary = " ".join(selected[:num_sentences]).strip()

        if len(summary) > Config.MAX_ANSWER_LENGTH:
            summary = summary[:Config.MAX_ANSWER_LENGTH] + "..."

        return summary
    
    def format_answer_with_source(self, answer_text, source, confidence):
        """Summarise ``answer_text`` and prefix it with a label for its source.

        Args:
            answer_text: Raw answer text.
            source: Source tag, e.g. ``"LOCAL_HIGH_CONF"`` or ``"WEB_TT_FOUND"``.
            confidence: Accepted for interface compatibility; not used here.

        Returns:
            str: ``"<label>: <summary>"`` for local and web sources, or the
            bare summary for any other source tag.
        """
        if source.startswith("LOCAL"):
            # First marker found in the source tag wins, in this order.
            confidence_labels = (
                ("LOW_CONF", "[TT Knowledge Base (low confidence)]"),
                ("MOD_CONF", "[TT Knowledge Base (moderate confidence)]"),
                ("HIGH_CONF", "[TT Knowledge Base (high confidence)]"),
            )
            prefix = next(
                (label for marker, label in confidence_labels if marker in source),
                "[TT Knowledge Base]",
            )
            return f"{prefix}: {self.summarize_text(answer_text)}"

        summary = self.summarize_text(answer_text)
        if source == "WEB_TT_FOUND":
            return f"[Web Info (Tunisie Telecom)]: {summary}"
        return summary
    
    def combine_sources(self, local_result, web_results):
        """Choose the best answer from the local match and the web results.

        Preference order: high-confidence local match, moderate-confidence
        local match, first web result, low-confidence local match, and finally
        a fixed "no information" message.

        Args:
            local_result: Dict from the QnA model (``'similarity'``, ``'answer'``).
            web_results: List of web result dicts, best first; may be empty.

        Returns:
            dict: ``'answer'``, ``'source'``, ``'confidence'`` and
            ``'informative'``; web answers additionally carry ``'url'``.
        """
        similarity = local_result.get('similarity', 0)
        kb_answer = local_result.get('answer')

        def from_knowledge_base(source_tag):
            return {
                'answer': kb_answer,
                'source': source_tag,
                'confidence': similarity,
                'informative': True
            }

        # 1. High confidence local answer
        if similarity >= Config.LOCAL_HIGH_CONFIDENCE and kb_answer:
            return from_knowledge_base("LOCAL_HIGH_CONF")

        # 2. Moderate confidence local answer
        if similarity >= Config.LOCAL_MODERATE_CONFIDENCE and kb_answer:
            return from_knowledge_base("LOCAL_MOD_CONF")

        # 3. Web results available (list is already sorted by priority)
        if web_results:
            top_hit = web_results[0]
            return {
                'answer': top_hit['content'],
                'source': "WEB_TT_FOUND",
                'confidence': 0.6,  # fixed confidence assigned to web results
                'informative': True,
                'url': top_hit.get('url')
            }

        # 4. Low confidence local answer as fallback
        if similarity >= Config.LOCAL_LOW_CONFIDENCE and kb_answer:
            return from_knowledge_base("LOCAL_LOW_CONF")

        # 5. No good match found
        return {
            'answer': "Je n'ai pas d'information spécifique sur ce sujet. Veuillez consulter le site officiel de Tunisie Telecom.",
            'source': "TT_NOT_IN_KB_NO_WEB",
            'confidence': 0,
            'informative': False
        }

# --- TunisieTelecom Agent ---
class EnhancedTunisieTelecomAgent:
    """Tunisie Telecom question-answering agent.

    Wires together the local QnA model, the web search manager and the
    answer generator, and filters out off-topic queries before answering.
    """

    def __init__(self):
        """Create the QnA model, text processor, web manager and answer generator."""
        self.qna_model = EnhancedQnAModel(Config.CSV_FILE)
        self.text_processor = TextProcessor()
        self.web_manager = GoogleSearchManager()
        self.answer_generator = AnswerGenerator()

        logger.info("TunisieTelecom Agent initialized successfully")

    @staticmethod
    def _find_tt_keyword(cleaned_query):
        """Return the first TT keyword found as a whole word in ``cleaned_query``, else None."""
        for keyword in TT_KEYWORDS:
            # Word boundaries keep short keywords from matching inside
            # longer, unrelated words.
            if re.search(r'\b' + re.escape(keyword) + r'\b', cleaned_query):
                return keyword
        return None

    def _is_query_tt_related(self, query, similarity_score=0.0):
        """Determine if a query is related to Tunisie Telecom.

        Args:
            query: Raw user query.
            similarity_score: Similarity of the query to the best-matching
                known question, as supplied by the caller.

        Returns:
            True when the cleaned query contains a TT keyword as a whole
            word, or when ``similarity_score`` exceeds 0.30; False otherwise.
        """
        cleaned_query = self.text_processor.basic_clean(query)

        # Check for TT keywords
        keyword = self._find_tt_keyword(cleaned_query)
        if keyword is not None:
            logger.info(f"Query is TT-related (matched keyword: '{keyword}')")
            return True

        # Check for similarity to known questions
        if similarity_score > 0.30:  # Lower threshold for topic detection
            logger.info(f"Query is TT-related (similarity to known question: {similarity_score:.2f})")
            return True

        logger.info(f"Query is not TT-related: {query}")
        return False

    def _is_query_off_topic(self, query):
        """Determine if a query is completely off-topic.

        A query that mentions any TT keyword (whole-word match, the same rule
        as ``_is_query_tt_related``) is never treated as off-topic. Otherwise
        it is off-topic when it contains an off-topic keyword or a
        food-related term as a substring.

        Returns:
            True when the query is off-topic, False otherwise.
        """
        cleaned_query = self.text_processor.basic_clean(query)

        # The TT-keyword guard does not depend on the off-topic keyword being
        # tested, so evaluate it once up front.
        if self._find_tt_keyword(cleaned_query) is not None:
            return False

        # Check for off-topic indicators
        for keyword in OFF_TOPIC_KEYWORDS:
            if keyword in cleaned_query:
                logger.info(f"Query is off-topic (matched off-topic keyword: '{keyword}')")
                return True

        # Check for food-related queries
        food_terms = ("pizza", "recipe", "food", "cook", "restaurant", "meal", "dish")
        if any(term in cleaned_query for term in food_terms):
            logger.info("Query is off-topic (food related)")
            return True

        return False

    def process_query(self, query):
        """Process a user query and return detailed response data.

        Returns:
            A dict. For off-topic queries it holds a fixed reply with source
            ``"OFF_TOPIC"``. Otherwise it is the dict produced by
            ``AnswerGenerator.combine_sources`` extended with
            ``matched_question``, ``query``, ``is_tt_related`` and
            ``formatted_answer``.
        """
        logger.info(f"Processing query: {query}")

        # Check if query is off-topic
        if self._is_query_off_topic(query):
            off_topic_message = "I specialize in Tunisie Telecom topics. Please ask me about Tunisie Telecom services, products, or support."
            return {
                'answer': off_topic_message,
                'source': "OFF_TOPIC",
                'matched_question': None,
                'confidence': 0,
                'informative': False,
                'formatted_answer': off_topic_message
            }

        # Get answer from local model
        local_result = self.qna_model.get_answer_with_details(query)
        similarity = local_result.get('similarity', 0)

        logger.info(f"Local match similarity: {similarity:.4f}")
        if local_result.get('matched_question'):
            logger.info(f"Best matched question: {local_result.get('matched_question')}")

        # Determine if query is TT-related
        is_tt_query = self._is_query_tt_related(query, similarity)

        web_results = []
        # Perform web search if TT-related but low local confidence
        if is_tt_query and similarity < Config.LOCAL_MODERATE_CONFIDENCE and Config.ENABLE_WEB_SEARCH:
            logger.info("Performing web search for additional information")
            web_results = self.web_manager.search_web(query)

        # Combine sources and generate answer
        result = self.answer_generator.combine_sources(local_result, web_results)

        # Add additional information
        result['matched_question'] = local_result.get('matched_question')
        result['query'] = query
        result['is_tt_related'] = is_tt_query

        # Format the final answer
        if result.get('answer'):
            result['formatted_answer'] = result['answer']
        else:
            result['formatted_answer'] = "Je n'ai pas d'information sur ce sujet."

        return result

    def get_answer(self, query):
        """Get a clean answer to a user query without source attribution.

        For queries mentioning a code-related term (code, ussd, recharge,
        activation, activer), the first USSD-style code found in the answer
        (e.g. ``*123#``) is returned on its own. Otherwise the answer is
        summarized to ``Config.SHORT_ANSWER_SENTENCES`` sentences.

        Returns:
            The answer text as a string.
        """
        result = self.process_query(query)

        # If it's an off-topic query, return the standard response
        if result.get('source') == "OFF_TOPIC":
            return "Je me spécialise dans les sujets Tunisie Telecom. Veuillez me poser des questions sur les services, produits ou support de Tunisie Telecom."

        if not result.get('answer'):
            return "Je n'ai pas d'information spécifique sur ce sujet. Veuillez consulter le site officiel de Tunisie Telecom."

        # Remove any formatting elements or prefixes such as "[Source]:"
        answer_text = re.sub(r'\[.*?\]:', '', result['answer']).strip()

        # For specific queries about codes, ensure they're prominently shown
        code_terms = ('code', 'ussd', 'recharge', 'activation', 'activer')
        query_lower = query.lower()
        if any(term in query_lower for term in code_terms):
            # Extract code patterns like *123#, *123*xyz#. This already
            # covers codes introduced by "composez le ..." / "tapez ...", so
            # no separate instruction-phrase lookup is needed.
            code_patterns = re.findall(r'(\*\d+(?:\*[^*#]*?)?#)', answer_text)
            if code_patterns:
                return code_patterns[0]  # Return the first code found

        # Summarize the answer in a concise way
        return self.answer_generator.summarize_text(
            answer_text, num_sentences=Config.SHORT_ANSWER_SENTENCES
        ).strip()

# --- Evaluation Utilities ---
class EvaluationManager:
    """Utilities for evaluating agent performance on labelled test cases."""

    def __init__(self, agent):
        """Initialize with the agent to evaluate.

        Args:
            agent: Object exposing ``process_query(query)`` that returns a
                result dict.
        """
        self.agent = agent
        self.evaluation_results = []

    @staticmethod
    def _summarize(results):
        """Build the summary dict (overall and per-category accuracy) from per-case records.

        Each record must provide ``'expected_source'`` and ``'is_correct'``.
        """
        total = len(results)
        correct = sum(1 for record in results if record['is_correct'])

        # Group by expected source category
        category_metrics = {}
        for record in results:
            metrics = category_metrics.setdefault(
                record['expected_source'], {'correct': 0, 'total': 0}
            )
            metrics['total'] += 1
            if record['is_correct']:
                metrics['correct'] += 1

        for metrics in category_metrics.values():
            metrics['accuracy'] = (metrics['correct'] / metrics['total']) * 100 if metrics['total'] > 0 else 0

        return {
            'total_cases': total,
            'correct_cases': correct,
            'overall_accuracy': (correct / total) * 100 if total > 0 else 0,
            'category_metrics': category_metrics,
            'results': results
        }

    def evaluate_on_test_set(self, test_set):
        """Evaluate the agent on a test set.

        Args:
            test_set: Iterable of dicts with ``"query"`` and
                ``"expected_source_category"`` keys, and optionally
                ``"expected_csv_question_match"``.

        Returns:
            Summary dict with ``total_cases``, ``correct_cases``,
            ``overall_accuracy``, ``category_metrics`` and ``results``. The
            per-case records are also stored in ``self.evaluation_results``.
        """
        logger.info(f"Starting evaluation on {len(test_set)} test cases")

        results = []

        for test_case in test_set:
            query = test_case["query"]
            expected_source_category = test_case["expected_source_category"]
            expected_csv_question_match = test_case.get("expected_csv_question_match")

            # Process the query
            result = self.agent.process_query(query)
            actual_source = result.get('source', 'UNKNOWN')
            actual_matched_question = result.get('matched_question')

            # Determine correctness
            is_correct = False

            if actual_source != expected_source_category:
                logger.info(f"Source category mismatch. Expected: {expected_source_category}, Got: {actual_source}")
            elif expected_source_category.startswith("LOCAL_") and expected_csv_question_match:
                is_correct = actual_matched_question == expected_csv_question_match
                if not is_correct:
                    logger.info(f"CSV match incorrect. Expected: '{expected_csv_question_match}', Got: '{actual_matched_question}'")
            elif expected_source_category == "WEB_TT_FOUND":
                is_correct = bool(result.get('answer') and result.get('informative'))
                if not is_correct:
                    # 'answer' may be present with value None; `or ''` keeps
                    # the slice from raising TypeError in that case.
                    answer_preview = (result.get('answer') or '')[:100]
                    logger.info(f"Web result not informative: {answer_preview}")
            else:
                is_correct = True  # For OFF_TOPIC, etc.

            # Record result
            results.append({
                'query': query,
                'expected_source': expected_source_category,
                'expected_matched_question': expected_csv_question_match,
                'actual_source': actual_source,
                'actual_matched_question': actual_matched_question,
                'answer': result.get('formatted_answer', "No answer generated"),
                'confidence': result.get('confidence', 0),
                'is_correct': is_correct
            })

        # Store results
        self.evaluation_results = results

        # Return summary
        return self._summarize(results)

    def print_evaluation_summary(self, results=None):
        """Print a summary of evaluation results.

        Args:
            results: Summary dict as returned by ``evaluate_on_test_set``.
                When omitted, the summary is rebuilt from
                ``self.evaluation_results``; if that is empty an error is
                logged and nothing is printed.
        """
        if results is None:
            if not self.evaluation_results:
                logger.error("No evaluation results to summarize")
                return
            # Create summary from stored results
            results = self._summarize(self.evaluation_results)

        # Print summary information
        print("\n===== EVALUATION SUMMARY =====")
        print(f"Total test cases: {results['total_cases']}")
        print(f"Correct predictions: {results['correct_cases']}")
        print(f"Overall accuracy: {results['overall_accuracy']:.2f}%")

        print("\n----- Performance by Category -----")
        for category, metrics in results['category_metrics'].items():
            print(f"Category: {category}")
            print(f"  Accuracy: {metrics['accuracy']:.2f}% ({metrics['correct']}/{metrics['total']})")

# --- Interactive CLI ---
def run_interactive_mode(agent):
    """Run an interactive Q&A session on the console.

    Reads questions from standard input until the user types an exit
    command, interrupts the session (Ctrl-C) or standard input is exhausted.

    Args:
        agent: Object exposing ``get_answer(query)`` that returns the answer
            text to display.
    """
    print("\n===== Tunisie Telecom Q&A System =====")
    print("Ask questions about Tunisie Telecom services, products, or support.")
    print("Type 'exit', 'quit', or 'q' to end the session.")
    print("=====================================\n")

    while True:
        try:
            # Get user input
            user_input = input("\nAsk about Tunisie Telecom: ").strip()

            # Check for exit commands
            if user_input.lower() in ('exit', 'quit', 'q'):
                print("Thank you for using the Tunisie Telecom Q&A system. Goodbye!")
                break

            if not user_input:
                continue

            # Get direct answer without source attribution; perf_counter is
            # monotonic, so the elapsed time cannot be skewed by clock changes.
            start_time = time.perf_counter()
            answer = agent.get_answer(user_input)
            elapsed = time.perf_counter() - start_time

            # Display only the answer without prefixes
            print(f"\nAgent Answer: {answer}")

            # Only for debugging, can be removed for production
            print(f"\n(Query processed in {elapsed:.2f} seconds)")

        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin is closed/exhausted: every further input()
            # would raise again, so exit instead of looping forever.
            print("\nSession interrupted. Exiting...")
            break

        except Exception as e:
            # Best-effort loop: report the failure and keep the session alive.
            logger.error(f"Error in interactive mode: {str(e)}")
            print(f"An error occurred: {str(e)}")

# --- Main Function ---
def main():
    """Application entry point.

    Builds the agent, scores it against a small built-in evaluation set,
    prints the evaluation summary and then starts the interactive session.
    """
    print("Starting Tunisie Telecom Q&A System...")

    # Initialize the agent
    tt_agent = EnhancedTunisieTelecomAgent()

    def make_case(query, expected_source, expected_match=None):
        """Build one evaluation case; the CSV match key is added only when given."""
        case = {"query": query, "expected_source_category": expected_source}
        if expected_match is not None:
            case["expected_csv_question_match"] = expected_match
        return case

    # Expected category labels used by the evaluation set
    high_conf = "LOCAL_HIGH_CONF"
    off_topic = "OFF_TOPIC"
    # Queries missing from the knowledge base are expected to be answered
    # from the web only when web search is enabled.
    web_or_missing = "WEB_TT_FOUND" if Config.ENABLE_WEB_SEARCH else "TT_NOT_IN_KB_NO_WEB"

    # Define evaluation set
    evaluation_set = [
        make_case("How can I find a Tunisie Telecom store?",
                  high_conf,
                  "How can I find a Tunisie Telecom store?"),
        make_case("What is the capital of France?", off_topic),
        make_case("Tell me about the Doubliha offer",
                  high_conf,
                  "What prepaid mobile offer has Tunisie Telecom re-launched?"),
        make_case("how much is tourist sim card Tunisie Telecom?",
                  high_conf,
                  "Who is the 'Tourist SIM' offer intended for and what is its price?"),
        make_case("Tunisie Telecom financial report 2024", web_or_missing),
        make_case("how to activate esim for TT?",
                  high_conf,
                  "What are the steps to configure an eSIM on an iOS device?"),
        make_case("Best pizza recipe", off_topic),
        make_case("Tunisie telecom customer care email address", web_or_missing),
        make_case("What does JawekNet offer?",
                  high_conf,
                  "What does the 'JawekNet' option for prepaid mobile offers allow users to do?"),
    ]

    # Run evaluation and report it
    manager = EvaluationManager(tt_agent)
    summary = manager.evaluate_on_test_set(evaluation_set)
    manager.print_evaluation_summary(summary)

    # Run in interactive mode
    run_interactive_mode(tt_agent)


# Run the evaluation and the interactive session only when this file is
# executed as a script, not when it is imported as a module.
if __name__ == "__main__":
    main()

2025-05-24 21:43:05,804 - INFO - Loaded 1887 QA pairs from CSV
2025-05-24 21:43:05,816 - INFO - Loaded pre-trained QnA model
2025-05-24 21:43:05,817 - INFO - Loaded 10 items from web content cache
2025-05-24 21:43:05,817 - INFO - TunisieTelecom Agent initialized successfully
2025-05-24 21:43:05,817 - INFO - Starting evaluation on 9 test cases
2025-05-24 21:43:05,818 - INFO - Processing query: How can I find a Tunisie Telecom store?
2025-05-24 21:43:05,819 - INFO - Local match similarity: 1.0000
2025-05-24 21:43:05,820 - INFO - Best matched question: How can I find a Tunisie Telecom store?
2025-05-24 21:43:05,820 - INFO - Query is TT-related (matched keyword: 'tunisie telecom')
2025-05-24 21:43:05,820 - INFO - Processing query: What is the capital of France?
2025-05-24 21:43:05,820 - INFO - Query is off-topic (matched off-topic keyword: 'capital of')
2025-05-24 21:43:05,821 - INFO - Processing query: Tell me about the Doubliha offer
2025-05-24 21:43:05,822 - INFO - Local match similarit

Starting Tunisie Telecom Q&A System...


2025-05-24 21:43:06,378 - INFO - Found 0 Google search results
2025-05-24 21:43:06,379 - INFO - Searching Google for: Tunisie Telecom how much is tourist sim card Tunisie Telecom?
2025-05-24 21:43:07,179 - INFO - Found 2 Google search results
2025-05-24 21:43:07,180 - INFO - Fetching content from: http://www.tunisietelecom.tn/particulier/mobile/offres-prepayees/tourist/
2025-05-24 21:43:07,606 - INFO - Fetching content from: http://www.tunisietelecom.tn/particulier/
2025-05-24 21:43:07,830 - INFO - Searching Google for: how much is tourist sim card Tunisie Telecom? Tunisie Telecom facebook site:facebook.com
2025-05-24 21:43:08,344 - INFO - Found 0 Google search results
2025-05-24 21:43:08,344 - INFO - Source category mismatch. Expected: LOCAL_HIGH_CONF, Got: WEB_TT_FOUND
2025-05-24 21:43:08,344 - INFO - Processing query: Tunisie Telecom financial report 2024
2025-05-24 21:43:08,346 - INFO - Local match similarity: 0.3581
2025-05-24 21:43:08,346 - INFO - Best matched question: How can I


===== EVALUATION SUMMARY =====
Total test cases: 9
Correct predictions: 4
Overall accuracy: 44.44%

----- Performance by Category -----
Category: LOCAL_HIGH_CONF
  Accuracy: 20.00% (1/5)
Category: OFF_TOPIC
  Accuracy: 100.00% (2/2)
Category: WEB_TT_FOUND
  Accuracy: 50.00% (1/2)

===== Tunisie Telecom Q&A System =====
Ask questions about Tunisie Telecom services, products, or support.
Type 'exit', 'quit', or 'q' to end the session.




Ask about Tunisie Telecom:  code de recharge


2025-05-24 21:43:38,048 - INFO - Processing query: code de recharge
2025-05-24 21:43:38,050 - INFO - Local match similarity: 0.2258
2025-05-24 21:43:38,050 - INFO - Best matched question: What USSD code is used to recharge a mobile line with a scratch card?
2025-05-24 21:43:38,051 - INFO - Query is TT-related (matched keyword: 'recharge')
2025-05-24 21:43:38,051 - INFO - Performing web search for additional information
2025-05-24 21:43:38,051 - INFO - Performing web search for: code de recharge
2025-05-24 21:43:38,051 - INFO - Searching Google for: code de recharge site:tunisietelecom.tn
2025-05-24 21:43:38,418 - INFO - Found 0 Google search results
2025-05-24 21:43:38,418 - INFO - Searching Google for: Tunisie Telecom code de recharge
2025-05-24 21:43:39,063 - INFO - Found 0 Google search results
2025-05-24 21:43:39,064 - INFO - Searching Google for: code de recharge Tunisie Telecom facebook site:facebook.com
2025-05-24 21:43:39,566 - INFO - Found 0 Google search results



Agent Answer: *123*code secret de la carte#

(Query processed in 1.52 seconds)



Ask about Tunisie Telecom:  code internet


2025-05-24 21:43:50,574 - INFO - Processing query: code internet
2025-05-24 21:43:50,577 - INFO - Local match similarity: 0.3140
2025-05-24 21:43:50,577 - INFO - Best matched question: What is the USSD code for activating Mobile Internet?
2025-05-24 21:43:50,578 - INFO - Query is TT-related (matched keyword: 'code')
2025-05-24 21:43:50,578 - INFO - Performing web search for additional information
2025-05-24 21:43:50,578 - INFO - Performing web search for: code internet
2025-05-24 21:43:50,579 - INFO - Searching Google for: code internet site:tunisietelecom.tn
2025-05-24 21:43:51,347 - INFO - Found 3 Google search results
2025-05-24 21:43:51,347 - INFO - Fetching content from: http://www.tunisietelecom.tn/particulier/mobile/options-services/codes-des-services/
2025-05-24 21:43:51,699 - INFO - Fetching content from: http://www.tunisietelecom.tn/particulier/mobile/internet-mobile/forfaits/
2025-05-24 21:43:52,082 - INFO - Fetching content from: http://www.tunisietelecom.tn/particulier/mob


Agent Answer: *122*2#

(Query processed in 2.69 seconds)



Ask about Tunisie Telecom:  agence tt gafsa


2025-05-24 21:45:08,430 - INFO - Processing query: agence tt gafsa
2025-05-24 21:45:08,433 - INFO - Local match similarity: 0.3761
2025-05-24 21:45:08,433 - INFO - Best matched question: Where is the Tunisie Telecom agency Gafsa Centre in Gouvernorat de Gafsa located, and how can I find it on Google Maps?
2025-05-24 21:45:08,434 - INFO - Query is TT-related (matched keyword: 'tt')
2025-05-24 21:45:08,434 - INFO - Performing web search for additional information
2025-05-24 21:45:08,435 - INFO - Performing web search for: agence tt gafsa
2025-05-24 21:45:08,435 - INFO - Searching Google for: agence tt gafsa site:tunisietelecom.tn
2025-05-24 21:45:08,954 - INFO - Found 0 Google search results
2025-05-24 21:45:08,954 - INFO - Searching Google for: Tunisie Telecom agence tt gafsa
2025-05-24 21:45:09,556 - INFO - Found 0 Google search results
2025-05-24 21:45:09,557 - INFO - Searching Google for: agence tt gafsa Tunisie Telecom facebook site:facebook.com
2025-05-24 21:45:10,199 - INFO - Foun


Agent Answer: The TT agency Gafsa Centre is located at Avenue Habib Bourguiba, 2100 Gafsa. You can find it on Google Maps here: https://www.google.com/maps/search/?api=1&query=Tunisie+Telecom+Gafsa+Centre+Avenue+Habib+Bourguiba+2100+Gafsa

(Query processed in 1.77 seconds)



Ask about Tunisie Telecom:  les forfait internet mobile


2025-05-24 21:45:35,612 - INFO - Processing query: les forfait internet mobile
2025-05-24 21:45:35,616 - INFO - Local match similarity: 0.2837
2025-05-24 21:45:35,617 - INFO - Best matched question: Which USSD code is for tracking mobile internet forfait usage?
2025-05-24 21:45:35,617 - INFO - Query is TT-related (matched keyword: 'forfait')
2025-05-24 21:45:35,617 - INFO - Performing web search for additional information
2025-05-24 21:45:35,617 - INFO - Performing web search for: les forfait internet mobile
2025-05-24 21:45:35,618 - INFO - Searching Google for: les forfait internet mobile site:tunisietelecom.tn
2025-05-24 21:45:36,179 - INFO - Found 0 Google search results
2025-05-24 21:45:36,179 - INFO - Searching Google for: Tunisie Telecom les forfait internet mobile
2025-05-24 21:45:36,691 - INFO - Found 0 Google search results
2025-05-24 21:45:36,692 - INFO - Searching Google for: les forfait internet mobile Tunisie Telecom facebook site:facebook.com
2025-05-24 21:45:37,232 - INF


Agent Answer: *122*2# Suivi du forfait internet mobile

(Query processed in 1.62 seconds)



Ask about Tunisie Telecom:  Quel sont les forfait flybox


2025-05-24 21:46:08,552 - INFO - Processing query: Quel sont les forfait flybox
2025-05-24 21:46:08,555 - INFO - Local match similarity: 0.1867
2025-05-24 21:46:08,556 - INFO - Best matched question: How can prepaid Netbox users recharge their forfait?
2025-05-24 21:46:08,556 - INFO - Query is TT-related (matched keyword: 'forfait')
2025-05-24 21:46:08,557 - INFO - Performing web search for additional information
2025-05-24 21:46:08,558 - INFO - Performing web search for: Quel sont les forfait flybox
2025-05-24 21:46:08,558 - INFO - Searching Google for: Quel sont les forfait flybox site:tunisietelecom.tn
2025-05-24 21:46:08,947 - INFO - Found 0 Google search results
2025-05-24 21:46:08,948 - INFO - Searching Google for: Tunisie Telecom Quel sont les forfait flybox
2025-05-24 21:46:09,561 - INFO - Found 0 Google search results
2025-05-24 21:46:09,563 - INFO - Searching Google for: Quel sont les forfait flybox Tunisie Telecom facebook site:facebook.com
2025-05-24 21:46:10,104 - INFO - F


Agent Answer: Je n'ai pas d'information spécifique sur ce sujet. Veuillez consulter le site officiel de Tunisie Telecom.

(Query processed in 1.55 seconds)



Ask about Tunisie Telecom:  prix de forfait 


2025-05-24 21:46:40,044 - INFO - Processing query: prix de forfait
2025-05-24 21:46:40,047 - INFO - Local match similarity: 0.1827
2025-05-24 21:46:40,048 - INFO - Best matched question: How can a Mobile Master or Member track their 'Partage de la DATA' forfait?
2025-05-24 21:46:40,048 - INFO - Query is TT-related (matched keyword: 'forfait')
2025-05-24 21:46:40,049 - INFO - Performing web search for additional information
2025-05-24 21:46:40,049 - INFO - Performing web search for: prix de forfait
2025-05-24 21:46:40,050 - INFO - Searching Google for: prix de forfait site:tunisietelecom.tn
2025-05-24 21:46:40,589 - INFO - Found 0 Google search results
2025-05-24 21:46:40,589 - INFO - Searching Google for: Tunisie Telecom prix de forfait
2025-05-24 21:46:41,007 - INFO - Found 0 Google search results
2025-05-24 21:46:41,009 - INFO - Searching Google for: prix de forfait Tunisie Telecom facebook site:facebook.com
2025-05-24 21:46:41,734 - INFO - Found 0 Google search results



Agent Answer: Je n'ai pas d'information spécifique sur ce sujet. Veuillez consulter le site officiel de Tunisie Telecom.

(Query processed in 1.69 seconds)



Ask about Tunisie Telecom:  ou est l'agence de Ariana?


2025-05-24 21:51:08,782 - INFO - Processing query: ou est l'agence de Ariana?
2025-05-24 21:51:08,788 - INFO - Local match similarity: 0.3180
2025-05-24 21:51:08,789 - INFO - Best matched question: Where is the Tunisie Telecom agency Ariana Centre in Gouvernorat de l'Ariana located, and how can I find it on Google Maps?
2025-05-24 21:51:08,791 - INFO - Query is TT-related (similarity to known question: 0.32)
2025-05-24 21:51:08,792 - INFO - Performing web search for additional information
2025-05-24 21:51:08,792 - INFO - Performing web search for: ou est l'agence de Ariana?
2025-05-24 21:51:08,792 - INFO - Searching Google for: ou est l'agence de Ariana? site:tunisietelecom.tn
2025-05-24 21:51:09,374 - INFO - Found 0 Google search results
2025-05-24 21:51:09,375 - INFO - Searching Google for: Tunisie Telecom ou est l'agence de Ariana?
2025-05-24 21:51:09,848 - INFO - Found 0 Google search results
2025-05-24 21:51:09,849 - INFO - Searching Google for: ou est l'agence de Ariana? Tunisie 


Agent Answer: Tunisie Telecom Borj Louzir 2, La Soukra. 1 316 J'aime · 1 en parlent · 33 personnes étaient ici.

(Query processed in 2.98 seconds)



Ask about Tunisie Telecom:  Agence telecom ariana


2025-05-24 21:51:36,342 - INFO - Processing query: Agence telecom ariana
2025-05-24 21:51:36,345 - INFO - Local match similarity: 0.3142
2025-05-24 21:51:36,346 - INFO - Best matched question: Where is the Tunisie Telecom agency Ariana Centre in Gouvernorat de l'Ariana located, and how can I find it on Google Maps?
2025-05-24 21:51:36,346 - INFO - Query is TT-related (matched keyword: 'telecom')
2025-05-24 21:51:36,347 - INFO - Performing web search for additional information
2025-05-24 21:51:36,347 - INFO - Performing web search for: Agence telecom ariana
2025-05-24 21:51:36,347 - INFO - Searching Google for: Agence telecom ariana site:tunisietelecom.tn
2025-05-24 21:51:36,849 - INFO - Found 0 Google search results
2025-05-24 21:51:36,850 - INFO - Searching Google for: Tunisie Telecom Agence telecom ariana
2025-05-24 21:51:37,273 - INFO - Found 0 Google search results
2025-05-24 21:51:37,274 - INFO - Searching Google for: Agence telecom ariana Tunisie Telecom facebook site:facebook.co


Agent Answer: The TT agency Ariana Centre is located at Avenue Habib Bourguiba, 2080 Ariana. You can find it on Google Maps here: https://www.google.com/maps/search/?api=1&query=Tunisie+Telecom+Ariana+Centre+Avenue+Habib+Bourguiba+2080+Ariana

(Query processed in 1.45 seconds)



Ask about Tunisie Telecom:  agence ariana


2025-05-24 21:54:51,339 - INFO - Processing query: agence ariana
2025-05-24 21:54:51,343 - INFO - Local match similarity: 0.2895
2025-05-24 21:54:51,344 - INFO - Best matched question: Where is the Tunisie Telecom agency Ariana Centre in Gouvernorat de l'Ariana located, and how can I find it on Google Maps?
2025-05-24 21:54:51,345 - INFO - Query is not TT-related: agence ariana



Agent Answer: The TT agency Ariana Centre is located at Avenue Habib Bourguiba, 2080 Ariana. You can find it on Google Maps here: https://www.google.com/maps/search/?api=1&query=Tunisie+Telecom+Ariana+Centre+Avenue+Habib+Bourguiba+2080+Ariana

(Query processed in 0.01 seconds)



Ask about Tunisie Telecom:  code recharge


2025-05-24 21:55:25,951 - INFO - Processing query: code recharge
2025-05-24 21:55:25,956 - INFO - Local match similarity: 0.2525
2025-05-24 21:55:25,956 - INFO - Best matched question: What USSD code is used to recharge a mobile line with a scratch card?
2025-05-24 21:55:25,957 - INFO - Query is TT-related (matched keyword: 'recharge')
2025-05-24 21:55:25,957 - INFO - Performing web search for additional information
2025-05-24 21:55:25,958 - INFO - Performing web search for: code recharge
2025-05-24 21:55:25,958 - INFO - Searching Google for: code recharge site:tunisietelecom.tn
2025-05-24 21:55:26,466 - INFO - Found 0 Google search results
2025-05-24 21:55:26,467 - INFO - Searching Google for: Tunisie Telecom code recharge
2025-05-24 21:55:26,857 - INFO - Found 0 Google search results
2025-05-24 21:55:26,859 - INFO - Searching Google for: code recharge Tunisie Telecom facebook site:facebook.com
2025-05-24 21:55:27,364 - INFO - Found 0 Google search results



Agent Answer: *123*code secret de la carte#

(Query processed in 1.41 seconds)



Ask about Tunisie Telecom:  code internet


2025-05-24 21:55:35,598 - INFO - Processing query: code internet
2025-05-24 21:55:35,602 - INFO - Local match similarity: 0.3140
2025-05-24 21:55:35,603 - INFO - Best matched question: What is the USSD code for activating Mobile Internet?
2025-05-24 21:55:35,604 - INFO - Query is TT-related (matched keyword: 'code')
2025-05-24 21:55:35,604 - INFO - Performing web search for additional information
2025-05-24 21:55:35,604 - INFO - Performing web search for: code internet
2025-05-24 21:55:35,605 - INFO - Searching Google for: code internet site:tunisietelecom.tn
2025-05-24 21:55:36,132 - INFO - Found 0 Google search results
2025-05-24 21:55:36,135 - INFO - Searching Google for: Tunisie Telecom code internet
2025-05-24 21:55:36,662 - INFO - Found 0 Google search results
2025-05-24 21:55:36,663 - INFO - Searching Google for: code internet Tunisie Telecom facebook site:facebook.com
2025-05-24 21:55:37,118 - INFO - Found 0 Google search results



Agent Answer: *140*0#

(Query processed in 1.52 seconds)
