In [1]:
"""
Efficient English to Nepali Translator for Hate Speech Classification
Optimized for production use with caching and proper error handling
"""

from deep_translator import GoogleTranslator
from functools import lru_cache
from typing import List, Optional
import logging

# Setup logging
# basicConfig installs a default handler on the root logger with a WARNING
# threshold; per the standard-library docs it does nothing if the root logger
# already has handlers configured.
logging.basicConfig(level=logging.WARNING)
# Module-level logger named after this module; used by NepaliTranslator for
# its error/warning/info messages.
logger = logging.getLogger(__name__)


class NepaliTranslator:
    """
    English to Nepali translator with a per-instance LRU cache.

    Wraps ``GoogleTranslator(source='en', target='ne')`` and memoizes results
    keyed on the whitespace-stripped input, so repeated inputs are served from
    the cache instead of calling the backend again.

    Attributes:
        translator: The backend object whose ``translate(text)`` is called.
        cache_size: Maximum number of cached translations.
    """

    def __init__(self, cache_size: int = 1000):
        """
        Initialize translator with caching

        Args:
            cache_size: Maximum number of translations to cache (default 1000)
        """
        self.translator = GoogleTranslator(source='en', target='ne')
        self.cache_size = cache_size

        # Wrap the *bound* method so that each instance owns its own cache
        # (rather than sharing one class-level cache keyed on ``self``).
        self._translate_cached = lru_cache(maxsize=cache_size)(self._translate_single)

    def _translate_single(self, text: str) -> str:
        """
        Translate one text through the backend (the uncached worker).

        Callers normally go through ``self._translate_cached``, which wraps
        this method. ``lru_cache`` only stores return values, so a call that
        raises is not cached and may be retried later.

        Args:
            text: English text to translate

        Returns:
            Nepali translation; "" for empty/whitespace-only input; the input
            text itself when the backend returns an empty result.

        Raises:
            Exception: Whatever the backend raised (logged, then re-raised).
        """
        if not text or not text.strip():
            return ""

        try:
            result = self.translator.translate(text.strip())
        except Exception as e:
            # Lazy %-formatting: the message is only built if it is emitted.
            logger.error("Translation failed for text: '%s...' - Error: %s", text[:50], e)
            raise

        # Return original if translation empty
        return result if result else text

    def translate(self, text: str, fallback_to_original: bool = True) -> str:
        """
        Translate English text to Nepali with caching

        Args:
            text: English text to translate
            fallback_to_original: If True, returns original text on error;
                                 If False, raises exception

        Returns:
            Nepali translation (or original text if fallback enabled and error
            occurs). Empty/whitespace-only/None input yields "".

        Raises:
            Exception: The backend error, only when fallback_to_original is
                False.
        """
        if not text or not text.strip():
            return ""

        try:
            return self._translate_cached(text.strip())
        except Exception as e:
            if not fallback_to_original:
                raise
            logger.warning("Translation failed, using original text: %s", e)
            return text

    def batch_translate(self,
                       texts: List[str],
                       fallback_to_original: bool = True,
                       skip_empty: bool = True) -> List[str]:
        """
        Translate multiple texts one by one, sharing the instance cache.

        Args:
            texts: List of English texts to translate
            fallback_to_original: Return original text on translation errors;
                if False the first error is raised and the batch is aborted
            skip_empty: If True, empty/whitespace-only/None entries produce ""
                without touching the cache. If False they are still passed to
                the cached translator (which also returns "" for them).

        Returns:
            List of Nepali translations (same length and order as input)
        """
        results = []

        for text in texts:
            # Normalise falsy entries (None, "") to "" so that a None entry
            # never reaches ``.strip()`` and never leaks into the output list.
            cleaned = text.strip() if text else ""

            if skip_empty and not cleaned:
                results.append("")
                continue

            try:
                results.append(self._translate_cached(cleaned))
            except Exception as e:
                if not fallback_to_original:
                    raise
                logger.warning("Batch translation failed for item, using original: %s", e)
                results.append(text)

        return results

    def get_cache_info(self) -> dict:
        """
        Get cache statistics

        Returns:
            Dictionary with keys 'hits', 'misses', 'size', 'max_size' and
            'hit_rate' (hits / lookups, or 0.0 before any lookup).
        """
        cache_info = self._translate_cached.cache_info()
        lookups = cache_info.hits + cache_info.misses
        return {
            'hits': cache_info.hits,
            'misses': cache_info.misses,
            'size': cache_info.currsize,
            'max_size': cache_info.maxsize,
            # Guard against division by zero on a never-used cache.
            'hit_rate': cache_info.hits / lookups if lookups > 0 else 0.0,
        }

    def clear_cache(self):
        """Clear translation cache (also resets the hit/miss counters)."""
        self._translate_cached.cache_clear()
        logger.info("Translation cache cleared")

    def translate_with_fallback(self, text: str, max_retries: int = 2) -> str:
        """
        Translate with retry logic for transient failures

        Retries happen immediately (no delay between attempts). Failed
        attempts are not cached, so each retry reaches the backend again.

        Args:
            text: English text to translate
            max_retries: Maximum number of retry attempts after the first
                try. Negative values are treated as 0, so at least one
                attempt is always made.

        Returns:
            Nepali translation or original text on failure
        """
        if not text or not text.strip():
            return ""

        cleaned = text.strip()
        # Clamp: a negative value would otherwise yield an empty range and
        # return the original text without ever trying to translate it.
        last_attempt = max(0, max_retries)

        for attempt in range(last_attempt + 1):
            try:
                return self._translate_cached(cleaned)
            except Exception as e:
                if attempt < last_attempt:
                    logger.warning("Translation attempt %s failed, retrying...", attempt + 1)
                    continue
                logger.error("Translation failed after %s retries: %s", last_attempt, e)

        # Every attempt failed: fall back to the caller's original text.
        return text


# Simplified factory function for quick usage
def create_translator(cache_size: int = 1000) -> NepaliTranslator:
    """
    Build a NepaliTranslator with the requested cache capacity.

    Args:
        cache_size: Maximum number of translations the instance may cache

    Returns:
        A newly constructed NepaliTranslator
    """
    instance = NepaliTranslator(cache_size=cache_size)
    return instance


# Example usage for testing
if __name__ == "__main__":
    # Manual smoke test: exercises the translator against the live backend
    # and prints the results; nothing here is asserted.
    heavy_rule = "=" * 70
    light_rule = "-" * 70

    print(heavy_rule)
    print("TESTING IMPROVED NEPALI TRANSLATOR")
    print(heavy_rule)

    # Small cache is plenty for the handful of demo sentences below.
    translator = NepaliTranslator(cache_size=100)

    # --- 1. one-off translation ---
    print("\n1. Single Translation:")
    print(light_rule)
    test_text = "This is hate speech"
    result = translator.translate(test_text)
    print(f"EN: {test_text}")
    print(f"NP: {result}")

    # --- 2. same input again: expected to be answered from the cache ---
    print("\n2. Testing Cache (same text):")
    print(light_rule)
    result2 = translator.translate(test_text)
    print(f"NP: {result2}")
    print(f"Cache Info: {translator.get_cache_info()}")

    # --- 3. batch call; the last entry repeats the first to show a cache hit ---
    print("\n3. Batch Translation:")
    print(light_rule)
    batch_texts = [
        "You are stupid",
        "I hate this",
        "This is offensive",
        "You are stupid",
    ]

    results = translator.batch_translate(batch_texts)
    for eng, nep in zip(batch_texts, results):
        print(f"EN: {eng}")
        print(f"NP: {nep}")
        print()

    print(f"Final Cache Info: {translator.get_cache_info()}")

    # --- 4. edge cases ---
    print("\n4. Error Handling Test:")
    print(light_rule)

    # Empty input short-circuits to "" without calling the backend.
    empty_result = translator.translate("")
    print(f"Empty string result: '{empty_result}'")

    # Fallback flag passed explicitly; the original text is only returned
    # if the translation call raises.
    print("\nTranslation with fallback enabled (default):")
    result_fallback = translator.translate("test", fallback_to_original=True)
    print(f"Result: {result_fallback}")

    print("\n" + heavy_rule)
    print("✅ All tests completed!")
    print(heavy_rule)

TESTING IMPROVED NEPALI TRANSLATOR

1. Single Translation:
----------------------------------------------------------------------
EN: This is hate speech
NP: यो घृणा भाषण हो

2. Testing Cache (same text):
----------------------------------------------------------------------
NP: यो घृणा भाषण हो
Cache Info: {'hits': 1, 'misses': 1, 'size': 1, 'max_size': 100, 'hit_rate': 0.5}

3. Batch Translation:
----------------------------------------------------------------------
EN: You are stupid
NP: तिमी मुर्ख छौ

EN: I hate this
NP: म यो घृणा गर्छु

EN: This is offensive
NP: यो आपत्तिजनक छ

EN: You are stupid
NP: तिमी मुर्ख छौ

Final Cache Info: {'hits': 2, 'misses': 4, 'size': 4, 'max_size': 100, 'hit_rate': 0.3333333333333333}

4. Error Handling Test:
----------------------------------------------------------------------
Empty string result: ''

Translation with fallback enabled (default):
Result: परीक्षण

✅ All tests completed!
