In [None]:
import nltk
import numpy as np
import requests
import json
import transformers
from typing import Dict, Tuple, List, Optional
from dataclasses import dataclass
from sentence_transformers import SentenceTransformer
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from sklearn.metrics.pairwise import cosine_similarity

# Fetch the 'punkt_tab' tokenizer data as soon as the module/cell is executed.
# Presumably this is what nltk.word_tokenize needs on recent NLTK releases
# (older releases use 'punkt') — TODO confirm against the installed version.
nltk.download('punkt_tab')

@dataclass
class TranslationScores:
    """Bundle of the quality metrics computed for one translation.

    The instance is intentionally mutable so that ``aggregate`` can be
    filled in after the individual metrics have been computed.
    """

    # Sentence-level BLEU score.
    bleu: float
    # METEOR score.
    meteor: float
    # Cosine similarity between sentence embeddings of the two texts.
    bert_similarity: float
    # Combined score derived from the individual metrics above.
    aggregate: float

class NLTKResourceManager:
    """Makes sure the NLTK data packages used for scoring are installed."""

    # Package identifiers as understood by ``nltk.download``.
    REQUIRED_RESOURCES = [
        'punkt',
        'wordnet',
        'omw-1.4',
        'averaged_perceptron_tagger',
        'universal_tagset'
    ]

    # Where each package lives inside the nltk_data directory. Looking every
    # package up under 'tokenizers/' (as was done before) only ever finds
    # 'punkt', so the other packages were re-downloaded on every run.
    RESOURCE_PATHS = {
        'punkt': 'tokenizers/punkt',
        'wordnet': 'corpora/wordnet',
        'omw-1.4': 'corpora/omw-1.4',
        'averaged_perceptron_tagger': 'taggers/averaged_perceptron_tagger',
        'universal_tagset': 'taggers/universal_tagset',
    }

    @classmethod
    def _is_installed(cls, resource: str) -> bool:
        """Return True if *resource* can be located in the local NLTK data.

        Resources without a known data path are reported as not installed so
        that the caller falls back to ``nltk.download``.
        """
        location = cls.RESOURCE_PATHS.get(resource)
        if location is None:
            return False
        try:
            nltk.data.find(location)
        except LookupError:
            return False
        return True

    @classmethod
    def ensure_resources(cls) -> bool:
        """Download required NLTK resources if not present.

        Returns:
            True when every entry of ``REQUIRED_RESOURCES`` is available
            locally (already installed or downloaded successfully), False if
            a download fails or an unexpected error occurs.
        """
        try:
            for resource in cls.REQUIRED_RESOURCES:
                if cls._is_installed(resource):
                    continue
                print(f"Downloading {resource}...")
                # nltk.download reports failure through its boolean return
                # value rather than by raising, so it must be checked.
                if not nltk.download(resource, quiet=True):
                    print(f"Error downloading NLTK resources: could not download {resource}")
                    return False
            return True
        except Exception as e:
            print(f"Error downloading NLTK resources: {str(e)}")
            return False

class TranslationMetrics:
    """Computes BLEU, METEOR and embedding-similarity scores for text pairs.

    Every ``calculate_*`` method is best-effort: blank input or an internal
    failure yields ``0.0`` (failures are printed) instead of raising.
    """

    def __init__(self):
        # Sentence-embedding model used by calculate_bert_similarity.
        self.bert_model = SentenceTransformer('all-MiniLM-L6-v2')

    @staticmethod
    def _is_blank(text) -> bool:
        """Return True for None, empty, or whitespace-only input."""
        return not text or not str(text).strip()

    @staticmethod
    def _tokenize(text: str) -> List[str]:
        """Tokenize *text* with NLTK, falling back to whitespace splitting.

        The broad ``except`` is deliberate: tokenisation problems (typically
        missing tokenizer data) should degrade the score, not abort it.
        """
        try:
            return nltk.word_tokenize(text)
        except Exception:
            return text.split()

    def calculate_bleu(self, reference: str, candidate: str) -> float:
        """Calculate BLEU score between reference and candidate texts.

        Both texts are lower-cased and stripped before tokenisation, and the
        score uses equal weights of 0.25 for the four n-gram orders.

        Returns:
            The BLEU score as a float, or 0.0 for blank input or on error.
        """
        if self._is_blank(reference) or self._is_blank(candidate):
            return 0.0
        try:
            reference = reference.lower().strip()
            candidate = candidate.lower().strip()

            reference_tokens = [self._tokenize(reference)]
            candidate_tokens = self._tokenize(candidate)

            weights = (0.25, 0.25, 0.25, 0.25)
            # float(): keep the declared return type even if an int 0 comes back.
            return float(sentence_bleu(reference_tokens, candidate_tokens, weights=weights))
        except Exception as e:
            print(f"Error in BLEU calculation: {str(e)}")
            return 0.0

    def calculate_meteor(self, reference: str, candidate: str) -> float:
        """Calculate METEOR score between reference and candidate texts.

        Returns:
            The METEOR score as a float, or 0.0 for blank input or on error.
        """
        # Same guard as the other metrics so blank input is not reported as
        # a calculation error.
        if self._is_blank(reference) or self._is_blank(candidate):
            return 0.0
        try:
            reference_tokens = self._tokenize(reference)
            candidate_tokens = self._tokenize(candidate)
            return float(meteor_score([reference_tokens], candidate_tokens))
        except Exception as e:
            print(f"Error in METEOR calculation: {str(e)}")
            return 0.0

    def calculate_bert_similarity(self, text1: str, text2: str) -> float:
        """Calculate BERT-based similarity between two texts.

        Both texts are encoded with ``self.bert_model`` and compared with
        cosine similarity.

        Returns:
            The cosine similarity as a float, or 0.0 for blank input or on
            error.
        """
        if self._is_blank(text1) or self._is_blank(text2):
            return 0.0
        try:
            text1, text2 = str(text1).strip(), str(text2).strip()
            embeddings = self.bert_model.encode([text1, text2])
            return float(cosine_similarity([embeddings[0]], [embeddings[1]])[0, 0])
        except Exception as e:
            print(f"Error in BERT similarity calculation: {str(e)}")
            return 0.0

class TranslationEvaluator:
    def __init__(self):
        if not NLTKResourceManager.ensure_resources():
            raise RuntimeError("Failed to initialize NLTK resources")
        self.metrics = TranslationMetrics()

    def evaluate_translation(self, source_text: str, human_translation: str) -> TranslationScores:
        """Evaluate translation quality using multiple metrics."""
        if not source_text or not human_translation:
            raise ValueError("Source text and human translation cannot be empty")

        scores = TranslationScores(
            bleu=self.metrics.calculate_bleu(source_text, human_translation),
            meteor=self.metrics.calculate_meteor(source_text, human_translation),
            bert_similarity=self.metrics.calculate_bert_similarity(source_text, human_translation),
            aggregate=0.0
        )

        valid_scores = [score for score in [scores.bleu, scores.meteor, scores.bert_similarity] 
                       if score is not None and score >= 0]
        scores.aggregate = np.mean(valid_scores) if valid_scores else 0.0

        return scores

class OllamaTranslator:
    """Client that translates text through a locally running Ollama server."""

    OLLAMA_API_URL = "http://localhost:11434"
    DEFAULT_TIMEOUT = 300  # 5 minutes timeout for longer translations
    STATUS_TIMEOUT = 5  # seconds; the health check should fail fast
    DEFAULT_MODEL = "llama3.1:70b"

    @classmethod
    def check_status(cls) -> bool:
        """Check if Ollama server is running and accessible.

        Returns:
            True if ``GET /api/tags`` answers with HTTP 200 within
            ``STATUS_TIMEOUT`` seconds, False otherwise.
        """
        try:
            # Without a timeout an unresponsive server would block forever.
            response = requests.get(
                f"{cls.OLLAMA_API_URL}/api/tags",
                timeout=cls.STATUS_TIMEOUT,
            )
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    @classmethod
    def _process_stream(cls, response: "requests.Response") -> str:
        """Process streaming response from Ollama.

        Each non-empty line of the stream is parsed as a JSON object; the
        ``response`` fragments are concatenated until an object with a truthy
        ``done`` field is seen. One dot is printed per fragment as progress.

        Returns:
            The concatenated text, stripped of surrounding whitespace.

        Raises:
            ValueError: If a line is not valid JSON or carries an ``error``
                field.
        """
        fragments = []
        for line in response.iter_lines():
            if not line:
                continue
            try:
                payload = json.loads(line)
            except json.JSONDecodeError as e:
                raise ValueError(f"Failed to decode JSON from stream: {e}") from e

            # Surface server-side failures instead of silently skipping them.
            if 'error' in payload:
                raise ValueError(f"Ollama returned an error: {payload['error']}")

            if 'response' in payload:
                fragments.append(payload['response'])
                # Print progress without newline
                print('.', end='', flush=True)
            if payload.get('done', False):
                print()  # New line after completion
                break

        return ''.join(fragments).strip()

    @classmethod
    def translate(cls, text: str, source_lang: str = "en", target_lang: str = "hi",
                  temperature: float = 0.7, timeout: Optional[int] = None,
                  model: Optional[str] = None) -> str:
        """
        Translate text using Ollama with streaming response handling.

        Args:
            text: Text to translate
            source_lang: Source language code
            target_lang: Target language code
            temperature: Model temperature (0.0 to 1.0)
            timeout: Request timeout in seconds (None for default).
                NOTE(review): requests presumably applies this to the
                connection and to each read rather than to the whole
                stream — confirm.
            model: Ollama model name (None for ``DEFAULT_MODEL``)

        Returns:
            Translated text

        Raises:
            ConnectionError: If Ollama server is not accessible
            ValueError: If the text is empty or translation fails
        """
        if not text or not text.strip():
            raise ValueError("Text to translate cannot be empty")

        model_name = model or cls.DEFAULT_MODEL
        effective_timeout = timeout or cls.DEFAULT_TIMEOUT

        if not cls.check_status():
            raise ConnectionError(
                "Cannot connect to Ollama server. Please ensure:\n"
                "1. Ollama is installed (https://ollama.ai/download)\n"
                "2. The Ollama service is running (run 'ollama serve' in terminal)\n"
                f"3. You have pulled the required model (run 'ollama pull {model_name}')"
            )

        prompt = (
            f"Translate the following {source_lang} text to {target_lang}. "
            f"Provide only the direct translation without any additional comments or explanations.\n"
            f"Text: {text}\n"
            "Translation:"
        )

        try:
            print("Starting translation", end='', flush=True)

            # The context manager releases the streamed connection even when
            # reading the body fails part-way through.
            with requests.post(
                f"{cls.OLLAMA_API_URL}/api/generate",
                json={
                    "model": model_name,
                    "prompt": prompt,
                    "stream": True,
                    # Sampling parameters belong under "options"; a top-level
                    # "temperature" field is not applied by the server.
                    "options": {"temperature": temperature},
                },
                headers={"Content-Type": "application/json"},
                timeout=effective_timeout,
                stream=True
            ) as response:
                response.raise_for_status()
                translated_text = cls._process_stream(response)

        except requests.exceptions.Timeout as e:
            raise ValueError(
                f"Translation request timed out after {effective_timeout} seconds"
            ) from e
        except requests.exceptions.RequestException as e:
            raise ValueError(f"Translation request failed: {str(e)}") from e
        except ValueError:
            # Already a meaningful error from stream processing; do not
            # re-wrap it as "unexpected".
            raise
        except Exception as e:
            raise ValueError(f"Unexpected error during translation: {str(e)}") from e

        if not translated_text:
            raise ValueError("Received empty translation from Ollama")

        return translated_text

def main():
    """Run the demo: translate one sentence, then score two translations.

    The sample sentence is machine-translated via OllamaTranslator, after
    which the hand-written translation and the machine translation are each
    evaluated against the source text and their scores printed. Any failure
    is reported on stdout instead of being raised.
    """
    source_text = "I'll go to Mumbai tomorrow"
    human_translation = "मैं कल मुंबई जाऊंगा"

    try:
        # Machine translation comes first so both candidates exist before scoring.
        print("\nPerforming machine translation...")
        machine_translation = OllamaTranslator.translate(
            source_text,
            source_lang="en",
            target_lang="hi",
            temperature=0.7,  # balance between creativity and accuracy
        )
        print(f"\nMachine Translation: {machine_translation}")

        print("\nEvaluating translations...")
        evaluator = TranslationEvaluator()

        # Human translation is reported before the machine translation.
        candidates = (
            ("\nHuman Translation Scores:", human_translation),
            ("\nMachine Translation Scores:", machine_translation),
        )
        for heading, translation in candidates:
            result = evaluator.evaluate_translation(source_text, translation)
            print(heading)
            print(f"BLEU Score: {result.bleu:.4f}")
            print(f"METEOR Score: {result.meteor:.4f}")
            print(f"BERT Similarity: {result.bert_similarity:.4f}")
            print(f"Aggregate Quality Score: {result.aggregate:.4f}")

    except Exception as e:
        print(f"\nError: {str(e)}")


# Run the demo only when executed directly (script or notebook cell), so that
# importing this module does not trigger network calls and model downloads.
if __name__ == "__main__":
    main()



Performing machine translation...


[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\MSI\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


Starting translation..............

Machine Translation: मैंले कल मुंबई जाऊंगा.

Evaluating translations...
Downloading wordnet...
Downloading omw-1.4...
Downloading averaged_perceptron_tagger...
Downloading universal_tagset...

Human Translation Scores:
BLEU Score: 0.0000
METEOR Score: 0.0000
BERT Similarity: 0.2289
Aggregate Quality Score: 0.0763

Machine Translation Scores:
BLEU Score: 0.0000
METEOR Score: 0.0000
BERT Similarity: 0.2262
Aggregate Quality Score: 0.0754
