In [12]:
import nltk
import sklearn
from sentence_transformers import SentenceTransformer
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import requests
import json

def ensure_nltk_resources():
    """
    Download all required NLTK resources.
    Returns True if successful, False if any download fails.
    """
    required_resources = [
        'punkt',
        'wordnet',
        'omw-1.4',
        'averaged_perceptron_tagger',
        'universal_tagset'
    ]
    
    try:
        for resource in required_resources:
            try:
                nltk.data.find(f'tokenizers/{resource}')
            except LookupError:
                print(f"Downloading {resource}...")
                nltk.download(resource, quiet=True)
        return True
    except Exception as e:
        print(f"Error downloading NLTK resources: {str(e)}")
        return False

def calculate_bleu_score(reference, candidate):
    """
    Calculate BLEU score between reference and candidate texts with improved tokenization.
    """
    if not reference or not candidate:
        return 0.0
    try:
        # Convert to lowercase and tokenize
        reference = reference.lower().strip()
        candidate = candidate.lower().strip()
        
        # Use word_tokenize instead of punkt directly
        reference_tokens = [reference.split()]  # Simple word splitting as fallback
        candidate_tokens = candidate.split()
        
        try:
            # Try NLTK tokenization if available
            reference_tokens = [nltk.word_tokenize(reference)]
            candidate_tokens = nltk.word_tokenize(candidate)
        except Exception as e:
            print(f"Warning: Using simple tokenization due to NLTK error: {str(e)}")
        
        # Calculate BLEU score with smoothing
        weights = (0.25, 0.25, 0.25, 0.25)  # Equal weights for 1-4 grams
        return sentence_bleu(reference_tokens, candidate_tokens, weights=weights)
    except Exception as e:
        print(f"Error in BLEU score calculation: {str(e)}")
        return 0.0

def calculate_meteor_score(reference, candidate):
    """Calculate METEOR score with improved error handling."""
    if not reference or not candidate:
        return 0.0
    try:
        # Convert to lowercase and ensure proper string format
        reference = str(reference).lower().strip()
        candidate = str(candidate).lower().strip()
        
        return meteor_score([reference], candidate)
    except Exception as e:
        print(f"Error in METEOR score calculation: {str(e)}")
        return 0.0

def evaluate_translation(source_text, human_translation, target_lang="hi"):
    """
    Evaluate translation quality using multiple metrics.
    """
    # Ensure NLTK resources are available
    if not ensure_nltk_resources():
        raise ValueError("Failed to download required NLTK resources")

    if not source_text or not human_translation:
        raise ValueError("Source text and human translation cannot be empty")

    try:
        bert_model = SentenceTransformer('all-MiniLM-L6-v2')
    except Exception as e:
        raise ValueError(f"Failed to load BERT model: {str(e)}")

    # Calculate scores
    scores = {
        "BLEU Score (Direct)": calculate_bleu_score(source_text, human_translation),
        "METEOR Score (Direct)": calculate_meteor_score(source_text, human_translation),
        "BERT Similarity (Direct)": calculate_bert_similarity(source_text, human_translation, bert_model)
    }

    # Calculate aggregate score from valid scores only
    valid_scores = [score for score in scores.values() if score is not None and score >= 0]
    aggregate_score = np.mean(valid_scores) if valid_scores else 0.0

    return scores, aggregate_score

def calculate_bert_similarity(text1, text2, model):
    """Calculate BERT-based similarity between two texts."""
    if not text1 or not text2:
        return 0.0
    try:
        # Ensure texts are strings and properly formatted
        text1 = str(text1).strip()
        text2 = str(text2).strip()
        
        embeddings = model.encode([text1, text2])
        return float(cosine_similarity([embeddings[0]], [embeddings[1]])[0, 0])
    except Exception as e:
        print(f"Error in BERT similarity calculation: {str(e)}")
        return 0.0

if __name__ == "__main__":
    # Example usage
    source_text = "I'll go to Mumbai tomorrow"
    human_translation = "मैं कल मुंबई जाऊंगा"
    
    try:
        # First ensure all NLTK resources are available
        if not ensure_nltk_resources():
            print("Failed to initialize NLTK resources. Please check your internet connection and try again.")
            exit(1)
            
        scores, aggregate_score = evaluate_translation(source_text, human_translation)
        
        print("\nDetailed Scores:")
        for metric, score in scores.items():
            print(f"{metric}: {score:.4f}")
        
        print(f"\nAggregate Quality Score: {aggregate_score:.4f}")
        
    except ValueError as e:
        print(f"Evaluation Error: {str(e)}")
    except Exception as e:
        print(f"Unexpected Error: {str(e)}")
        import traceback
        traceback.print_exc()

Downloading wordnet...
Downloading omw-1.4...
Downloading averaged_perceptron_tagger...
Downloading universal_tagset...
Downloading wordnet...
Downloading omw-1.4...
Downloading averaged_perceptron_tagger...
Downloading universal_tagset...
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - '/Users/vikk/nltk_data'
    - '/Library/Frameworks/Python.framework/Versions/3.12/nltk_data'
    - '/Library/Frameworks/Python.framework/Versions/3.12/share/nltk_data'
    - '/Library/Frameworks/Python.framework/Versions/3.12/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
*********************

In [13]:
import nltk
nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml
