# Translation Quality Metrics Utilities

This notebook provides utilities for:
1. Calculating spelling error ratios between text pairs
2. Computing embedding distances using sentence transformers
3. Comprehensive translation quality metrics

In [1]:
from typing import Dict, Optional, Tuple
import numpy as np
import re
import difflib

# Auto-install required packages
try:
    from sentence_transformers import SentenceTransformer
    from scipy.spatial.distance import cosine, euclidean
except ImportError:
    print("Installing required packages...")
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "sentence-transformers", "scipy"])
    from sentence_transformers import SentenceTransformer
    from scipy.spatial.distance import cosine, euclidean

Installing required packages...


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class EmbeddingCalculator:
    """
    Singleton-like class for efficient embedding calculations.
    Caches the model to avoid reloading on every calculation.
    """
    _instance = None
    _model = None
    _model_name = None
    
    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance
    
    def get_model(self, model_name: str = 'all-MiniLM-L6-v2') -> SentenceTransformer:
        """Load or retrieve cached sentence transformer model."""
        if self._model is None or self._model_name != model_name:
            self._model = SentenceTransformer(model_name)
            self._model_name = model_name
        return self._model

## Helper Functions

In [17]:
def _normalize_text(text: str) -> str:
    """Normalize text by removing punctuation and converting to lowercase."""
    text = re.sub(r'[.,!?;:\'"()\[\]{}<>]', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.lower().strip()


def _calculate_word_difference_ratio(text1: str, text2: str) -> float:
    """Calculate ratio using symmetric difference of word sets.

    This version divides by the size of the union of both word sets so the
    returned ratio is always between 0.0 and 1.0 (0% - 100%)."""
    words1 = set(_normalize_text(text1).split())
    words2 = set(_normalize_text(text2).split())

    if not words1 and not words2:
        return 0.0

    differing_words = words1.symmetric_difference(words2)
    total_unique_words = len(words1.union(words2))

    return len(differing_words) / total_unique_words if total_unique_words > 0 else 0.0


def _calculate_levenshtein_ratio(text1: str, text2: str) -> float:
    """Calculate character-level Levenshtein distance ratio."""
    norm_text1 = _normalize_text(text1)
    norm_text2 = _normalize_text(text2)
    
    ratio = difflib.SequenceMatcher(None, norm_text1, norm_text2).ratio()
    return 1.0 - ratio


def _calculate_sequence_matcher_ratio(text1: str, text2: str) -> float:
    """Calculate error ratio using difflib.SequenceMatcher."""
    norm_text1 = _normalize_text(text1)
    norm_text2 = _normalize_text(text2)
    
    similarity = difflib.SequenceMatcher(None, norm_text1, norm_text2).ratio()
    return 1.0 - similarity

## Main Functions

In [18]:
def calculate_spelling_error_ratio(
    text1: str,
    text2: str,
    method: str = 'symmetric_difference'
) -> float:
    """
    Calculate the ratio of spelling/word errors between two text strings.
    
    This function compares two texts at the word level to determine how different
    they are. Useful for measuring translation quality degradation.
    
    Args:
        text1: First text string (typically the original)
        text2: Second text string (typically the final translation)
        method: Comparison method to use:
            - 'symmetric_difference': Words that appear in one text but not both
            - 'levenshtein': Character-level edit distance ratio
            - 'sequence_matcher': Uses difflib.SequenceMatcher for similarity
    
    Returns:
        float: Error ratio between 0.0 (identical) and 1.0 (completely different)
        
    Examples:
        >>> calculate_spelling_error_ratio("The cat sits", "The cat sits")
        0.0
        >>> calculate_spelling_error_ratio("The cat sits", "The dog runs")
        0.6667
    """
    if not text1 and not text2:
        return 0.0
    
    if not text1 or not text2:
        return 1.0
    
    if method == 'symmetric_difference':
        return _calculate_word_difference_ratio(text1, text2)
    elif method == 'levenshtein':
        return _calculate_levenshtein_ratio(text1, text2)
    elif method == 'sequence_matcher':
        return _calculate_sequence_matcher_ratio(text1, text2)
    else:
        raise ValueError(f"Unknown method: {method}. Use 'symmetric_difference', 'levenshtein', or 'sequence_matcher'")

In [19]:
def calculate_embedding_distance(
    text1: str,
    text2: str,
    model_name: str = 'all-MiniLM-L6-v2',
    return_all_metrics: bool = False
) -> float | Dict[str, float]:
    """
    Calculate semantic distance between two texts using sentence embeddings.
    
    This function uses state-of-the-art sentence transformers to create dense
    vector representations of texts and compute their semantic similarity.
    
    Args:
        text1: First text string
        text2: Second text string
        model_name: Sentence transformer model to use. Options:
            - 'all-MiniLM-L6-v2' (default, fast, 384 dimensions)
            - 'all-mpnet-base-v2' (slower, more accurate, 768 dimensions)
            - 'paraphrase-multilingual-MiniLM-L12-v2' (multilingual)
        return_all_metrics: If True, return dict with all distance metrics
    
    Returns:
        float: Cosine distance (0.0 = identical, 2.0 = opposite) if return_all_metrics=False
        dict: Dictionary with multiple metrics if return_all_metrics=True:
            - cosine_distance: 1 - cosine_similarity (0 to 2)
            - cosine_similarity: Cosine similarity (-1 to 1)
            - euclidean_distance: L2 distance between embeddings
            - manhattan_distance: L1 distance between embeddings
    
    Examples:
        >>> calculate_embedding_distance("The cat sits", "The cat sits")
        0.0
        >>> calculate_embedding_distance("I love pizza", "Pizza is great")
        0.234
    """
    if not text1 or not text2:
        return 1.0 if not return_all_metrics else {
            'cosine_distance': 1.0,
            'cosine_similarity': 0.0,
            'euclidean_distance': 0.0,
            'manhattan_distance': 0.0
        }
    
    # Get cached model
    calculator = EmbeddingCalculator()
    model = calculator.get_model(model_name)
    
    # Generate embeddings
    embedding1 = model.encode(text1, convert_to_numpy=True)
    embedding2 = model.encode(text2, convert_to_numpy=True)
    
    # Calculate cosine distance and similarity
    cos_dist = cosine(embedding1, embedding2)
    cos_sim = 1.0 - cos_dist
    
    if not return_all_metrics:
        return float(cos_dist)
    
    # Calculate additional metrics
    eucl_dist = euclidean(embedding1, embedding2)
    manh_dist = np.sum(np.abs(embedding1 - embedding2))
    
    return {
        'cosine_distance': float(cos_dist),
        'cosine_similarity': float(cos_sim),
        'euclidean_distance': float(eucl_dist),
        'manhattan_distance': float(manh_dist)
    }

In [20]:
def calculate_translation_quality_metrics(
    original: str,
    translated: str,
    model_name: str = 'all-MiniLM-L6-v2'
) -> Dict[str, float]:
    """
    Calculate comprehensive quality metrics for a translation pair.
    
    This is a convenience function that computes both spelling error ratio
    and embedding distance in a single call.
    
    Args:
        original: Original text
        translated: Translated text (after full translation chain)
        model_name: Sentence transformer model to use
    
    Returns:
        dict: Dictionary containing all quality metrics:
            - spelling_error_ratio: Word-level difference ratio
            - embedding_distance: Semantic distance (cosine)
            - embedding_similarity: Semantic similarity (1 - distance)
    
    Examples:
        >>> metrics = calculate_translation_quality_metrics(
        ...     "The cat sits on the mat",
        ...     "A cat is sitting on a mat"
        ... )
        >>> print(f"Error ratio: {metrics['spelling_error_ratio']:.2f}")
        >>> print(f"Semantic similarity: {metrics['embedding_similarity']:.2f}")
    """
    error_ratio = calculate_spelling_error_ratio(original, translated)
    embedding_dist = calculate_embedding_distance(original, translated, model_name)
    
    return {
        'spelling_error_ratio': float(error_ratio),
        'embedding_distance': float(embedding_dist),
        'embedding_similarity': float(1.0 - embedding_dist)
    }

## Example Usage

### Spelling Error Ratio Examples

In [21]:
# Example 1: Identical sentences
text1 = "The cat sits on the mat"
text2 = "The cat sits on the mat"
ratio = calculate_spelling_error_ratio(text1, text2)
print(f"Text 1: {text1}")
print(f"Text 2: {text2}")
print(f"Error Ratio: {ratio:.4f} ({ratio*100:.2f}%)\n")

Text 1: The cat sits on the mat
Text 2: The cat sits on the mat
Error Ratio: 0.0000 (0.00%)



In [22]:
# Example 2: Similar sentences with different words
text1 = "The cat sits on the mat"
text2 = "A feline rests on the rug"
ratio = calculate_spelling_error_ratio(text1, text2)
print(f"Text 1: {text1}")
print(f"Text 2: {text2}")
print(f"Error Ratio: {ratio:.4f} ({ratio*100:.2f}%)\n")

Text 1: The cat sits on the mat
Text 2: A feline rests on the rug
Error Ratio: 0.7778 (77.78%)



In [23]:
# Example 3: Different comparison methods
text1 = "The weather is beautiful today"
text2 = "The weather was beautiful yesterday"

for method in ['symmetric_difference', 'levenshtein', 'sequence_matcher']:
    ratio = calculate_spelling_error_ratio(text1, text2, method=method)
    print(f"Method: {method:25s} -> Error Ratio: {ratio:.4f}")

Method: symmetric_difference      -> Error Ratio: 0.5714
Method: levenshtein               -> Error Ratio: 0.1385
Method: sequence_matcher          -> Error Ratio: 0.1385


### Embedding Distance Examples

In [24]:
# Example 1: Identical sentences
text1 = "The cat sits on the mat"
text2 = "The cat sits on the mat"
distance = calculate_embedding_distance(text1, text2)
print(f"Text 1: {text1}")
print(f"Text 2: {text2}")
print(f"Embedding Distance: {distance:.4f}")
print(f"Similarity: {(1-distance)*100:.2f}%\n")

Text 1: The cat sits on the mat
Text 2: The cat sits on the mat
Embedding Distance: 0.0000
Similarity: 100.00%



In [25]:
# Example 2: Semantically similar sentences
text1 = "The cat sits on the mat"
text2 = "A feline rests on the rug"
distance = calculate_embedding_distance(text1, text2)
print(f"Text 1: {text1}")
print(f"Text 2: {text2}")
print(f"Embedding Distance: {distance:.4f}")
print(f"Similarity: {(1-distance)*100:.2f}%\n")

Text 1: The cat sits on the mat
Text 2: A feline rests on the rug
Embedding Distance: 0.4393
Similarity: 56.07%



In [26]:
# Example 3: Different sentences
text1 = "I love programming in Python"
text2 = "The weather is beautiful today"
distance = calculate_embedding_distance(text1, text2)
print(f"Text 1: {text1}")
print(f"Text 2: {text2}")
print(f"Embedding Distance: {distance:.4f}")
print(f"Similarity: {(1-distance)*100:.2f}%\n")

Text 1: I love programming in Python
Text 2: The weather is beautiful today
Embedding Distance: 0.9339
Similarity: 6.61%



In [27]:
# Example 4: All metrics
text1 = "The quick brown fox jumps over the lazy dog"
text2 = "The fast brown fox leaps over the lazy dog"
metrics = calculate_embedding_distance(text1, text2, return_all_metrics=True)
print(f"Text 1: {text1}")
print(f"Text 2: {text2}")
print("\nAll Metrics:")
for key, value in metrics.items():
    print(f"  {key:25s}: {value:.4f}")

Text 1: The quick brown fox jumps over the lazy dog
Text 2: The fast brown fox leaps over the lazy dog

All Metrics:
  cosine_distance          : 0.0257
  cosine_similarity        : 0.9743
  euclidean_distance       : 0.2269
  manhattan_distance       : 3.4938


### Translation Quality Metrics Examples

In [28]:
# Example: Comprehensive translation quality assessment
original = "The cat sits on the mat"
back_translated = "A cat is sitting on a mat"

metrics = calculate_translation_quality_metrics(original, back_translated)
print(f"Original: {original}")
print(f"Back-translated: {back_translated}")
print("\nQuality Metrics:")
print(f"  Spelling Error Ratio: {metrics['spelling_error_ratio']:.4f} ({metrics['spelling_error_ratio']*100:.2f}%)")
print(f"  Embedding Distance: {metrics['embedding_distance']:.4f}")
print(f"  Embedding Similarity: {metrics['embedding_similarity']:.4f} ({metrics['embedding_similarity']*100:.2f}%)")
print(f"\nInterpretation: A similarity of {metrics['embedding_similarity']*100:.1f}% suggests the translation preserved meaning {'well' if metrics['embedding_similarity'] > 0.7 else 'moderately' if metrics['embedding_similarity'] > 0.5 else 'poorly'}.")

Original: The cat sits on the mat
Back-translated: A cat is sitting on a mat

Quality Metrics:
  Spelling Error Ratio: 0.6250 (62.50%)
  Embedding Distance: 0.0274
  Embedding Similarity: 0.9726 (97.26%)

Interpretation: A similarity of 97.3% suggests the translation preserved meaning well.


In [29]:
# Example: Compare multiple translations
original = "I love programming in Python"
translations = [
    "Python programming is something I enjoy",
    "I enjoy coding with Python",
    "Programming in Python is fun for me",
    "The weather is nice today"  # Poor translation
]

print(f"Original: {original}\n")
for i, trans in enumerate(translations, 1):
    metrics = calculate_translation_quality_metrics(original, trans)
    print(f"Translation {i}: {trans}")
    print(f"  Similarity: {metrics['embedding_similarity']*100:.1f}%, Error Ratio: {metrics['spelling_error_ratio']*100:.1f}%\n")

Original: I love programming in Python

Translation 1: Python programming is something I enjoy
  Similarity: 91.9%, Error Ratio: 62.5%

Translation 2: I enjoy coding with Python
  Similarity: 92.9%, Error Ratio: 75.0%

Translation 3: Programming in Python is fun for me
  Similarity: 91.7%, Error Ratio: 66.7%

Translation 4: The weather is nice today
  Similarity: 8.0%, Error Ratio: 100.0%

Translation 4: The weather is nice today
  Similarity: 8.0%, Error Ratio: 100.0%



Real implementation

In [30]:
original_sentence = "The Trump administration is eager to use the momentum of talks with Ukrainian and Russian officials to try and force both Presidents Volodymyr Zelensky and Vladimir Putin to the table on an initial ceasefire deal, those sources said"
errored_sentence = "The Trump adminstration is egar to use the momemtum of talks with Ukrianian and Russain officals to try and force both President Volodymyr Zelensky and Vladmir Putin to the tabel on an inital ceasefire deal, thoze sources said."


In [31]:
ratio = calculate_spelling_error_ratio(original_sentence, errored_sentence)
print(f"Error Ratio: {ratio:.4f} ({ratio*100:.2f}%)\n")

Error Ratio: 0.5116 (51.16%)

