In [27]:
import nltk
from nltk.translate import AlignedSent, IBMModel1, bleu_score, meteor_score, chrf_score
from nltk.tokenize import word_tokenize, TreebankWordDetokenizer
import os
import random
import numpy as np
from collections import defaultdict
# import pickle
import dill as pickle
import sacrebleu
import torch
torch.set_float32_matmul_precision('high')

# Make sure the 'punkt' tokenizer data is present; it is downloaded only when
# the local lookup fails.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# COMET is optional. COMET_AVAILABLE records whether the model was loaded, and
# evaluate_metrics() checks this flag before touching `comet_model`.
# Note that `comet_model` is left undefined whenever loading fails.
try:
    from comet import download_model, load_from_checkpoint
    COMET_AVAILABLE = True
    comet_model_path = download_model("Unbabel/wmt22-comet-da")
    comet_model = load_from_checkpoint(comet_model_path)
except ImportError:
    # NOTE(review): the message says "Installing", but nothing is installed
    # here; it only tells the user which pip command to run.
    print("COMET library not found. Installing: pip install unbabel-comet")
    COMET_AVAILABLE = False
except Exception as e:
    # Any other failure (e.g. while downloading or loading the checkpoint)
    # disables COMET instead of aborting the notebook.
    print(f"Error loading COMET: {e}. Running without COMET.")
    COMET_AVAILABLE = False

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

c:\Users\shang\anaconda3\envs\colab_gpu\lib\site-packages\lightning_fabric\utilities\cloud_io.py:73: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
Lightning automatically upgraded your loade

In [28]:
# Root folder containing one sub-directory per language variant; load_data()
# reads '<data_path>/<lang_dir>/all.en' and '.../all.es' from it.
# Set the MT_DATA_PATH environment variable to point somewhere else instead of
# editing this machine-specific default.
data_path = os.environ.get("MT_DATA_PATH", r"C:\Users\shang\Desktop\clean")

def load_data(lang_dir):
    """Load and tokenize the parallel English/Spanish files of one language directory.

    Reads '<data_path>/<lang_dir>/all.en' and '<data_path>/<lang_dir>/all.es',
    pairs them line by line, lower-cases and tokenizes each line, and drops
    every pair in which either side has no tokens.

    Args:
        lang_dir: Name of the sub-directory under the module-level `data_path`.

    Returns:
        List of (english_tokens, spanish_tokens) tuples, or None when either
        file does not exist. The list may be empty.
    """
    en_path = os.path.join(data_path, lang_dir, 'all.en')
    es_path = os.path.join(data_path, lang_dir, 'all.es')

    if not (os.path.exists(en_path) and os.path.exists(es_path)):
        return None

    with open(en_path, 'r', encoding='utf-8') as en_file:
        en_lines = en_file.readlines()
    with open(es_path, 'r', encoding='utf-8') as es_file:
        es_lines = es_file.readlines()

    # zip() below stops at the shorter file. A length mismatch usually means
    # the corpus is misaligned, so make it visible instead of truncating silently.
    if len(en_lines) != len(es_lines):
        print(f"Warning: {lang_dir} has {len(en_lines)} English lines but "
              f"{len(es_lines)} Spanish lines; unpaired trailing lines are ignored.")

    # NOTE(review): word_tokenize is called with its default language for the
    # Spanish side as well; confirm that this is intended.
    data = []
    for en_line, es_line in zip(en_lines, es_lines):
        en_tokens = word_tokenize(en_line.strip().lower())
        es_tokens = word_tokenize(es_line.strip().lower())
        # Keep the pair only when both sides produced at least one token.
        if en_tokens and es_tokens:
            data.append((en_tokens, es_tokens))

    return data

In [29]:
def train(train_data, iterations=10):
    """Fit an IBM Model 1 on a list of sentence pairs.

    Args:
        train_data: List of (source_tokens, target_tokens) tuples
        iterations: Number of EM iterations (default: 10)

    Returns:
        Trained IBMModel1 instance
    """
    # Each AlignedSent is built with the target tokens first and the source
    # tokens second.
    corpus = []
    for src_tokens, tgt_tokens in train_data:
        corpus.append(AlignedSent(tgt_tokens, src_tokens))

    model = IBMModel1(corpus, iterations)

    # Clear the default factories so the (nested) tables behave like plain
    # dicts: looking up a missing key raises KeyError instead of inserting it.
    # NOTE(review): presumably done so the model can be serialized; confirm.
    table = model.translation_table
    if not isinstance(table, defaultdict):
        return model

    table.default_factory = None
    for row in table.values():
        if isinstance(row, defaultdict):
            row.default_factory = None

    return model

def build_translation_dict(model):
    """Derive a one-best word dictionary from a model's translation table.

    The table is read as translation_table[target][source] -> probability.
    Entries with probability <= 1e-6 are ignored; for every remaining source
    word the target word with the highest probability is kept (on ties, the
    target encountered first while iterating the table wins).

    Args:
        model: Trained IBMModel1 instance

    Returns:
        Dictionary mapping source words to target words
    """
    table = model.translation_table

    # Group the surviving (target, probability) candidates by source word.
    candidates = {}
    for target_word in table:
        row = table[target_word]
        for source_word in row:
            probability = row[source_word]
            if probability > 1e-6:
                candidates.setdefault(source_word, []).append((target_word, probability))

    # max() returns the first maximal element, matching a stable descending sort.
    return {
        source_word: max(options, key=lambda pair: pair[1])[0]
        for source_word, options in candidates.items()
    }


def translate(model, source_tokens, translation_dict=None):
    """Translate source tokens word by word using the trained model.

    Args:
        model: Trained IBMModel1 instance. Only used to build the dictionary
            when `translation_dict` is not supplied.
        source_tokens: List of source language tokens
        translation_dict: Optional prebuilt source -> target mapping (as
            returned by build_translation_dict). Pass it when translating many
            sentences with the same model so the dictionary is not rebuilt
            from the full translation table on every call.

    Returns:
        List of target language tokens, one per input token. Tokens without a
        dictionary entry are kept unchanged.
    """
    if translation_dict is None:
        translation_dict = build_translation_dict(model)

    # Fall back to the original word when no translation is known.
    return [translation_dict.get(word, word) for word in source_tokens]

'''
def evaluate(model, test_data):
    """Evaluates the model on test data using BLEU score.
    
    Args:
        model: Trained IBMModel1 instance
        test_data: List of (source_tokens, target_tokens) tuples
    
    Returns:
        BLEU score (float)
    """
    references = []
    hypotheses = []
    
    for source, target in test_data:
        translated = translate(model, source)
        references.append([target])
        hypotheses.append(translated)
    
    score = bleu_score.corpus_bleu(references, hypotheses)
    return score
'''

def evaluate_metrics(model, test_data, comet_batch_size=128, comet_gpus=1):
    """Evaluate the model using BLEU, ChrF, METEOR, and COMET.

    Every source sentence is translated word by word with the model's one-best
    dictionary. BLEU is computed at corpus level on tokens, METEOR is the mean
    of the per-sentence scores, ChrF is computed on the detokenized strings,
    and COMET is only computed when the module-level COMET_AVAILABLE flag is
    set.

    Args:
        model: Trained IBMModel1 instance
        test_data: List of (source_tokens, target_tokens) tuples
        comet_batch_size: Batch size passed to the COMET model (default: 128)
        comet_gpus: Number of GPUs passed to the COMET model (default: 1)

    Returns:
        Dict with the keys 'BLEU', 'METEOR', 'ChrF' and 'COMET'. 'COMET' is
        None when COMET is unavailable or fails; all values are None when
        `test_data` is empty.
    """
    if not test_data:
        # Nothing to score; report every metric as missing rather than
        # averaging over an empty list.
        return dict.fromkeys(('BLEU', 'METEOR', 'ChrF', 'COMET'))

    detokenizer = TreebankWordDetokenizer()

    # Build the dictionary once. Going through translate() for each sentence
    # would rebuild it from the full translation table every time.
    translation_dict = build_translation_dict(model)

    refs_tokens_list = []
    hyps_tokens_list = []

    meteor_scores = []

    src_strs = []
    ref_strs = []
    hyp_strs = []

    for source_tokens, target_tokens in test_data:
        # Same rule as translate(): unknown words are copied through unchanged.
        hyp_tokens = [translation_dict.get(word, word) for word in source_tokens]

        refs_tokens_list.append([target_tokens])
        hyps_tokens_list.append(hyp_tokens)

        meteor_scores.append(meteor_score.meteor_score([target_tokens], hyp_tokens))

        # ChrF and COMET below are fed plain strings, so detokenize all three sides.
        src_strs.append(detokenizer.detokenize(source_tokens))
        ref_strs.append(detokenizer.detokenize(target_tokens))
        hyp_strs.append(detokenizer.detokenize(hyp_tokens))

    results = {}

    results['BLEU'] = bleu_score.corpus_bleu(refs_tokens_list, hyps_tokens_list)

    results['METEOR'] = np.mean(meteor_scores)

    results['ChrF'] = chrf_score.corpus_chrf(ref_strs, hyp_strs)

    if COMET_AVAILABLE:
        print("Calculating COMET...")
        data = [{"src": s, "mt": h, "ref": r} for s, h, r in zip(src_strs, hyp_strs, ref_strs)]
        try:
            comet_output = comet_model.predict(data, batch_size=comet_batch_size, gpus=comet_gpus)
            results['COMET'] = comet_output.system_score
        except Exception as e:
            # A COMET failure must not discard the other metrics.
            print(f"Error running COMET: {e}")
            results['COMET'] = None
    else:
        results['COMET'] = None

    return results
'''
def save_model(model, model_path):
    """Saves the trained model to disk using pickle.
    
    Args:
        model: Trained IBMModel1 instance
        model_path: Path where to save the model
    """
    # Create directory if it doesn't exist
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)
    print(f"Model saved to {model_path}")
'''

'\ndef save_model(model, model_path):\n    """Saves the trained model to disk using pickle.\n    \n    Args:\n        model: Trained IBMModel1 instance\n        model_path: Path where to save the model\n    """\n    # Create directory if it doesn\'t exist\n    model_dir = os.path.dirname(model_path)\n    if model_dir:\n        os.makedirs(model_dir, exist_ok=True)\n    \n    with open(model_path, \'wb\') as f:\n        pickle.dump(model, f)\n    print(f"Model saved to {model_path}")\n'

In [24]:
'''
def train_and_evaluate(lang_dir, save_model_flag=False):
    """Convenience function to train and evaluate a model for a language directory.
    
    Args:
        lang_dir: Language directory name
        save_model_flag: Whether to save the trained model (default: False)
    
    Returns:
        BLEU score (float) or None if data not found
    """
    print(f"Processing {lang_dir}...")
    data = load_data(lang_dir)
    if not data:
        print(f"Skipping {lang_dir} (files not found or empty)")
        return None
        
    # Split data
    random.seed(42)
    random.shuffle(data)
    split_idx = int(len(data) * 0.8)
    train_data = data[:split_idx]
    test_data = data[split_idx:]
    
    print(f"  Training on {len(train_data)} sentences...")
    model = train(train_data, iterations=10)
    
    # Save model if requested

    if save_model_flag:
        model_path = os.path.join('models', f'{lang_dir}_ibm1.pkl')
        save_model(model, model_path)

    # print("  Evaluating...")
    # score = evaluate(model, test_data)
    # print(f"  BLEU Score: {score:.4f}")
    return # score
'''
def train_and_evaluate(lang_dir, save_model_flag=False):
    """Train an IBM Model 1 for one language directory and score it on a held-out split.

    The data is shuffled with a fixed seed, the first 80% is used for training
    and the remaining 20% for evaluation.

    Args:
        lang_dir: Language directory name
        save_model_flag: Whether to save the trained model (default: False).
            Saving is currently disabled because save_model() is commented
            out in this notebook, so the flag only triggers a notice.

    Returns:
        Dict of metric name -> score as returned by evaluate_metrics (a value
        may be None), or None if no data could be loaded.
    """
    print(f"Processing {lang_dir}...")
    data = load_data(lang_dir)
    if not data:
        print(f"Skipping {lang_dir}")
        return None

    # Fixed seed so the train/test split is reproducible across runs.
    random.seed(42)
    random.shuffle(data)

    split_idx = int(len(data) * 0.8)
    train_data = data[:split_idx]
    test_data = data[split_idx:]

    print(f"Training on {len(train_data)} sentences...")
    model = train(train_data, iterations=10)

    if save_model_flag:
        # save_model() only exists inside a string-quoted (disabled) block, so
        # calling it would raise NameError; tell the caller instead of
        # silently ignoring the request.
        print("Note: save_model_flag is set but model saving is currently disabled; model not saved.")

    print("Evaluating...")
    scores = evaluate_metrics(model, test_data)

    print("Results:")
    for k, v in scores.items():
        if v is not None:
            print(f"{k}: {v:.4f}")

    return scores
'''
# Run for all datasets
subdirs = [d for d in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, d))]
results = {}

for d in sorted(subdirs):
    score = train_and_evaluate(d, True)
    # if score is not None:
        # results[d] = score
        
print("\nFinal Results:")
# for lang, score in results.items():
    # print(f"{lang}: {score:.4f}")
'''

'\n# Run for all datasets\nsubdirs = [d for d in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, d))]\nresults = {}\n\nfor d in sorted(subdirs):\n    score = train_and_evaluate(d, True)\n    # if score is not None:\n        # results[d] = score\n        \nprint("\nFinal Results:")\n# for lang, score in results.items():\n    # print(f"{lang}: {score:.4f}")\n'

In [None]:
# NOTE(review): `results` is only assigned inside the string-quoted (disabled)
# driver block in the previous cell, so on a fresh kernel this cell raises
# NameError.
results

In [30]:
import os
import random
# import pickle
import dill as pickle

# Directory (relative to the working directory) that run_evaluation_pipeline()
# scans for '*_ibm1.pkl' model files.
model_dir = "models"

def run_evaluation_pipeline(model_directory=None, max_test_size=1000):
    """Evaluate every saved '*_ibm1.pkl' model on its language's held-out data.

    For each model file the language code is taken from the file name, the
    data for that language is loaded, shuffled with seed 42 and split 80/20
    (the same seed and ratio train_and_evaluate uses), and the model is scored
    on the test part with evaluate_metrics.

    Args:
        model_directory: Directory containing the model files. Defaults to the
            module-level `model_dir`.
        max_test_size: Maximum number of test sentences per language
            (default: 1000). Pass None to use the whole test split.

    Returns:
        Dict mapping language code -> metrics dict. Languages whose data or
        model cannot be loaded are skipped. Empty when the model directory
        does not exist or holds no matching files.
    """
    suffix = '_ibm1.pkl'

    if model_directory is None:
        model_directory = model_dir

    # os.listdir would raise on a missing directory; report it and return an
    # empty result instead.
    if not os.path.isdir(model_directory):
        print(f"Model directory not found: {model_directory}")
        return {}

    model_files = sorted(
        name for name in os.listdir(model_directory) if name.endswith(suffix)
    )

    print(f"Found {len(model_files)} models to evaluate.")

    final_report = {}

    for filename in model_files:
        # Strip only the trailing suffix (str.replace would also remove an
        # occurrence in the middle of the name).
        lang_code = filename[:-len(suffix)]
        print(f"Processing: {lang_code}")

        print("  Loading data...")
        data = load_data(lang_code)
        if not data:
            print(f"  Skipping {lang_code} (no data found)")
            continue

        # Re-create the seeded shuffle and 80/20 split so that the slice below
        # is the held-out part.
        # NOTE(review): this assumes the pickled models were trained on the
        # split produced by train_and_evaluate; confirm.
        random.seed(42)
        random.shuffle(data)
        split_idx = int(len(data) * 0.8)
        test_data = data[split_idx:]
        if max_test_size is not None:
            test_data = test_data[:max_test_size]

        print(f"Final Test set size: {len(test_data)}")

        model_path = os.path.join(model_directory, filename)
        print(f"Loading model: {filename}...")
        try:
            # SECURITY: unpickling executes arbitrary code. Only load model
            # files you created yourself or otherwise trust.
            with open(model_path, 'rb') as f:
                model = pickle.load(f)
        except Exception as e:
            print(f"  [Error] Failed to load model: {e}")
            continue

        print(" Evaluating metrics (BLEU, METEOR, ChrF, COMET)...")
        scores = evaluate_metrics(model, test_data)

        final_report[lang_code] = scores
        print(f"  Result for {lang_code}:")
        for k, v in scores.items():
            # Metrics that could not be computed are stored as None.
            if v is not None:
                print(f"    {k}: {v:.4f}")

    return final_report

# Evaluate every saved model; `all_results` maps language code -> metrics dict.
all_results = run_evaluation_pipeline()

Found 14 models to evaluate.
Processing: es-AR
  Loading data...
  Final Test set size: 1000
  Loading model: es-AR_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.22it/s]


  Result for es-AR:
    BLEU: 0.1379
    METEOR: 0.4618
    ChrF: 0.4516
    COMET: 0.6551
Processing: es-CL
  Loading data...
  Final Test set size: 1000
  Loading model: es-CL_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.21it/s]


  Result for es-CL:
    BLEU: 0.1379
    METEOR: 0.4618
    ChrF: 0.4516
    COMET: 0.6551
Processing: es-CO
  Loading data...
  Final Test set size: 1000
  Loading model: es-CO_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.20it/s]


  Result for es-CO:
    BLEU: 0.1368
    METEOR: 0.4682
    ChrF: 0.4565
    COMET: 0.6562
Processing: es-CR
  Loading data...
  Final Test set size: 1000
  Loading model: es-CR_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.17it/s]


  Result for es-CR:
    BLEU: 0.1379
    METEOR: 0.4618
    ChrF: 0.4516
    COMET: 0.6551
Processing: es-DO
  Loading data...
  Final Test set size: 1000
  Loading model: es-DO_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.26it/s]


  Result for es-DO:
    BLEU: 0.1379
    METEOR: 0.4618
    ChrF: 0.4516
    COMET: 0.6549
Processing: es-EC
  Loading data...
  Final Test set size: 1000
  Loading model: es-EC_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.27it/s]


  Result for es-EC:
    BLEU: 0.1368
    METEOR: 0.4682
    ChrF: 0.4565
    COMET: 0.6562
Processing: es-HN
  Loading data...
  Final Test set size: 1000
  Loading model: es-HN_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.26it/s]


  Result for es-HN:
    BLEU: 0.1379
    METEOR: 0.4618
    ChrF: 0.4516
    COMET: 0.6551
Processing: es-NI
  Loading data...
  Final Test set size: 1000
  Loading model: es-NI_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.25it/s]


  Result for es-NI:
    BLEU: 0.1379
    METEOR: 0.4618
    ChrF: 0.4516
    COMET: 0.6551
Processing: es-PA
  Loading data...
  Final Test set size: 1000
  Loading model: es-PA_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.26it/s]


  Result for es-PA:
    BLEU: 0.1379
    METEOR: 0.4618
    ChrF: 0.4516
    COMET: 0.6551
Processing: es-PE
  Loading data...
  Final Test set size: 1000
  Loading model: es-PE_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.27it/s]


  Result for es-PE:
    BLEU: 0.1368
    METEOR: 0.4682
    ChrF: 0.4565
    COMET: 0.6562
Processing: es-PR
  Loading data...
  Final Test set size: 1000
  Loading model: es-PR_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.27it/s]


  Result for es-PR:
    BLEU: 0.1368
    METEOR: 0.4682
    ChrF: 0.4565
    COMET: 0.6562
Processing: es-SV
  Loading data...
  Final Test set size: 1000
  Loading model: es-SV_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.27it/s]


  Result for es-SV:
    BLEU: 0.1368
    METEOR: 0.4682
    ChrF: 0.4565
    COMET: 0.6562
Processing: es-UY
  Loading data...
  Final Test set size: 1000
  Loading model: es-UY_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.24it/s]


  Result for es-UY:
    BLEU: 0.1380
    METEOR: 0.4621
    ChrF: 0.4519
    COMET: 0.6550
Processing: es-VE
  Loading data...
  Final Test set size: 1000
  Loading model: es-VE_ibm1.pkl...
 Evaluating metrics (BLEU, METEOR, ChrF, COMET)...


ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Calculating COMET...


Predicting DataLoader 0: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:03<00:00,  2.26it/s]


  Result for es-VE:
    BLEU: 0.1368
    METEOR: 0.4682
    ChrF: 0.4565
    COMET: 0.6562
