In [1]:
import torch
from seq2seq_train import Seq2SeqTrainer
import json
import pandas as pd
from datetime import datetime
import os
from torchmetrics.text import SacreBLEUScore
from nltk.translate.bleu_score import corpus_bleu
import sacrebleu
import pandas as pd

2025-03-24 22:37:16.883452: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
def save_model_comparison(config_paths):
    """Evaluate each configured model and persist a side-by-side comparison.

    For every path in ``config_paths`` the JSON config is loaded, a
    ``Seq2SeqTrainer`` is built from it, its pretrained weights are loaded,
    and predictions are scored with the trainer's BLEU, chrF and SacreBLEU
    helpers. Per-model results are written through
    ``trainer.save_evaluation_results``; the collected summary rows are
    appended to a CSV file and written to a markdown file in the current
    working directory.

    Args:
        config_paths: Iterable of paths to JSON model config files. Each
            config must contain a ``"file_path"`` key; ``"model_name"`` is
            optional and, when absent, is derived from the last component
            of ``"file_path"``.

    Returns:
        pandas.DataFrame with one row per config and the columns
        ``model_name``, ``bleu_score``, ``chrf`` and ``timestamp``. When
        ``config_paths`` is empty an empty DataFrame is returned and no
        files are written.

    Raises:
        KeyError: If a config has no ``"file_path"`` entry.
        OSError: If a config file cannot be opened.
    """
    comparison_file = "model_comparison_results_zu_en_flores_testing_set.csv"
    markdown_file = "model_comparison_results_zu_en_flores_testing_set.md"

    # One summary dict per evaluated model.
    results = []

    for config_path in config_paths:
        print(f"Attempting to load from: {config_path}")

        with open(config_path, "r", encoding="utf-8") as f:
            loaded_config = json.load(f)

        # Build the trainer from the stored config and restore its weights.
        trainer = Seq2SeqTrainer(loaded_config)
        trainer.load_pretrained_model()

        # Get predictions and compute metrics; the source sentences are
        # returned as well but are not needed for scoring here.
        predicted, targeted, _sources = trainer.get_predictions_from_file()

        bleu_scores = trainer.eval_bleu_segment(predicted, targeted)
        chrf_score = trainer.eval_chrF_segment(predicted, targeted)
        sacre_bleu = trainer.eval_sacrebleu_segment(predicted, targeted)

        # Save individual model results
        trainer.save_evaluation_results(
            loaded_config["file_path"], bleu_scores, chrf_score, sacre_bleu
        )

        # normpath strips a trailing separator first, so a directory-style
        # path such as "models/foo/" yields "foo" instead of an empty name.
        fallback_name = os.path.basename(
            os.path.normpath(loaded_config["file_path"])
        )

        # Collect results for comparison
        result = {
            'model_name': loaded_config.get('model_name', fallback_name),
            'bleu_score': bleu_scores['simple'],
            'chrf': chrf_score,
            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
        results.append(result)

        print(f"Model: {result['model_name']}")
        print(f"BLEU={bleu_scores['simple']:.4f}")
        print(f"CHRF: {chrf_score:.4f}")

        # Release cached GPU memory before the next model is loaded.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    df = pd.DataFrame(results)

    # Nothing was evaluated: leave the existing CSV and markdown untouched
    # rather than appending an empty frame / overwriting the last summary.
    if not results:
        print("No model configs were provided; nothing to save.")
        return df

    # Append to an existing CSV without repeating the header row; otherwise
    # create the file with headers.
    if os.path.exists(comparison_file):
        df.to_csv(comparison_file, mode='a', header=False, index=False)
    else:
        df.to_csv(comparison_file, index=False)

    print(f"\nComparison results saved to: {comparison_file}")

    # Create a formatted markdown table for easy viewing. Unlike the CSV,
    # this file is overwritten and holds only the rows from the current call.
    markdown_table = "# Model Comparison Results\n\n"
    markdown_table += df.to_markdown(index=False)

    with open(markdown_file, "w", encoding="utf-8") as f:
        f.write(markdown_table)

    return df


In [6]:
if __name__ == "__main__":
    # Config files of the model variants to evaluate, one per trained model.
    config_paths = [
        "zulu_english_models/zulu_english_none/model_config.json",
        "zulu_english_models/zulu_english_seg_one/model_config.json",
        "zulu_english_models/zulu_english_seg_two/model_config.json",
        "zulu_english_models/zulu_english_seg_three/model_config.json",
    ]

    # Evaluate every model and persist the comparison table.
    comparison_df = save_model_comparison(config_paths)

    # Heading and frame go out through one call; the emitted text is the
    # same as printing them one after the other.
    print("\nComparison Summary:", comparison_df, sep="\n")

Attempting to load from: zulu_english_models/zulu_english_none/model_config.json
Using device: cuda
Device name: NVIDIA RTX A6000
Device memory: 47.535888671875 GB
Original shape: (100000, 5)
Final shape: (100000, 5)
The config file has been saved on zulu_english_models/zulu_english_none
the dataset length: 100000
Tokenizer path: zulu_english_models/zulu_english_none/tokenizers/tokenizers_isizulu.json
Loading existing tokenizer from zulu_english_models/zulu_english_none/tokenizers/tokenizers_isizulu.json
Tokenizer path: zulu_english_models/zulu_english_none/tokenizers/tokenizers_english.json
Loading existing tokenizer from zulu_english_models/zulu_english_none/tokenizers/tokenizers_english.json
Max length of source sentence: 53
Max length of target sentence: 77
Loaded best model from epoch 21
Model configuration:
- d_model: 1024
- num_layers: 3
- num_heads: 8
- d_ff: 1024
- dropout: 0.2112720416358559
- label_smoothing: 0.04818508143643119
- max_grad_norm: 1.8325823336861036
- lr: 5.17

Original pairs: 1012
Valid pairs after filtering: 985
Filtered out 27 pairs
Original pairs: 1012
Valid pairs after filtering: 985
Filtered out 27 pairs
The evaluation results were successfully saved to:
- Text format: zulu_english_models/zulu_english_seg_two/evaluation_results.txt
Model: zulu_english_seg_two
BLEU=0.1020
CHRF: 0.3533
Attempting to load from: zulu_english_models/zulu_english_seg_three/model_config.json
Using device: cuda
Device name: NVIDIA RTX A6000
Device memory: 47.535888671875 GB
Original shape: (100000, 5)
Final shape: (100000, 5)
The config file has been saved on zulu_english_models/zulu_english_seg_three/
the dataset length: 100000
Tokenizer path: zulu_english_models/zulu_english_seg_three/tokenizers/tokenizers_segmenter_three.json
Loading existing tokenizer from zulu_english_models/zulu_english_seg_three/tokenizers/tokenizers_segmenter_three.json
Vocabulary finalized for inference
Tokenizer path: zulu_english_models/zulu_english_seg_three/tokenizers/tokenizers_en