In [4]:
import torch
from seq2seq_train import Seq2SeqTrainer
import json
import pandas as pd
from datetime import datetime
import os
from torchmetrics.text import SacreBLEUScore
from nltk.translate.bleu_score import corpus_bleu
import sacrebleu

2025-03-24 19:09:50.922607: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
def save_model_comparison(
    config_paths,
    comparison_file="model_comparison_results_unaugmented.csv",
    markdown_file="model_comparison_results_unaugmented.md",
):
    """Evaluate several saved models and record their metrics side by side.

    For every config path, the JSON config is loaded, a ``Seq2SeqTrainer`` is
    built from it, the pretrained weights are loaded, and previously saved
    predictions are scored (position precision/recall/F1, BLEU, chrF and
    sacreBLEU). Per-model results are written through
    ``trainer.save_evaluation_results``; the comparison rows are appended to
    ``comparison_file`` and rendered as a markdown table in ``markdown_file``.

    Args:
        config_paths: Iterable of paths to JSON model-config files. Each
            config must contain a ``"file_path"`` key and may contain a
            ``"model_name"`` key.
        comparison_file: CSV file the comparison rows are appended to. A
            header row is written only when the file is missing or empty.
        markdown_file: Markdown file that is overwritten with a table of the
            rows produced by *this* call only (unlike the CSV, it does not
            accumulate earlier runs).

    Returns:
        pandas.DataFrame with one row per evaluated model. When
        ``config_paths`` is empty an empty DataFrame is returned and no file
        is created or modified.

    Raises:
        OSError: If a config file cannot be opened.
        json.JSONDecodeError: If a config file is not valid JSON.
        KeyError: If a config lacks ``"file_path"`` or a metric result lacks
            one of the keys read below.

    Note:
        ``DataFrame.to_markdown`` needs the optional ``tabulate`` package.
    """
    results = []

    for config_path in config_paths:
        print(f"Attempting to load from: {config_path}")

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(config_path, "r", encoding="utf-8") as f:
            loaded_config = json.load(f)

        trainer = Seq2SeqTrainer(loaded_config)
        trainer.load_pretrained_model()

        # Predictions are read back from disk. For a first-time run the
        # notebook previously used
        #     predicted, targeted = trainer.generate_predictions()
        # here to produce them instead.
        predicted, targeted, _sources = trainer.get_predictions_from_file()

        position_scores = trainer.eval_morph_segments_position(predicted, targeted)
        bleu_scores = trainer.eval_bleu_segment(predicted, targeted)
        chrf_score = trainer.eval_chrF_segment(predicted, targeted)
        sacre_bleu = trainer.eval_sacrebleu_segment(predicted, targeted)

        # Persist the full per-model results (including sacreBLEU, which is
        # not part of the comparison table below).
        model_dir = loaded_config["file_path"]
        trainer.save_evaluation_results(
            model_dir, position_scores, bleu_scores, chrf_score, sacre_bleu
        )

        # Fall back to the last path component when no explicit name is
        # configured; normpath makes a trailing separator harmless.
        fallback_name = os.path.basename(os.path.normpath(model_dir))
        result = {
            'model_name': loaded_config.get('model_name', fallback_name),
            'position_precision': position_scores['precision'],
            'position_recall': position_scores['recall'],
            'position_f1': position_scores['f1'],
            'bleu': bleu_scores['equal'],
            'chrf': chrf_score,
            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
        results.append(result)

        print(f"Model: {result['model_name']}")
        print(f"Position Scores: Precision={result['position_precision']:.3f}, Recall={result['position_recall']:.3f}, F1={result['position_f1']:.3f}")
        print(f"BLEU Score: {result['bleu']:.4f}")
        print(f"chrF Score: {result['chrf']:.4f}")

        # Release cached GPU memory before the next model is loaded.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    df = pd.DataFrame(results)

    if not results:
        # Writing an empty frame would create a header-less CSV, and every
        # later append (header=False) would then never get column names.
        print("No model configs were provided; nothing to save.")
        return df

    # Append to the running comparison log; emit the header only when the
    # file does not exist yet or exists but is empty.
    needs_header = (
        not os.path.exists(comparison_file)
        or os.path.getsize(comparison_file) == 0
    )
    df.to_csv(comparison_file, mode='a', header=needs_header, index=False)

    print(f"\nComparison results saved to: {comparison_file}")

    # Human-readable table of this run's rows.
    markdown_table = "# Model Comparison Results\n\n"
    markdown_table += df.to_markdown(index=False)

    with open(markdown_file, "w", encoding="utf-8") as f:
        f.write(markdown_table)

    return df


In [7]:
# Usage: score every unaugmented segmenter and show the combined table.
if __name__ == "__main__":
    # One model_config.json per trained segmenter variant.
    config_paths = [
        "unaugmented_transformer_models/segmenter_one/model_config.json",
        "unaugmented_transformer_models/segmenter_two/model_config.json",
        "unaugmented_transformer_models/segmenter_three/model_config.json",
    ]
    comparison_df = save_model_comparison(config_paths)
    # Single print call; sep="\n" puts the frame on the line after the heading.
    print("\nComparison Summary:", comparison_df, sep="\n")

Attempting to load from: unaugmented_transformer_models/segmenter_one/model_config.json
Using device: cuda
Device name: NVIDIA RTX A6000
Device memory: 47.535888671875 GB
Original shape: (367178, 4)
Final shape: (100000, 4)
The config file has been saved on unaugmented_transformer_models/segmenter_one
Tokenizer path: unaugmented_transformer_models/segmenter_one/tokenizers/tokenizers_tokens.json
Loading existing tokenizer from unaugmented_transformer_models/segmenter_one/tokenizers/tokenizers_tokens.json
Tokenizer path: unaugmented_transformer_models/segmenter_one/tokenizers/tokenizers_segmenter_one.json
Loading existing tokenizer from unaugmented_transformer_models/segmenter_one/tokenizers/tokenizers_segmenter_one.json
the dataset length: 100000
Max length of source sentence: 30
Max length of target sentence: 41
Loaded best model from epoch 6
Model configuration:
- d_model: 1024
- num_layers: 3
- num_heads: 4
- d_ff: 1024
- dropout: 0.1129068748094076
- label_smoothing: 0.0398669989592