In [9]:
ls ../../../../data/augmented_and_segmented_various.csv

../../../../data/augmented_and_segmented_various.csv


In [12]:
import os
import json
import pandas as pd
from datetime import datetime
from crf_segmenter import MorphologicalSegmentation

def evaluate_crf(model_paths):
    """
    Compare multiple models based on their saved prediction files.

    For every model directory an ``Evaluator`` is created and asked for the
    predictions stored under ``<model_path>/predictions`` (via
    ``get_predictions_from_file``). The position precision/recall/F1, BLEU
    and chrF values it returns are collected into one row per model.

    Side effects (both files are relative to the current working directory):
        * rows are appended to ``model_comparison_results_file.csv`` (the
          file is created with a header row if it does not exist yet);
        * ``model_comparison_results_file.md`` is overwritten with a summary
          table and the detailed results of this run.

    A model whose evaluation fails at any step is reported on stdout and
    skipped; the remaining models are still evaluated.

    Note:
        ``Evaluator`` is not imported by this function. It must already be
        present in the surrounding namespace (e.g. imported in another
        notebook cell) before the function is called.

    Args:
        model_paths (list): List of paths to model directories containing predictions

    Returns:
        pd.DataFrame: DataFrame containing evaluation results for all models,
        or ``None`` when no model could be evaluated.
    """
    comparison_file = "model_comparison_results_file.csv"
    markdown_file = "model_comparison_results_file.md"

    results = []  # One dict of metrics per successfully evaluated model

    for model_path in model_paths:
        print(f"\n{'='*50}")
        print(f"Evaluating model at: {model_path}")
        print(f"{'='*50}")

        # normpath drops a trailing separator, so "models/foo/" still yields
        # "foo" instead of an empty model name.
        model_name = os.path.basename(os.path.normpath(model_path))
        predictions_dir = os.path.join(model_path, 'predictions')

        # Initialize the evaluator
        try:
            evaluator = Evaluator(None)  # Pass None since we don't need feature extractor for file evaluation
        except Exception as e:
            print(f"Error initializing Evaluator: {str(e)}")
            continue

        # Get predictions from file (the returned targets are not needed here)
        try:
            results_dict, predicted, _ = evaluator.get_predictions_from_file(
                predictions_dir=predictions_dir
            )
        except Exception as e:
            print(f"Error reading predictions: {str(e)}")
            continue

        # Collect results for comparison. A missing or malformed metric skips
        # only this model, matching the error handling of the steps above,
        # instead of aborting the run and losing earlier results.
        try:
            result = {
                'model_name': model_name,
                'position_precision': results_dict['position']['precision'],
                'position_recall': results_dict['position']['recall'],
                'position_f1': results_dict['position']['f1'],
                'bleu_scores': results_dict['bleu_scores']['equal'],
                'chrf': results_dict['chrf'],
                'num_predictions': len(predicted),
                'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }
        except (KeyError, TypeError) as e:
            print(f"Error collecting metrics: {e!r}")
            continue
        results.append(result)

        # Print individual model results
        print(f"\nResults for model: {result['model_name']}")
        print(f"Number of predictions: {result['num_predictions']}")
        print(f"Position Scores: Precision={result['position_precision']:.3f}, "
              f"Recall={result['position_recall']:.3f}, F1={result['position_f1']:.3f}")
        print(f"BLEU Score={result['bleu_scores']:.4f}")
        print(f"chrF Score: {result['chrf']:.4f}")

    if not results:
        print("No results to compare - all models failed evaluation")
        return None

    # Create DataFrame and save to CSV; append without a header when the file
    # already exists so repeated runs accumulate in a single table.
    df = pd.DataFrame(results)

    if os.path.exists(comparison_file):
        df.to_csv(comparison_file, mode='a', header=False, index=False)
    else:
        df.to_csv(comparison_file, index=False)

    print(f"\nComparison results saved to: {comparison_file}")

    # Create a formatted markdown table
    markdown_table = "# Model Comparison Results\n\n"
    markdown_table += "## Summary Statistics\n\n"

    # Add summary statistics
    summary_df = df[['position_f1', 'bleu_scores', 'chrf']].agg(['mean', 'std', 'min', 'max'])
    markdown_table += summary_df.to_markdown() + "\n\n"

    # Add full results table
    markdown_table += "## Detailed Results\n\n"
    markdown_table += df.to_markdown(index=False)

    # Explicit encoding keeps the report identical across platforms.
    with open(markdown_file, "w", encoding="utf-8") as f:
        f.write(markdown_table)

    # Report the path that was actually written.
    print(f"Comparison markdown saved to: {markdown_file}")

    return df

In [13]:
from evaluator import Evaluator  # Make sure this import matches your file structure

if __name__ == "__main__":
    # Evaluate the three final segmenters, all stored under models_finals/.
    model_paths = [
        f"models_finals/segmenter_{suffix}"
        for suffix in ("one", "two", "three")
    ]
    results_df = evaluate_crf(model_paths)


Evaluating model at: models_finals/segmenter_one
Reading predictions from: models_finals/segmenter_one/predictions/predictions_20250108_185936.csv
Original pairs: 20000
Valid pairs after filtering: 20000
Filtered out 0 pairs
Original pairs: 20000
Valid pairs after filtering: 20000
Filtered out 0 pairs
Original pairs: 20000
Valid pairs after filtering: 20000
Filtered out 0 pairs
Original pairs: 20000
Valid pairs after filtering: 20000
Filtered out 0 pairs

Successfully loaded 20000 predictions

Example predictions:

Example 1:
Target: s-a-gcwalisek-a
Predicted: s-a-gcwalisek-a

Example 2:
Target: aba-s-e-msebenz-ini
Predicted: a-ba-s-e-msebenz-ini

Example 3:
Target: aba-n-e-nhlanhla
Predicted: aba-n-e-n-hlanhla

Results for model: segmenter_one
Number of predictions: 20000
Position Scores: Precision=0.850, Recall=0.855, F1=0.853
BLEU Score=0.8736
chrF Score: 0.9356

Evaluating model at: models_finals/segmenter_two
Reading predictions from: models_finals/segmenter_two/predictions/predi