# LatinCy Evaluation on Glosses
This notebook evaluates LatinCy's lemmatization and POS tagging on ~600 Latin glosses. The model's predictions are compared against the dataset's original (gold-standard) tags.

Created by Thea Schaaf, March 2025

In [None]:
import pandas as pd
import numpy as np
#import matplotlib.pyplot as plt
#import seaborn as sns
import time
import json
import os
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [None]:
# Absolute path of this notebook, resolved against the current working
# directory. Its parent directory is the anchor for the relative data paths
# built further down.
NOTEBOOK_FILENAME = "01-latincy.ipynb"
notebook_path = os.path.abspath(NOTEBOOK_FILENAME)

OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a Python package or a valid path to a data directory.

In [None]:
# Configuration
# Label used in the summary structure and in the names of the result files.
MODEL_NAME = "LatinCy"

# One gold-standard CSV (gs_<sample_type>.csv) is read per entry.
SAMPLE_TYPES = [
    "medieval_charters",
    "glosses",
]

# Names of the tasks this notebook evaluates.
TASKS = [
    "lemmatization",
    "pos_tagging",
]

In [None]:
# Results storage
# Summary structure that is serialised to JSON at the end of the notebook:
# the model label, a section for processing times, and one (initially empty)
# section per evaluation metric.
results = {
    "model_name": MODEL_NAME,
    "processing_times": {},
    **{metric: {} for metric in ("accuracy", "precision", "recall", "f1_score")},
}

In [None]:
# import model
import spacy

# Name of the installed spaCy pipeline package that is evaluated below.
SPACY_PIPELINE = "la_core_web_lg"
nlp = spacy.load(SPACY_PIPELINE)

In [None]:
# Gold data and predictions for every sample type, keyed by sample type.
# The loop rebinds gold_df / processed_results on each iteration, so without
# these dicts everything but the last sample type would be lost.
gold_by_sample_type = {}
predictions_by_sample_type = {}

for sample_type in SAMPLE_TYPES:
    print(f"Processing {sample_type}...")

    # Load gold standard data
    gold_file = os.path.join(os.path.dirname(notebook_path), f"../data/gold_standard/gs_{sample_type}.csv")
    gold_df = pd.read_csv(gold_file)

    # Rebuild each sample's running text from its words in a single grouped
    # pass (instead of re-filtering the whole frame once per sample id).
    # sort=False keeps the samples in order of first appearance, and rows keep
    # their file order inside each group.
    sample_texts = [
        (sample_id, ' '.join(words.tolist()))
        for sample_id, words in gold_df.groupby('sample_id', sort=False)['word']
    ]

    # Process samples and measure time. perf_counter is a monotonic clock
    # intended for measuring intervals, unlike the wall clock time.time().
    start_time = time.perf_counter()

    processed_results = []
    for sample_id, text in sample_texts:
        doc = nlp(text)
        # NOTE(review): the pipeline tokenizes the joined text itself, so
        # token.i is the token's position in *its* tokenization. This only
        # lines up with the gold word_id if both agree on tokenization and
        # indexing -- confirm against the gold files.
        processed_results.extend(
            {
                "sample_id": sample_id,
                "word_id": token.i,
                "word": token.text,
                "lemma": token.lemma_,
                "pos": token.pos_,
            }
            for token in doc
        )

    processing_time = time.perf_counter() - start_time
    results["processing_times"][sample_type] = processing_time

    gold_by_sample_type[sample_type] = gold_df
    predictions_by_sample_type[sample_type] = processed_results

# After the loop, sample_type, gold_df, processed_results and processing_time
# still refer to the LAST entry of SAMPLE_TYPES; the cells below operate on those.

In [None]:
# Collect the per-token predictions in a DataFrame. The columns are fixed
# explicitly so that an empty prediction list still yields a frame with the
# key columns the merge below relies on.
pred_df = pd.DataFrame(
    processed_results,
    columns=["sample_id", "word_id", "word", "lemma", "pos"],
)

# Show a short preview rather than printing every prediction dict.
pred_df.head()

In [None]:
# Align gold rows with predictions on sample, position and surface form.
# This is an inner join: any gold row without an exactly matching predicted
# token (and vice versa) is dropped and therefore never scored.
merged_df = pd.merge(gold_df, pred_df, on=['sample_id', 'word_id','word'], suffixes=('_gold', '_pred'))

# Make the size of the aligned set visible so silent losses do not go unnoticed.
print(f"Aligned {len(merged_df)} rows from {len(gold_df)} gold rows and {len(pred_df)} predicted tokens")

In [None]:
 # Evaluate lemmatization
# Score only rows that actually carry a gold lemma. A missing gold value is
# read as NaN (a float), and mixing it with string labels makes scikit-learn
# fail with "'<' not supported between instances of 'str' and 'float'".
lemma_eval = merged_df.dropna(subset=['lemma_gold'])
lemma_gold = lemma_eval['lemma_gold'].astype(str)
lemma_pred = lemma_eval['lemma_pred'].fillna('').astype(str)

if lemma_eval.empty:
    # Nothing to score (e.g. the merge aligned no rows): report undefined
    # metrics explicitly instead of relying on library warnings.
    print("No aligned tokens with a gold lemma; lemmatization scores are undefined.")
    lemma_accuracy = lemma_precision = lemma_recall = lemma_f1 = float('nan')
else:
    lemma_accuracy = accuracy_score(lemma_gold, lemma_pred)
    # Precision/recall/F1 over the actual lemma labels, averaged with each
    # gold label weighted by its support. (Scoring a boolean "is correct"
    # vector against all-True predictions would make recall trivially 1.0.)
    # zero_division=0 scores labels that are never predicted as 0 without warnings.
    lemma_precision, lemma_recall, lemma_f1, _ = precision_recall_fscore_support(
        lemma_gold,
        lemma_pred,
        average='weighted',
        zero_division=0,
    )

TypeError: '<' not supported between instances of 'str' and 'float'

In [None]:
# Evaluate POS tagging
# Score only rows that actually carry a gold tag; a missing gold value is
# read as NaN (a float) and cannot be mixed with string labels.
pos_eval = merged_df.dropna(subset=['pos_gold'])
pos_gold = pos_eval['pos_gold'].astype(str)
pos_pred = pos_eval['pos_pred'].fillna('').astype(str)

if pos_eval.empty:
    # Nothing to score (e.g. the merge aligned no rows): report undefined
    # metrics explicitly instead of relying on library warnings.
    print("No aligned tokens with a gold POS tag; POS tagging scores are undefined.")
    pos_accuracy = pos_precision = pos_recall = pos_f1 = float('nan')
else:
    pos_accuracy = accuracy_score(pos_gold, pos_pred)
    # Precision/recall/F1 over the actual tag labels, averaged with each gold
    # tag weighted by its support. (Scoring a boolean "is correct" vector
    # against all-True predictions would make recall trivially 1.0.)
    # zero_division=0 scores tags that are never predicted as 0 without warnings.
    pos_precision, pos_recall, pos_f1, _ = precision_recall_fscore_support(
        pos_gold,
        pos_pred,
        average='weighted',
        zero_division=0,
    )

  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Store the scores of the evaluated sample type in the summary structure so
# the summary JSON contains them. The layout mirrors
# results["processing_times"]: keyed by sample type, then by task name
# (the task names match the entries of TASKS).
for metric_name, lemma_value, pos_value in (
    ("accuracy", lemma_accuracy, pos_accuracy),
    ("precision", lemma_precision, pos_precision),
    ("recall", lemma_recall, pos_recall),
    ("f1_score", lemma_f1, pos_f1),
):
    # float() turns NumPy scalars into plain floats for JSON serialisation.
    results[metric_name][sample_type] = {
        "lemmatization": float(lemma_value),
        "pos_tagging": float(pos_value),
    }

# Make sure the output directory exists before writing into it.
os.makedirs("../results", exist_ok=True)
merged_df.to_csv(f"../results/{MODEL_NAME}_{sample_type}_detailed.csv", index=False)

print(f"Completed {sample_type}. Processing time: {processing_time:.2f}s")
print(f"Lemmatization accuracy: {lemma_accuracy:.4f}")
print(f"POS tagging accuracy: {pos_accuracy:.4f}")
print("-" * 50)

In [None]:
# Save summary results
# Serialise the collected summary structure as indented JSON next to the
# detailed per-sample-type CSV output.
summary_path = f"../results/{MODEL_NAME}_summary.json"
with open(summary_path, "w") as summary_file:
    json.dump(results, summary_file, indent=2)
