In [None]:
from sklearn_crfsuite import CRF, metrics
from pathlib import Path
import sys
from seqeval.metrics import f1_score, classification_report
import joblib


# Make the parent of the working directory importable (presumably the project
# root that holds the `config` and `utils` packages imported below).
# sys.path contains plain strings, so the membership test must use the string
# form: a Path object never compares equal to a str, which would make the
# guard always true and prepend a duplicate entry each time this cell re-runs.
project_root = str(Path.cwd().parent)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from config import settings
from utils.mex4 import read_conll_any, add_context, simple_split

### Approach 1

In [None]:
# Read the CoNLL corpus, then carve an evaluation split out of it
# (dev_ratio=0.1 is handed to simple_split; the training names are rebound
# to the remaining portion).
corpus_path = Path("../data/mex4/train_weak.conll")
X_train_raw, y_train = read_conll_any(corpus_path)
X_train_raw, y_train, X_test_raw, y_test = simple_split(
    X_train_raw,
    y_train,
    dev_ratio=0.1,
)

# Layer the simple ±1 context features over whatever features already exist,
# training split first and held-out split second.
X_train, X_test = (add_context(part) for part in (X_train_raw, X_test_raw))

# Hyper-parameters for the CRF, gathered in one place so they are easy to tune.
# Per the sklearn-crfsuite docs, c1 / c2 are the L1 / L2 regularisation
# coefficients used by the L-BFGS trainer.
crf_params = {
    "algorithm": "lbfgs",
    "c1": 0.2,
    "c2": 0.1,
    "max_iterations": 200,
    "all_possible_transitions": True,
}
crf = CRF(**crf_params)
crf.fit(X_train, y_train)


# Score the fitted model on the held-out split: headline F1 first, then the
# per-label classification report.
y_predicted = crf.predict(X_test)
held_out_f1 = f1_score(y_test, y_predicted)
print(f"\nTest F1: {held_out_f1:.4f}")
report_text = classification_report(y_test, y_predicted, digits=3)
print(report_text)

# Persist the fitted CRF. The path is kept as a plain string so the printed
# message stays exactly as before, and it is defined once so the dump target
# and the message cannot drift apart.
model_path = "../outputs/mex4_crf_model.joblib"
# joblib.dump does not create missing directories; make sure the output
# folder exists so a fresh checkout does not fail after training finishes.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(crf, model_path)
print(f"\nSaved model to: {model_path}")
