In [4]:
from sklearn import metrics
import warnings

# Suppress every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so it also hides any warnings
# scikit-learn may emit while building the reports below.
warnings.filterwarnings("ignore")

# Tag inventories for the two corpora. "O" is deliberately kept at index 0 so
# that check() can drop it with a [1:] slice and report entity tags only.
# Each entity type is expanded with its position prefixes, entity-major.
sorted_labels_eng = ["O"] + [
    f"{prefix}-{entity}"
    for entity in ("PER", "ORG", "LOC", "MISC")
    for prefix in ("B", "I")
]
sorted_labels_chn = ["O"] + [
    f"{prefix}-{entity}"
    for entity in ("NAME", "CONT", "EDU", "TITLE", "ORG", "RACE", "PRO", "LOC")
    for prefix in ("B", "M", "E", "S")
]


def check(language, gold_path, my_path, encoding="utf-8"):
    """Print a per-label classification report for a prediction file.

    The gold file and the prediction file are read in full and compared line
    by line, so they must contain the same number of lines. Lines that are
    blank (empty or whitespace-only) in the gold file are skipped. On every
    other line the last whitespace-separated field is taken as the tag.

    Args:
        language: "English" selects ``sorted_labels_eng``; any other value
            selects ``sorted_labels_chn``.
        gold_path: Path to the gold-standard file.
        my_path: Path to the prediction file, line-aligned with ``gold_path``.
        encoding: Text encoding used to read both files. Passed explicitly so
            the result does not depend on the platform's locale encoding.

    Returns:
        None. The report is printed to stdout.

    Raises:
        AssertionError: If the two files have a different number of lines.
        ValueError: If a non-blank gold line, or the prediction line paired
            with it, does not contain at least a token and a tag.
    """
    sort_labels = sorted_labels_eng if language == "English" else sorted_labels_chn

    with open(gold_path, "r", encoding=encoding) as g_f, \
            open(my_path, "r", encoding=encoding) as m_f:
        g_lines = g_f.readlines()
        m_lines = m_f.readlines()
    assert len(g_lines) == len(m_lines), "Length is Not Equal."

    y_true = []
    y_pred = []
    for line_no, (g_line, m_line) in enumerate(zip(g_lines, m_lines), start=1):
        # split() with no argument tolerates tabs, repeated spaces and
        # trailing whitespace, and yields [] for whitespace-only lines.
        g_fields = g_line.split()
        if not g_fields:
            continue
        m_fields = m_line.split()
        if len(g_fields) < 2:
            raise ValueError(
                f"{gold_path}, line {line_no}: expected '<token> <tag>', got {g_line!r}"
            )
        if len(m_fields) < 2:
            raise ValueError(
                f"{my_path}, line {line_no}: expected '<token> <tag>', got {m_line!r}"
            )
        # Only the tag is scored; the token column is ignored.
        y_true.append(g_fields[-1])
        y_pred.append(m_fields[-1])

    # sort_labels[0] is "O"; it is excluded so only entity tags are reported.
    print(metrics.classification_report(
        y_true=y_true, y_pred=y_pred, labels=sort_labels[1:], digits=4
    ))

In [5]:
# Score every model's validation predictions against the gold validation file,
# model by model, Chinese first and then English within each model.
for model in ("HMM", "CRF", "CRF_TF"):
    for language in ("Chinese", "English"):
        # Header goes on the same line as the first line of the report.
        print(f"{model}-{language}: ", end="")
        check(
            language=language,
            gold_path=f"{language}/validation.txt",
            my_path=f"{language}/validation_{model}.txt",
        )

HMM-Chinese:               precision    recall  f1-score   support

      B-NAME     0.9174    0.9804    0.9479       102
      M-NAME     0.9136    0.9867    0.9487        75
      E-NAME     0.9083    0.9706    0.9384       102
      S-NAME     1.0000    0.5000    0.6667         8
      B-CONT     0.8649    0.9697    0.9143        33
      M-CONT     0.8750    0.9844    0.9265        64
      E-CONT     0.8919    1.0000    0.9429        33
      S-CONT     0.0000    0.0000    0.0000         0
       B-EDU     0.8333    0.9434    0.8850       106
       M-EDU     0.8140    0.9887    0.8929       177
       E-EDU     0.8739    0.9811    0.9244       106
       S-EDU     0.0000    0.0000    0.0000         0
     B-TITLE     0.8214    0.8679    0.8440       689
     M-TITLE     0.8189    0.9020    0.8584      1479
     E-TITLE     0.9203    0.9724    0.9457       689
     S-TITLE     0.0000    0.0000    0.0000         0
       B-ORG     0.8923    0.9368    0.9140       522
       M-ORG  

In [6]:
# Score every model's test-set predictions against the gold test file,
# model by model, Chinese first and then English within each model.
for model in ("HMM", "CRF", "CRF_TF"):
    for language in ("Chinese", "English"):
        # File names carry the lower-cased language as a prefix.
        stem = f"{language}/{language.lower()}_test"
        print(f"{model}-{language}: ", end="")
        check(
            language=language,
            gold_path=f"{stem}.txt",
            my_path=f"{stem}_{model}.txt",
        )

HMM-Chinese:               precision    recall  f1-score   support

      B-NAME     0.9808    0.9107    0.9444       112
      M-NAME     0.9753    0.9634    0.9693        82
      E-NAME     0.9712    0.9018    0.9352       112
      S-NAME     0.0000    0.0000    0.0000         0
      B-CONT     0.9333    1.0000    0.9655        28
      M-CONT     0.9138    1.0000    0.9550        53
      E-CONT     0.9333    1.0000    0.9655        28
      S-CONT     0.0000    0.0000    0.0000         0
       B-EDU     0.8618    0.9464    0.9021       112
       M-EDU     0.8390    0.9609    0.8958       179
       E-EDU     0.9016    0.9821    0.9402       112
       S-EDU     0.0000    0.0000    0.0000         0
     B-TITLE     0.8619    0.9000    0.8806       770
     M-TITLE     0.8798    0.8881    0.8839      1921
     E-TITLE     0.9316    0.9727    0.9517       770
     S-TITLE     0.0000    0.0000    0.0000         0
       B-ORG     0.8918    0.9402    0.9153       552
       M-ORG  