**Library**

In [31]:
import sys
import os
import yaml
import json
import pandas as pd
import random
import numpy as np
from sklearn_crfsuite import metrics

In [32]:
sys.path.append(os.path.abspath(os.path.join('..')))
from src.features.hand_crafted import sent2features, sent2labels, sent2tokens, get_relation_features
from src.models.machine_learning import CRFModel, FlatModelWrapper, RelationExtractionModel
from src.data_loader.dataset import convert_label_studio_to_ner_data, prepare_re_data_from_json
from sklearn.ensemble import RandomForestClassifier
from transformers import AutoTokenizer
from sklearn_crfsuite import metrics
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, f1_score
from imblearn.over_sampling import SMOTE, RandomOverSampler

**Load Config**

In [33]:
# Read the ML experiment configuration from YAML.
with open('../configs/ml_config.yaml', 'r', encoding='utf-8') as config_file:
    config = yaml.safe_load(config_file)

# Switch every flat classifier that is present in the config to the PhoBERT vectorizer.
for model_name in ('svm', 'maxent', 'random_forest'):
    if model_name not in config:
        continue
    config[model_name]['vectorizer'] = 'phobert'

print("Config updated for PhoBERT vectors!")

Config updated for PhoBERT vectors!


**Load Vector**

In [34]:
VECTOR_DIR = '../data/vectors/'
RAW_DATA_DIR = '../data/04_model_input/'

print(f"Loading Data from {VECTOR_DIR}...")


def _open_phobert_array(stem):
    """Open ``<stem>_phobert.npy`` from VECTOR_DIR as a read-only memory map."""
    return np.load(os.path.join(VECTOR_DIR, f'{stem}_phobert.npy'), mmap_mode='r')


# --- 1. LOAD VECTOR FILES ---
# NER data (features and label ids, train then test)
X_train_ner_3d = _open_phobert_array('X_train_ner')
y_train_ner_3d = _open_phobert_array('y_train_ner')
X_test_ner_3d = _open_phobert_array('X_test_ner')
y_test_ner_3d = _open_phobert_array('y_test_ner')

# RE data (features and label ids, train then test)
X_train_re = _open_phobert_array('X_train_re')
y_train_re = _open_phobert_array('y_train_re')
X_test_re = _open_phobert_array('X_test_re')
y_test_re = _open_phobert_array('y_test_re')

print("Vectors Loaded Successfully!")

print("Re-creating Label Mappings from Raw Data...")

with open(os.path.join(RAW_DATA_DIR, 'train_dataset.json'), 'r', encoding='utf-8') as train_file:
    train_json = json.load(train_file)

# NER: collect every tag seen in the training sentences and number them in sorted order.
# NOTE(review): this presumes the stored label ids were assigned from the same
# sorted tag list when the vectors were generated - confirm against that script.
ner_data_raw = convert_label_studio_to_ner_data(train_json)
ner_labels_set = {tag for sentence in ner_data_raw for _token, tag in sentence}
ner_labels = sorted(ner_labels_set)
ner_id2label = dict(enumerate(ner_labels))
print(f"NER Mapping ({len(ner_id2label)} tags): {ner_id2label}")

# RE: same procedure over the 'label' field of each relation sample.
re_data_raw = prepare_re_data_from_json(train_json)
re_labels_set = {sample['label'] for sample in re_data_raw}
re_labels = sorted(re_labels_set)
re_id2label = dict(enumerate(re_labels))

print(f"RE Mapping ({len(re_id2label)} classes): {re_id2label}")

Loading Data from ../data/vectors/...
Vectors Loaded Successfully!
Re-creating Label Mappings from Raw Data...


Converting with Pyvi: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 797/797 [00:01<00:00, 676.55it/s]


NER Mapping (17 tags): {0: 'B-AREA', 1: 'B-ATTR', 2: 'B-LOC', 3: 'B-O', 4: 'B-ORG', 5: 'B-PER', 6: 'B-PRICE', 7: 'B-TYPE', 8: 'I-AREA', 9: 'I-ATTR', 10: 'I-LOC', 11: 'I-O', 12: 'I-ORG', 13: 'I-PER', 14: 'I-PRICE', 15: 'I-TYPE', 16: 'O'}
RE Mapping (5 classes): {0: 'HAS_AREA', 1: 'HAS_ATTR', 2: 'HAS_PRICE', 3: 'LOCATED_AT', 4: 'NO_RELATION'}


**Load Data**

In [35]:
print("Loading data files...")
TRAIN_PATH = '../data/04_model_input/train_dataset.json'
TEST_PATH = '../data/04_model_input/test_dataset.json'
DEV_PATH = '../data/04_model_input/dev_dataset.json'

# Read the three splits in train / test / dev order.
raw_splits = []
for split_path in (TRAIN_PATH, TEST_PATH, DEV_PATH):
    with open(split_path, 'r', encoding='utf-8') as split_file:
        raw_splits.append(json.load(split_file))
train_json, test_json, dev_json = raw_splits

print(f"-> Train raw docs: {len(train_json)}")
print(f"-> Test raw docs: {len(test_json)}")
print(f"-> Dev raw docs: {len(dev_json)}")

Loading data files...
-> Train raw docs: 797
-> Test raw docs: 114
-> Dev raw docs: 228


In [36]:
print("\nConverting to BIO format...")
# Convert each raw split (train, test, dev - in that order) with the project converter.
train_sents, test_sents, dev_sents = (
    convert_label_studio_to_ner_data(raw_split)
    for raw_split in (train_json, test_json, dev_json)
)


Converting to BIO format...


Converting with Pyvi: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 797/797 [00:01<00:00, 690.88it/s]
Converting with Pyvi: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 114/114 [00:00<00:00, 616.34it/s]
Converting with Pyvi: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 228/228 [00:00<00:00, 615.68it/s]


In [37]:
# Preview the first 20 (token, tag) pairs of the first training sentence.
train_sents[0][:20]

[('B√°n', 'O'),
 ('l√¥', 'B-TYPE'),
 ('ƒë·∫•t', 'I-TYPE'),
 ('100m¬≤', 'B-AREA'),
 ('ngay', 'O'),
 ('c·∫°nh', 'O'),
 ('nh√†', 'O'),
 ('thi_ƒë·∫•u', 'B-ATTR'),
 ('Th·ªß_Th·ª´a', 'I-ATTR'),
 (',', 'O'),
 ('view', 'B-ATTR'),
 ('h·ªì', 'I-ATTR'),
 ('sinh_th√°i', 'I-ATTR'),
 ('m√°t_m·∫ª', 'I-ATTR'),
 (',', 'O'),
 ('gi√°', 'O'),
 ('m·ªÅm', 'O'),
 (',', 'O'),
 ('x√¢y_d·ª±ng', 'O'),
 ('·ªü', 'O')]

**Extracting features**

In [38]:
# print("\nExtracting features")
# X_train = [sent2features(s) for s in train_sents]
# y_train = [sent2labels(s) for s in train_sents]

# X_test = [sent2features(s) for s in test_sents]
# y_test = [sent2labels(s) for s in test_sents]

# print(f"-> X_train shape: {len(X_train)} sentences")
# print(f"-> X_test shape: {len(X_test)} sentences")

In [39]:
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base-v2")
# Number the distinct training tags in sorted order to get the id -> tag map.
all_labels = sorted({tag for sentence in train_sents for _word, tag in sentence})
id2label = dict(enumerate(all_labels))
print("Label Map:", id2label)

Label Map: {0: 'B-AREA', 1: 'B-ATTR', 2: 'B-LOC', 3: 'B-O', 4: 'B-ORG', 5: 'B-PER', 6: 'B-PRICE', 7: 'B-TYPE', 8: 'I-AREA', 9: 'I-ATTR', 10: 'I-LOC', 11: 'I-O', 12: 'I-ORG', 13: 'I-PER', 14: 'I-PRICE', 15: 'I-TYPE', 16: 'O'}


In [40]:
def decode_tags(y_ids_list, id2label):
    """Translate sequences of label ids into sequences of tag strings.

    Args:
        y_ids_list: iterable of sequences; each element is converted with ``int()``
            before being looked up.
        id2label: mapping from integer id to tag.

    Returns:
        A list with one list of tags per input sequence, in the same order.
    """
    return [
        [id2label[int(tag_id)] for tag_id in sequence]
        for sequence in y_ids_list
    ]

In [41]:
def evaluate_model(y_true, y_pred, model_name):
    """Print flat accuracy and a per-label classification report for one model.

    Args:
        y_true: list of gold tag sequences (one list of tags per sentence).
        y_pred: list of predicted tag sequences, parallel to ``y_true``.
        model_name: label used in the printed banner.

    The report covers every tag that occurs in either input except the
    'B-O' and 'I-O' tags; accuracy is computed over all tags.
    """
    banner = '=' * 20
    print(f"\n{banner} EVALUATION: {model_name} {banner}")

    IGNORE_LABELS = ['B-O', 'I-O']
    # Gather every tag that appears in the gold or the predicted sequences.
    seen_labels = set()
    for tag_sequences in (y_true, y_pred):
        for sentence_tags in tag_sequences:
            seen_labels.update(sentence_tags)
    labels_to_report = sorted(seen_labels.difference(IGNORE_LABELS))
    print(f"‚ÑπReported Labels: {labels_to_report}")

    accuracy = metrics.flat_accuracy_score(y_true, y_pred)
    print(f"Accuracy: {accuracy:.4f}")
    report = metrics.flat_classification_report(
        y_true, y_pred, labels=labels_to_report, digits=4
    )
    print(report)

### **NER**

**CRF (Conditional Random Fields)**

In [42]:
def prepare_data_for_crf_phobert(X_3d, y_3d, id2label):
    """Turn padded vector/label arrays into per-sentence CRF feature dicts and tags.

    Args:
        X_3d: array indexed as ``X_3d[i]`` -> per-position vectors of sentence ``i``.
        y_3d: array indexed as ``y_3d[i]`` -> per-position label ids of sentence ``i``;
            positions equal to -100 are dropped.
        id2label: mapping from integer label id to tag string.

    Returns:
        ``(X_crf, y_crf)``: for every sentence that has at least one kept position,
        a list of feature dicts (``d_<k>`` for each vector component plus a constant
        ``bias`` of 1.0) and the matching list of tag strings.
    """
    print("Converting PhoBERT vectors to CRF features")

    feature_sequences, tag_sequences = [], []
    for row_idx, labels in enumerate(y_3d):
        vectors = X_3d[row_idx]
        keep = labels != -100

        # Sentences with no labelled position are skipped entirely.
        if not np.any(keep):
            continue

        position_features = []
        for vec in vectors[keep]:
            feat = dict((f'd_{dim}', float(value)) for dim, value in enumerate(vec))
            feat['bias'] = 1.0
            position_features.append(feat)

        feature_sequences.append(position_features)
        tag_sequences.append([id2label[int(label_id)] for label_id in labels[keep]])

    return feature_sequences, tag_sequences

In [43]:
print("\nProcessing Data for CRF...")
# Build CRF inputs for the train split first, then the test split.
X_train_crf, y_train_crf = prepare_data_for_crf_phobert(
    X_3d=X_train_ner_3d, y_3d=y_train_ner_3d, id2label=id2label
)
X_test_crf, y_test_crf = prepare_data_for_crf_phobert(
    X_3d=X_test_ner_3d, y_3d=y_test_ner_3d, id2label=id2label
)
for split_name, split_features in (("Train", X_train_crf), ("Test", X_test_crf)):
    print(f"-> {split_name} samples: {len(split_features)}")


Processing Data for CRF...
Converting PhoBERT vectors to CRF features
Converting PhoBERT vectors to CRF features
-> Train samples: 797
-> Test samples: 114


In [44]:
print("Training CRF model")
# Use the 'crf' section of the config, or an empty dict when it is absent.
crf_settings = config.get('crf', {})
crf = CRFModel(crf_settings)
crf.train(X_train_crf, y_train_crf)

Training CRF model


In [45]:
# Predict tags for the test sentences, then report against the gold tags.
y_pred_crf = crf.predict(X_test_crf)
evaluate_model(y_true=y_test_crf, y_pred=y_pred_crf, model_name="CRF")


‚ÑπReported Labels: ['B-AREA', 'B-ATTR', 'B-LOC', 'B-ORG', 'B-PER', 'B-PRICE', 'B-TYPE', 'I-AREA', 'I-ATTR', 'I-LOC', 'I-ORG', 'I-PER', 'I-PRICE', 'I-TYPE', 'O']
Accuracy: 0.7500
              precision    recall  f1-score   support

      B-AREA     0.6327    0.6237    0.6282       279
      B-ATTR     0.7045    0.5701    0.6302       870
       B-LOC     0.7219    0.6816    0.7011       358
       B-ORG     0.6471    0.3667    0.4681        30
       B-PER     0.4667    0.4000    0.4308        35
     B-PRICE     0.8540    0.8182    0.8357       143
      B-TYPE     0.6928    0.6150    0.6516       187
      I-AREA     0.7976    0.8100    0.8038       579
      I-ATTR     0.6543    0.5997    0.6259      1594
       I-LOC     0.6713    0.6544    0.6627       596
       I-ORG     0.5833    0.6087    0.5957        46
       I-PER     0.5385    0.3500    0.4242        40
     I-PRICE     0.9079    0.7962    0.8484       260
      I-TYPE     0.3636    0.4533    0.4036       150
         

In [46]:
def process_ner_vectors(X_3d, y_3d):
    """Drop the positions labelled -100 from every sentence.

    Args:
        X_3d: array indexed as ``X_3d[i]`` -> per-position vectors of sentence ``i``.
        y_3d: array indexed as ``y_3d[i]`` -> per-position label ids of sentence ``i``.

    Returns:
        ``(X_list, y_list)``: parallel lists holding, for each sentence with at
        least one kept position, its kept vectors and its kept label ids.
    """
    kept_vectors, kept_labels = [], []

    for row_idx, labels in enumerate(y_3d):
        vectors = X_3d[row_idx]

        keep = labels != -100
        y_sent = labels[keep]
        X_sent = vectors[keep]

        # A sentence with nothing left after filtering is omitted from both lists.
        if len(y_sent) == 0:
            continue
        kept_vectors.append(X_sent)
        kept_labels.append(y_sent)

    return kept_vectors, kept_labels

print("Processing NER vectors (Removing padding)...")
# Filter the train split first, then the test split.
X_train_ner, y_train_ner = process_ner_vectors(X_3d=X_train_ner_3d, y_3d=y_train_ner_3d)
X_test_ner, y_test_ner = process_ner_vectors(X_3d=X_test_ner_3d, y_3d=y_test_ner_3d)

first_train_sentence = X_train_ner[0]
print(f"-> NER Train sentences: {len(X_train_ner)}")
print(f"-> Sample sentence length: {len(first_train_sentence)}")

Processing NER vectors (Removing padding)...
-> NER Train sentences: 797
-> Sample sentence length: 38


**SVM (Support Vector Machine)**

In [47]:
print("Training SVM Model")
# Use the 'svm' section of the config, or an empty dict when it is absent.
svm_settings = config.get('svm', {})
svm = FlatModelWrapper('svm', svm_settings)
svm.train(X_train_ner, y_train_ner)

Training SVM Model


In [48]:
# Predict label ids, decode gold and predicted ids to tags, then report.
y_pred_svm = svm.predict(X_test_ner)
y_test_tags, y_pred_tags = (
    decode_tags(id_sequences, id2label) for id_sequences in (y_test_ner, y_pred_svm)
)
evaluate_model(y_true=y_test_tags, y_pred=y_pred_tags, model_name="SVM")


‚ÑπReported Labels: ['B-AREA', 'B-ATTR', 'B-LOC', 'B-ORG', 'B-PER', 'B-PRICE', 'B-TYPE', 'I-AREA', 'I-ATTR', 'I-LOC', 'I-ORG', 'I-PER', 'I-PRICE', 'I-TYPE', 'O']
Accuracy: 0.7444
              precision    recall  f1-score   support

      B-AREA     0.6455    0.4373    0.5214       279
      B-ATTR     0.7173    0.4287    0.5367       870
       B-LOC     0.6544    0.7458    0.6971       358
       B-ORG     0.6667    0.1333    0.2222        30
       B-PER     0.6154    0.4571    0.5246        35
     B-PRICE     0.8681    0.8741    0.8711       143
      B-TYPE     0.6244    0.7380    0.6765       187
      I-AREA     0.8045    0.8031    0.8038       579
      I-ATTR     0.6059    0.5885    0.5971      1594
       I-LOC     0.7323    0.6426    0.6845       596
       I-ORG     0.4737    0.3913    0.4286        46
       I-PER     0.6000    0.3750    0.4615        40
     I-PRICE     0.9437    0.8385    0.8880       260
      I-TYPE     0.5368    0.3400    0.4163       150
         

**MaxEnt (Maximum Entropy)**

In [49]:
print("Training MaxEnt Model")
# Use the 'maxent' section of the config, or an empty dict when it is absent.
maxent_settings = config.get('maxent', {})
maxent = FlatModelWrapper('maxent', maxent_settings)
maxent.train(X_train_ner, y_train_ner)

Training MaxEnt Model




In [50]:
# Predict label ids, decode gold and predicted ids to tags, then report.
y_pred_maxent = maxent.predict(X_test_ner)
y_test_tags, y_pred_tags = (
    decode_tags(id_sequences, id2label) for id_sequences in (y_test_ner, y_pred_maxent)
)
evaluate_model(y_true=y_test_tags, y_pred=y_pred_tags, model_name="MaxEnt")


‚ÑπReported Labels: ['B-AREA', 'B-ATTR', 'B-LOC', 'B-ORG', 'B-PER', 'B-PRICE', 'B-TYPE', 'I-AREA', 'I-ATTR', 'I-LOC', 'I-ORG', 'I-PER', 'I-PRICE', 'I-TYPE', 'O']
Accuracy: 0.7544
              precision    recall  f1-score   support

      B-AREA     0.6512    0.6022    0.6257       279
      B-ATTR     0.7064    0.5862    0.6407       870
       B-LOC     0.6789    0.7207    0.6992       358
       B-ORG     0.5333    0.2667    0.3556        30
       B-PER     0.5000    0.4571    0.4776        35
     B-PRICE     0.8652    0.8531    0.8592       143
      B-TYPE     0.6875    0.6471    0.6667       187
      I-AREA     0.8229    0.8187    0.8208       579
      I-ATTR     0.6652    0.5571    0.6064      1594
       I-LOC     0.7038    0.6896    0.6966       596
       I-ORG     0.5000    0.5217    0.5106        46
       I-PER     0.5862    0.4250    0.4928        40
     I-PRICE     0.9106    0.8615    0.8854       260
      I-TYPE     0.4452    0.4600    0.4525       150
         

In [51]:
def visualize_ner_predictions_ml(test_json, y_true_list, y_pred_list, id2label, tokenizer, num_samples=2):
    """Display true vs. predicted NER tags for randomly sampled test documents.

    For each sampled index the raw text is read from
    ``test_json[idx]['data']['text']`` and split with ``tokenizer.tokenize``.
    Tokens, gold labels and predicted labels are truncated to their shortest
    common length, compared position by position, and shown as a DataFrame
    via ``display``.

    NOTE(review): tokens are produced by re-tokenizing the raw text, while the
    label sequences are supplied by the caller; the two are paired purely by
    position. Confirm the labels were produced with the same tokenization,
    otherwise the rows are misaligned.

    Args:
        test_json: sequence of documents, each with a ``['data']['text']`` entry.
        y_true_list: per-document gold labels (numeric ids or tag strings).
        y_pred_list: per-document predicted labels (numeric ids or tag strings).
        id2label: mapping from integer id to tag, used for numeric labels.
        tokenizer: object exposing ``tokenize(text) -> list of tokens``.
        num_samples: maximum number of documents to show.

    Returns:
        None. Output is printed and displayed.
    """
    def _to_tag(value):
        # Numeric ids (Python or any NumPy int/float width) are looked up in
        # id2label; anything else is treated as an already-decoded tag.
        if isinstance(value, (int, float, np.number)):
            return id2label[int(value)]
        return str(value)

    # Only sample indices that are valid for all three parallel inputs.
    n_docs = min(len(test_json), len(y_true_list), len(y_pred_list))
    indices = random.sample(range(n_docs), min(num_samples, n_docs))

    for idx in indices:
        print(f"\nSentence #{idx}:")

        raw_text = test_json[idx]['data']['text']
        tokens = tokenizer.tokenize(raw_text)

        true_seq = y_true_list[idx]
        pred_seq = y_pred_list[idx]

        # Truncate everything to the shortest sequence so rows line up.
        min_len = min(len(tokens), len(true_seq), len(pred_seq))
        tokens_show = tokens[:min_len]

        true_labels = [_to_tag(x) for x in true_seq[:min_len]]
        pred_labels = [_to_tag(x) for x in pred_seq[:min_len]]
        results = ['‚úÖ' if t == p else '‚ùå' for t, p in zip(true_labels, pred_labels)]

        correct = sum(1 for r in results if r == '‚úÖ')
        # Guard against an empty comparison (e.g. empty text) to avoid dividing by zero.
        accuracy = correct / len(results) if results else 0.0
        print(f"-> Text: {raw_text[:100]}...")
        print(f"-> Correct: {correct}/{len(results)} ({accuracy:.1%})")

        # Build the comparison table.
        df = pd.DataFrame({
            'Subword': tokens_show,
            'Th·ª±c t·∫ø (True)': true_labels,
            'D·ª± ƒëo√°n (Pred)': pred_labels,
            'K·∫øt qu·∫£': results
        })
        display(df)

In [52]:
print("\n=== K·∫æT QU·∫¢ D·ª∞ ƒêO√ÅN: CRF ===")
y_pred_crf = crf.predict(X_test_crf)
# Compare against the CRF's own gold tags (y_test_crf, built together with
# X_test_crf) instead of y_test_tags, which only exists after the SVM/MaxEnt
# evaluation cells have run.
visualize_ner_predictions_ml(
    test_json,
    y_test_crf,
    y_pred_crf,
    id2label,
    tokenizer,
    num_samples=1
)


=== K·∫æT QU·∫¢ D·ª∞ ƒêO√ÅN: CRF ===

Sentence #111:
-> Text: 350tr, ƒê·∫•t 5x37m CN 186m2 c√≥ th·ªï c∆∞ g·∫ßn KCN Hi·ªáp Th·∫°nh ƒë∆∞·ªùng b√™ t√¥ng √¥ t√¥ t·ªõi ƒë·∫•t V·ªã tr√≠ ·∫•p ƒê√° H√†ng,...
-> Correct: 50/62 (80.6%)


Unnamed: 0,Subword,Th·ª±c t·∫ø (True),D·ª± ƒëo√°n (Pred),K·∫øt qu·∫£
0,350@@,B-PRICE,B-PRICE,‚úÖ
1,tr@@,I-PRICE,I-PRICE,‚úÖ
2,",",O,O,‚úÖ
3,ƒê·∫•t,B-TYPE,B-TYPE,‚úÖ
4,5@@,B-AREA,B-AREA,‚úÖ
...,...,...,...,...
57,t√¥ng,I-ATTR,I-ATTR,‚úÖ
58,4m,I-ATTR,I-ATTR,‚úÖ
59,C√°ch,I-ATTR,I-ATTR,‚úÖ
60,QL@@,I-ATTR,I-ATTR,‚úÖ


In [53]:
print("\n=== K·∫æT QU·∫¢ D·ª∞ ƒêO√ÅN: SVM ===")
# Re-run prediction on the test vectors and show one random document.
y_pred_svm = svm.predict(X_test_ner)
visualize_ner_predictions_ml(
    test_json=test_json,
    y_true_list=y_test_tags,
    y_pred_list=y_pred_svm,
    id2label=id2label,
    tokenizer=tokenizer,
    num_samples=1,
)


=== K·∫æT QU·∫¢ D·ª∞ ƒêO√ÅN: SVM ===

Sentence #97:
-> Text: Ch√≠nh ch·ªß nh·ªù gi√∫p l√¥ ƒë·∫•t g·∫ßn b√™n b·ªánh vi·ªán Xuy√™n √Å, T√¢y Ninh Di·ªán t√≠ch: 10m x 65m, di·ªán t√≠chCN: 636...
-> Correct: 55/68 (80.9%)


Unnamed: 0,Subword,Th·ª±c t·∫ø (True),D·ª± ƒëo√°n (Pred),K·∫øt qu·∫£
0,Ch√≠nh,B-ATTR,B-PER,‚ùå
1,ch·ªß,I-ATTR,I-PER,‚ùå
2,nh·ªù,O,O,‚úÖ
3,gi√∫p,O,O,‚úÖ
4,l√¥,B-TYPE,B-TYPE,‚úÖ
...,...,...,...,...
63,kh√°m,O,O,‚úÖ
64,Gi√°@@,O,O,‚úÖ
65,:,O,O,‚úÖ
66,2,O,O,‚úÖ


In [54]:
print("\n=== K·∫æT QU·∫¢ D·ª∞ ƒêO√ÅN: MAXENT ===")
# Re-run prediction on the test vectors and show one random document.
y_pred_maxent = maxent.predict(X_test_ner)
visualize_ner_predictions_ml(
    test_json=test_json,
    y_true_list=y_test_tags,
    y_pred_list=y_pred_maxent,
    id2label=id2label,
    tokenizer=tokenizer,
    num_samples=1,
)


=== K·∫æT QU·∫¢ D·ª∞ ƒêO√ÅN: MAXENT ===

Sentence #32:
-> Text: B√°n nh√† m·∫∑t ti·ªÅn Th·∫£o ƒêi·ªÅn t·∫°i khu ph·ªë T√¢y nhi·ªÅu ti·ªán √≠ch k·∫øt n·ªëi c√°c ph·ªë Xu√¢n Th·ªßy, Qu·ªëc H∆∞∆°ng trun...
-> Correct: 50/60 (83.3%)


Unnamed: 0,Subword,Th·ª±c t·∫ø (True),D·ª± ƒëo√°n (Pred),K·∫øt qu·∫£
0,B√°n,O,O,‚úÖ
1,nh√†,B-TYPE,B-TYPE,‚úÖ
2,m·∫∑t,B-ATTR,B-ATTR,‚úÖ
3,ti·ªÅn,B-LOC,B-LOC,‚úÖ
4,Th·∫£o,O,O,‚úÖ
5,ƒêi·ªÅn,B-LOC,B-LOC,‚úÖ
6,t·∫°i,I-LOC,I-LOC,‚úÖ
7,khu,B-ATTR,B-ATTR,‚úÖ
8,ph·ªë,I-ATTR,O,‚ùå
9,T√¢y,I-ATTR,O,‚ùå


In [55]:
import joblib
import json
import os
import pickle  # Fallback serializer for the CRF if joblib fails (joblib is usually the better choice)

MODEL_DIR = '../models/ner/'
os.makedirs(MODEL_DIR, exist_ok=True)

print(f"Saving NER artifacts to {MODEL_DIR}...")

# Store the id -> tag mapping next to the models (JSON turns the int keys into strings).
with open(os.path.join(MODEL_DIR, 'ner_id2label.json'), 'w', encoding='utf-8') as f:
    json.dump(ner_id2label, f, ensure_ascii=False, indent=2)

ner_models_list = ['crf', 'svm', 'maxent']

# Persist the wrappers that were already trained and evaluated above instead of
# retraining fresh copies: the saved artifact is then exactly the model whose
# scores are reported, and training time is not spent twice.
# The lambdas defer the name lookup so that a model whose training cell was not
# run is reported individually instead of aborting the whole cell.
trained_ner_models = {
    'crf': lambda: crf,
    'svm': lambda: svm,
    'maxent': lambda: maxent,
}
saved_models, failed_models = [], []

for model_name in ner_models_list:
    print(f"\nProcessing NER Model: {model_name.upper()}...")

    try:
        trained_model = trained_ner_models[model_name]()
    except NameError:
        print(f"   Skipped {model_name} (not trained in this session. Run its training cell first?)")
        failed_models.append(model_name)
        continue

    save_path = os.path.join(MODEL_DIR, f'ner_{model_name}.pkl')
    try:
        # Only the underlying estimator (`.model`) is serialized, not the wrapper.
        joblib.dump(trained_model.model, save_path)
    except Exception as e:
        print(f"   ‚ùå Error saving {model_name}: {e}")
        failed_models.append(model_name)
    else:
        print(f"   -> Saved: {save_path}")
        saved_models.append(model_name)

# Report success only when every model was actually written.
if failed_models:
    print(f"\nNER models saved: {saved_models}; NOT saved: {failed_models}")
else:
    print("\nüéâ DONE! NER models saved.")

Saving NER artifacts to ../models/ner/...

Processing NER Model: CRF...
   -> Saved: ../models/ner/ner_crf.pkl

Processing NER Model: SVM...
   -> Saved: ../models/ner/ner_svm.pkl

Processing NER Model: MAXENT...




   -> Saved: ../models/ner/ner_maxent.pkl

üéâ DONE! NER models saved.
