In [2]:
import sklearn_crfsuite
from sklearn_crfsuite import metrics

# Toy corpus: every sentence is a list of (token, label) pairs, where the
# labels carry B-/I- prefixes for entity tokens and 'O' for everything else.
data = [
    [
        ('John', 'B-PER'),
        ('lives', 'O'),
        ('in', 'O'),
        ('New', 'B-LOC'),
        ('York', 'I-LOC'),
    ],
    [
        ('He', 'O'),
        ('works', 'O'),
        ('at', 'O'),
        ('Google', 'B-ORG'),
    ],
]

# Feature extraction functions
def word2features(sent, i):
    """Return the feature dict describing the token at index ``i`` of ``sent``.

    Each item of ``sent`` is indexed with ``[0]`` to obtain the token text.
    The result always holds a constant bias plus the lowercase form, the
    3- and 2-character suffixes and the upper/title/digit flags of the token
    itself. The lowercase form and title/upper flags of the previous and next
    tokens are added under ``-1:`` / ``+1:`` prefixed keys when those
    neighbours exist; otherwise ``BOS`` / ``EOS`` is set to ``True``.
    """
    token = sent[i][0]

    # Features of the token itself.
    feats = {'bias': 1.0}
    feats['word.lower()'] = token.lower()
    feats['word[-3:]'] = token[-3:]
    feats['word[-2:]'] = token[-2:]
    feats['word.isupper()'] = token.isupper()
    feats['word.istitle()'] = token.istitle()
    feats['word.isdigit()'] = token.isdigit()

    # Left context: flag the beginning of the sentence when nothing precedes.
    if i <= 0:
        feats['BOS'] = True
    else:
        prev_token = sent[i - 1][0]
        feats['-1:word.lower()'] = prev_token.lower()
        feats['-1:word.istitle()'] = prev_token.istitle()
        feats['-1:word.isupper()'] = prev_token.isupper()

    # Right context: flag the end of the sentence when nothing follows.
    if i >= len(sent) - 1:
        feats['EOS'] = True
    else:
        next_token = sent[i + 1][0]
        feats['+1:word.lower()'] = next_token.lower()
        feats['+1:word.istitle()'] = next_token.istitle()
        feats['+1:word.isupper()'] = next_token.isupper()

    return feats

def sent2features(sent):
    """Return one ``word2features`` dict per position of ``sent``, in order."""
    positions = range(len(sent))
    return [word2features(sent, pos) for pos in positions]

def sent2labels(sent):
    """Return the second element of every two-item pair in ``sent``, in order."""
    return [tag for _word, tag in sent]

def sent2tokens(sent):
    """Return the first element of every two-item pair in ``sent``, in order."""
    return [word for word, _tag in sent]

# Turn every sentence into a sequence of feature dicts and the matching
# sequence of labels.
X_train = list(map(sent2features, data))
y_train = list(map(sent2labels, data))

# Configure and fit the CRF (L-BFGS training, c1/c2 regularisation
# coefficients, at most 100 iterations).
crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True
)
crf.fit(X_train, y_train)

# NOTE: predictions are made on the very sentences the model was fitted on,
# so the score below reflects fit to the training data only.
y_pred = crf.predict(X_train)

# Weighted F1 over every label the fitted model knows about.
labels = list(crf.classes_)
f1_score = metrics.flat_f1_score(
    y_train,
    y_pred,
    average='weighted',
    labels=labels,
)
print(f"F1-score: {f1_score}")

ModuleNotFoundError: No module named 'sklearn_crfsuite'

In [None]:
pip install sklearn-crfsuite
