In [23]:
import sklearn_crfsuite
from sklearn_crfsuite import metrics
from crf_funcs import *
import sklearn
import scipy.stats
from sklearn.metrics import make_scorer, f1_score
from sklearn.model_selection import RandomizedSearchCV

# Step One

In [5]:
train_sentences = file_opener("train")

X_train = [sentence_features(s) for s in train_sentences]
y_train = [sentence_labels(s, step_one=True) for s in train_sentences]

params_space = {
    'c1': scipy.stats.expon(scale=0.5),
    'c2': scipy.stats.expon(scale=0.05),
}

crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    max_iterations=100,
    all_possible_transitions=True
)

labels = ['B', 'I']

f1_scorer = make_scorer(metrics.flat_f1_score,
                        average='weighted', labels=labels)

rs = RandomizedSearchCV(crf, params_space,
                        cv=3,
                        verbose=1,
                        n_jobs=-1,
                        n_iter=50,
                        scoring=f1_scorer)

rs.fit(X_train, y_train)

print('best params:', rs.best_params_)
print('best CV score:', rs.best_score_)
print('model size: {:0.2f}M'.format(rs.best_estimator_.size_ / 1000000))

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    9.0s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  7.5min finished


best params: {'c1': 0.2195953080836135, 'c2': 0.025428990546374215}
best CV score: 0.813965433665832
model size: 1.00M


In [6]:
dev_sentences = file_opener("dev")

In [7]:
 X_train = [sentence_features(s) for s in train_sentences]
y_train = [sentence_labels(s, step_one=True) for s in train_sentences]

X_dev = [sentence_features(s) for s in dev_sentences]
y_dev = [sentence_labels(s, step_one=True) for s in dev_sentences]

crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=0.2195953080836135,
    c2=0.025428990546374215,
    max_iterations=100,
    all_possible_transitions=True
)
crf.fit(X_train, y_train)
labels = list(crf.classes_)
labels.remove('O')
print(labels)
y_predicted = crf.predict(X_dev)

f1 = metrics.flat_f1_score(y_dev, y_predicted, average='weighted', labels=labels)
print("IOB Score:", f1)

['B', 'I']
IOB Score: 0.8486726065730688


This shows a very slight improvement 

# Step Two

In [8]:
y_predicted_IOB = y_predicted

In [9]:
X_train = [sentence_features(s, step_two=True) for s in train_sentences]
y_train = [sentence_labels(s, step_two=True) for s in train_sentences]

X_dev = []
for ii in range(len(dev_sentences)):
    X_dev.append(sentence_features(dev_sentences[ii], step_two=True, predictions=y_predicted_IOB[ii]))
y_dev = [sentence_labels(s, step_two=True) for s in dev_sentences]
dev_key = [sentence_labels(s) for s in dev_sentences]

params_space = {
    'c1': scipy.stats.expon(scale=0.5),
    'c2': scipy.stats.expon(scale=0.05),
}
crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    max_iterations=100,
    all_possible_transitions=True
)

labels = ['Quality', 'Biotic_Entity', 'Eventuality', 'Location', 'Time', 'Value', 'Aggregate_Biotic_Abiotic_Entity', 'Unit', 'Abiotic_Entity']
f1_scorer = make_scorer(metrics.flat_f1_score,
                        average='weighted', labels=labels)

rs = RandomizedSearchCV(crf, params_space,
                        cv=3,
                        verbose=1,
                        n_jobs=-1,
                        n_iter=50,
                        scoring=f1_scorer)

rs.fit(X_train, y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   11.9s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  8.4min finished


RandomizedSearchCV(cv=3, error_score='raise-deprecating',
                   estimator=CRF(algorithm='lbfgs', all_possible_states=None,
                                 all_possible_transitions=True, averaging=None,
                                 c=None, c1=None, c2=None,
                                 calibration_candidates=None,
                                 calibration_eta=None,
                                 calibration_max_trials=None,
                                 calibration_rate=None,
                                 calibration_samples=None, delta=None,
                                 epsilon=None, error_sensitive=None,...
                                        'c2': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00000144959D2898>},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=False,
                   scoring=make_scorer(flat_f1_score, average=weighted, labels=['Quality', 'Biotic_Entit

In [10]:
print('best params:', rs.best_params_)
print('best CV score:', rs.best_score_)
print('model size: {:0.2f}M'.format(rs.best_estimator_.size_ / 1000000))

best params: {'c1': 0.056551093543686363, 'c2': 0.060760523431746975}
best CV score: 0.8462106333597296
model size: 1.00M


In [12]:

crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=0.056551093543686363,
    c2=0.060760523431746975,
    max_iterations=100,
    all_possible_transitions=True
)

crf.fit(X_train, y_train)
labels = list(crf.classes_)
labels.remove('O')
print(labels)

y_predicted = crf.predict(X_dev)

f1 = metrics.flat_f1_score(y_dev, y_predicted, average='weighted', labels=labels)
print("Class Score:", f1)

['Quality', 'Biotic_Entity', 'Eventuality', 'Location', 'Time', 'Value', 'Aggregate_Biotic_Abiotic_Entity', 'Unit', 'Abiotic_Entity']
Class Score: 0.794712740546267


This shows no improvement over the values I had before so I won't change them