In [14]:
import pickle
import copy
import numpy as np
import sklearn
import scipy.stats
from sklearn.metrics import make_scorer
from sklearn.metrics import classification_report
# from sklearn.cross_validation import cross_val_score
# from sklearn.grid_search import RandomizedSearchCV

import sklearn_crfsuite
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics

from sklearn.utils import shuffle
from collections import Counter

In [4]:
def get_sent_labels(token_list):
    sent_labels, sentences, sent_start = [], [], 0
    for i, line in enumerate(token_list):
        if line == '\n':
            sentences.append(sent_labels)
            sent_labels = []
        else:        
            token, label = line.rstrip().split()
            sent_labels.append(label)
    return sentences

def sent2features(sent_emb):
    features = []

    for word_emb in sent_emb:
        word_features = {}
        if len(word_emb.shape) > 0:
            for i in range(word_emb.shape[0]):
                word_features['bert_features_{}'.format(i)] = float(word_emb[i])
        else:
            word_features['bert_features_0'] = float(word_emb)
            
        features.append(copy.deepcopy(word_features))
        del word_features
    
    return features

def merge_features(bert_features, other_features):
    
    for sent_emb_features, sent_other_features in zip(bert_features, other_features):
        
        for word_emb_features, word_other_features in zip(sent_emb_features[:len(sent_other_features)], sent_other_features):
            word_other_features.update(word_emb_features)
        
        if len(sent_other_features) > len(sent_emb_features):
            for _ in range(len(sent_other_features)-len(sent_emb_features)):
                sent_other_features.pop()

In [9]:
wm1 = open('../../data_wm/arg_clean_45_1/test.txt','r').readlines()
wm1_labels = get_sent_labels(wm1)
wm1_features = pickle.load(open('../features/wm1_emb.p','rb'))
print(len(wm1_features), len(wm1_labels))

wm2 = open('../../data_wm/arg_clean_45_2/train.txt','r').readlines()
wm2_labels = get_sent_labels(wm2)
wm2_features = pickle.load(open('../features/wm2_emb.p','rb'))
print(len(wm2_features), len(wm2_labels))

wm_nr = open('../../data_wm/wm_narrative/test.txt','r').readlines()
wm_nr_labels = get_sent_labels(wm_nr)

wm_nr_features = pickle.load(open('../features/wm_nr_emb.p','rb'))
print(len(wm_nr_features), len(wm_nr_labels))

1266 1266
1862 1862
1332 1332


In [10]:
wm1_lexsyn = pickle.load(open('../features/wm1_lexsyn.p','rb'))
wm2_lexsyn = pickle.load(open('../features/wm2_lexsyn.p','rb'))
wm_nr_lexsyn = pickle.load(open('../features/wm_nr_lexsyn.p','rb'))

In [11]:
wm1_all = pickle.load(open('../features/wm1_all.p','rb'))
wm2_all = pickle.load(open('../features/wm2_all.p','rb'))
wm_nr_all = pickle.load(open('../features/wm_nr_all.p','rb'))

In [12]:
count, max1, max2 = 0, 800, 300
wm2_features, wm2_lexsyn, wm2_all, wm2_labels = shuffle(wm2_features, wm2_lexsyn, wm2_all, wm2_labels, random_state=0)

wm2_features_ds,  wm2_lexsyn_ds,  wm2_all_ds,  wm2_labels_ds  = [], [], [], []
wm2_features_ds2, wm2_lexsyn_ds2, wm2_all_ds2, wm2_labels_ds2 = [], [], [], []

for bert, lexsyn, _all, labels in zip(wm2_features, wm2_lexsyn, wm2_all, wm2_labels):
    
    if all([label == 'O-claim' for label in labels]):
        
        if count < max1:
            wm2_features_ds.append(bert)
            wm2_lexsyn_ds.append(lexsyn)
            wm2_all_ds.append(_all)
            wm2_labels_ds.append(labels)
            
            if count < max2:
                wm2_features_ds2.append(bert)
                wm2_lexsyn_ds2.append(lexsyn)
                wm2_all_ds2.append(_all)
                wm2_labels_ds2.append(labels)
        
        count += 1
    
    else:
        
        wm2_features_ds.append(bert)
        wm2_lexsyn_ds.append(lexsyn)
        wm2_all_ds.append(_all)
        wm2_labels_ds.append(labels)
        
        wm2_features_ds2.append(bert)
        wm2_lexsyn_ds2.append(lexsyn)
        wm2_all_ds2.append(_all)
        wm2_labels_ds2.append(labels)
        

In [15]:
Counter([label for labels in wm2_labels_ds2 for label in labels]), \
Counter([label for labels in wm2_labels_ds for label in labels]), \
Counter([label for labels in wm2_labels for label in labels])

(Counter({'O-claim': 10909, 'B-claim': 951, 'I-claim': 10263}),
 Counter({'O-claim': 20326, 'B-claim': 951, 'I-claim': 10263}),
 Counter({'O-claim': 25332, 'B-claim': 951, 'I-claim': 10263}))

# BERT features

In [16]:
# bert embeddings only
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
crf.fit(wm2_features_ds, wm2_labels_ds)

y_pred = crf.predict(wm1_features)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_features)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.487     0.321     0.387       707
     I-claim      0.623     0.485     0.546      7407
     O-claim      0.778     0.865     0.819     16841

    accuracy                          0.737     24955
   macro avg      0.630     0.557     0.584     24955
weighted avg      0.724     0.737     0.726     24955

              precision    recall  f1-score   support

     B-claim      0.040     0.297     0.071        37
     I-claim      0.070     0.509     0.123       350
     O-claim      0.990     0.878     0.931     21465

    accuracy                          0.871     21852
   macro avg      0.367     0.561     0.375     21852
weighted avg      0.974     0.871     0.916     21852



In [17]:
# bert embeddings only
crf = sklearn_crfsuite.CRF(algorithm='l2sgd', c2=0.1, max_iterations=1000, all_possible_transitions=True)
crf.fit(wm2_features_ds, wm2_labels_ds)

y_pred = crf.predict(wm1_features)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_features)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.483     0.199     0.282       707
     I-claim      0.714     0.261     0.383      7407
     O-claim      0.730     0.951     0.826     16841

    accuracy                          0.725     24955
   macro avg      0.642     0.471     0.497     24955
weighted avg      0.718     0.725     0.679     24955

              precision    recall  f1-score   support

     B-claim      0.065     0.189     0.097        37
     I-claim      0.119     0.237     0.158       350
     O-claim      0.986     0.967     0.976     21465

    accuracy                          0.954     21852
   macro avg      0.390     0.464     0.411     21852
weighted avg      0.971     0.954     0.962     21852



In [18]:
# bert embeddings only
crf = sklearn_crfsuite.CRF(algorithm='l2sgd', c2=0.1, max_iterations=1000, all_possible_transitions=True)
crf.fit(wm2_features_ds2, wm2_labels_ds2)

y_pred = crf.predict(wm1_features)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_features)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.348     0.504     0.411       707
     I-claim      0.453     0.850     0.591      7407
     O-claim      0.883     0.525     0.659     16841

    accuracy                          0.621     24955
   macro avg      0.561     0.626     0.554     24955
weighted avg      0.740     0.621     0.631     24955

              precision    recall  f1-score   support

     B-claim      0.018     0.459     0.035        37
     I-claim      0.036     0.934     0.070       350
     O-claim      0.998     0.556     0.714     21465

    accuracy                          0.562     21852
   macro avg      0.351     0.650     0.273     21852
weighted avg      0.981     0.562     0.702     21852



# LexSyn

In [19]:
crf = sklearn_crfsuite.CRF(algorithm='l2sgd', c2=0.1, max_iterations=1000, all_possible_transitions=True)
crf.fit(wm2_lexsyn_ds, wm2_labels_ds)

y_pred = crf.predict(wm1_lexsyn)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_lexsyn)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.544     0.338     0.417       707
     I-claim      0.650     0.458     0.537      7407
     O-claim      0.774     0.886     0.826     16841

    accuracy                          0.744     24955
   macro avg      0.656     0.561     0.593     24955
weighted avg      0.730     0.744     0.729     24955

              precision    recall  f1-score   support

     B-claim      0.044     0.270     0.075        37
     I-claim      0.068     0.469     0.119       350
     O-claim      0.989     0.886     0.935     21465

    accuracy                          0.878     21852
   macro avg      0.367     0.542     0.376     21852
weighted avg      0.973     0.878     0.920     21852



In [20]:
crf = sklearn_crfsuite.CRF(algorithm='l2sgd', c2=0.1, max_iterations=1000, all_possible_transitions=True)
crf.fit(wm2_lexsyn_ds2, wm2_labels_ds2)

y_pred = crf.predict(wm1_lexsyn)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_lexsyn)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.449     0.482     0.465       707
     I-claim      0.538     0.644     0.586      7407
     O-claim      0.815     0.741     0.776     16841

    accuracy                          0.705     24955
   macro avg      0.600     0.623     0.609     24955
weighted avg      0.722     0.705     0.711     24955

              precision    recall  f1-score   support

     B-claim      0.026     0.378     0.049        37
     I-claim      0.042     0.657     0.078       350
     O-claim      0.992     0.730     0.841     21465

    accuracy                          0.728     21852
   macro avg      0.353     0.588     0.323     21852
weighted avg      0.975     0.728     0.827     21852



# BERT + LexSyn features

In [21]:
# x_lexsyn has both lexsyn and bert features after using merge
merge_features(wm1_features, wm1_lexsyn)
merge_features(wm2_features_ds, wm2_lexsyn_ds)
merge_features(wm2_features_ds2, wm2_lexsyn_ds2)
merge_features(wm_nr_features, wm_nr_lexsyn)

In [22]:
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
crf.fit(wm2_lexsyn_ds, wm2_labels_ds)

y_pred = crf.predict(wm1_lexsyn)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_lexsyn)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.552     0.396     0.461       707
     I-claim      0.681     0.543     0.604      7407
     O-claim      0.801     0.882     0.840     16841

    accuracy                          0.768     24955
   macro avg      0.678     0.607     0.635     24955
weighted avg      0.758     0.768     0.759     24955

              precision    recall  f1-score   support

     B-claim      0.056     0.351     0.096        37
     I-claim      0.101     0.591     0.173       350
     O-claim      0.992     0.904     0.946     21465

    accuracy                          0.898     21852
   macro avg      0.383     0.616     0.405     21852
weighted avg      0.976     0.898     0.932     21852



In [23]:
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
crf.fit(wm2_lexsyn_ds2, wm2_labels_ds2)

y_pred = crf.predict(wm1_lexsyn)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_lexsyn)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.473     0.515     0.493       707
     I-claim      0.585     0.683     0.630      7407
     O-claim      0.836     0.772     0.803     16841

    accuracy                          0.738     24955
   macro avg      0.631     0.656     0.642     24955
weighted avg      0.751     0.738     0.743     24955

              precision    recall  f1-score   support

     B-claim      0.031     0.432     0.058        37
     I-claim      0.055     0.634     0.101       350
     O-claim      0.992     0.799     0.885     21465

    accuracy                          0.796     21852
   macro avg      0.359     0.622     0.348     21852
weighted avg      0.975     0.796     0.871     21852



# All Discrete Features

In [24]:
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
crf.fit(wm2_all_ds, wm2_labels_ds)

y_pred = crf.predict(wm1_all)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_all)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.466     0.392     0.425       707
     I-claim      0.570     0.631     0.599      7407
     O-claim      0.815     0.782     0.798     16841

    accuracy                          0.726     24955
   macro avg      0.617     0.602     0.607     24955
weighted avg      0.732     0.726     0.728     24955

              precision    recall  f1-score   support

     B-claim      0.041     0.324     0.073        37
     I-claim      0.054     0.526     0.098       350
     O-claim      0.990     0.838     0.908     21465

    accuracy                          0.832     21852
   macro avg      0.362     0.563     0.360     21852
weighted avg      0.973     0.832     0.893     21852



In [25]:
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
crf.fit(wm2_all_ds2, wm2_labels_ds2)

y_pred = crf.predict(wm1_all)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_all)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.473     0.501     0.486       707
     I-claim      0.519     0.709     0.599      7407
     O-claim      0.833     0.697     0.759     16841

    accuracy                          0.695     24955
   macro avg      0.608     0.636     0.615     24955
weighted avg      0.730     0.695     0.704     24955

              precision    recall  f1-score   support

     B-claim      0.043     0.270     0.074        37
     I-claim      0.061     0.463     0.107       350
     O-claim      0.989     0.873     0.927     21465

    accuracy                          0.865     21852
   macro avg      0.364     0.535     0.369     21852
weighted avg      0.973     0.865     0.913     21852



# All Discrete + BERT

In [26]:
# x_all has both all and bert features after using merge
merge_features(wm1_features, wm1_all)
merge_features(wm2_all_ds, wm2_all_ds)
merge_features(wm2_all_ds2, wm2_all_ds2)
merge_features(wm_nr_features, wm_nr_all)

In [27]:
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
# crf = sklearn_crfsuite.CRF(algorithm='l2sgd', c2=0.1, max_iterations=1000, all_possible_transitions=True)
crf.fit(wm2_all_ds, wm2_labels_ds)

y_pred = crf.predict(wm1_all)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_all)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.466     0.392     0.425       707
     I-claim      0.570     0.631     0.599      7407
     O-claim      0.815     0.782     0.798     16841

    accuracy                          0.726     24955
   macro avg      0.617     0.602     0.607     24955
weighted avg      0.732     0.726     0.728     24955

              precision    recall  f1-score   support

     B-claim      0.041     0.324     0.073        37
     I-claim      0.054     0.526     0.098       350
     O-claim      0.990     0.838     0.908     21465

    accuracy                          0.832     21852
   macro avg      0.362     0.563     0.360     21852
weighted avg      0.973     0.832     0.893     21852



In [28]:
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
crf.fit(wm2_all_ds2, wm2_labels_ds2)

y_pred = crf.predict(wm1_all)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_all)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.473     0.501     0.486       707
     I-claim      0.519     0.709     0.599      7407
     O-claim      0.833     0.697     0.759     16841

    accuracy                          0.695     24955
   macro avg      0.608     0.636     0.615     24955
weighted avg      0.730     0.695     0.704     24955

              precision    recall  f1-score   support

     B-claim      0.043     0.270     0.074        37
     I-claim      0.061     0.463     0.107       350
     O-claim      0.989     0.873     0.927     21465

    accuracy                          0.865     21852
   macro avg      0.364     0.535     0.369     21852
weighted avg      0.973     0.865     0.913     21852

