In [1]:
import pickle
import copy
import numpy as np
import sklearn
import scipy.stats
from sklearn.metrics import make_scorer
from sklearn.metrics import classification_report
# from sklearn.cross_validation import cross_val_score
# from sklearn.grid_search import RandomizedSearchCV

import sklearn_crfsuite
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics

In [8]:
def get_sent_labels(token_list):
    sent_labels, sentences, sent_start = [], [], 0
    for i, line in enumerate(token_list):
        if line == '\n':
            sentences.append(sent_labels)
            sent_labels = []
        else:        
            token, label = line.rstrip().split()
            sent_labels.append(label)
    return sentences

def get_sent_tokens(token_list):
    sent_tokens, sentences, sent_start = [], [], 0
    for i, line in enumerate(token_list):
        if line == '\n':
            sentences.append(sent_tokens)
            sent_tokens = []
        else:        
            token, label = line.rstrip().split()
            sent_tokens.append(token)
    return sentences

def sent2features(sent_emb):
    features = []

    for word_emb in sent_emb:
        word_features = {}
        if len(word_emb.shape) > 0:
            for i in range(word_emb.shape[0]):
                word_features['bert_features_{}'.format(i)] = float(word_emb[i])
        else:
            word_features['bert_features_0'] = float(word_emb)
            
        features.append(copy.deepcopy(word_features))
        del word_features
    
    return features

def merge_features(bert_features, other_features):
    
    for sent_emb_features, sent_other_features in zip(bert_features, other_features):
        
        for word_emb_features, word_other_features in zip(sent_emb_features[:len(sent_other_features)], sent_other_features):
            word_other_features.update(word_emb_features)
        
        if len(sent_other_features) > len(sent_emb_features):
            for _ in range(len(sent_other_features)-len(sent_emb_features)):
                sent_other_features.pop()

# BERT features

In [3]:
wm1 = open('../../data_wm/arg_clean_45_1/test.txt','r').readlines()
wm1_labels = get_sent_labels(wm1)
wm1_features = pickle.load(open('../features/wm1_emb.p','rb'))
print(len(wm1_features), len(wm1_labels))

wm2 = open('../../data_wm/arg_clean_45_2/train.txt','r').readlines()
wm2_labels = get_sent_labels(wm2)
wm2_features = pickle.load(open('../features/wm2_emb.p','rb'))
print(len(wm2_features), len(wm2_labels))

wm_nr = open('../../data_wm/wm_narrative/test.txt','r').readlines()
wm_nr_labels = get_sent_labels(wm_nr)

wm_nr_features = pickle.load(open('../features/wm_nr_emb.p','rb'))
print(len(wm_nr_features), len(wm_nr_labels))

1266 1266
1862 1862
1332 1332


In [9]:
wm1_tokens = get_sent_tokens(wm1)
wm2_tokens = get_sent_tokens(wm2)
wm_nr_tokens = get_sent_tokens(wm_nr)

In [12]:
# bert embeddings only
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
crf.fit(wm2_features, wm2_labels)

y_pred = crf.predict(wm1_features)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_features)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.527     0.291     0.375       707
     I-claim      0.662     0.432     0.523      7407
     O-claim      0.768     0.900     0.829     16841

    accuracy                          0.744     24955
   macro avg      0.652     0.541     0.576     24955
weighted avg      0.730     0.744     0.725     24955

              precision    recall  f1-score   support

     B-claim      0.053     0.270     0.089        37
     I-claim      0.092     0.426     0.151       350
     O-claim      0.989     0.923     0.955     21465

    accuracy                          0.914     21852
   macro avg      0.378     0.540     0.398     21852
weighted avg      0.973     0.914     0.941     21852



In [18]:
# bert embeddings only
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=1000, all_possible_transitions=True)
crf.fit(wm2_features, wm2_labels)

y_pred = crf.predict(wm1_features)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_features)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.475     0.327     0.387       707
     I-claim      0.603     0.473     0.530      7407
     O-claim      0.773     0.856     0.812     16841

    accuracy                          0.728     24955
   macro avg      0.617     0.552     0.577     24955
weighted avg      0.714     0.728     0.717     24955

              precision    recall  f1-score   support

     B-claim      0.052     0.324     0.090        37
     I-claim      0.089     0.511     0.152       350
     O-claim      0.990     0.905     0.946     21465

    accuracy                          0.898     21852
   macro avg      0.377     0.580     0.396     21852
weighted avg      0.974     0.898     0.931     21852



In [17]:
# bert embeddings only
crf = sklearn_crfsuite.CRF(algorithm='l2sgd', c2=0.1, max_iterations=1000, all_possible_transitions=True)
crf.fit(wm2_features, wm2_labels)

y_pred = crf.predict(wm1_features)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_features)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.495     0.355     0.414       707
     I-claim      0.648     0.529     0.582      7407
     O-claim      0.794     0.868     0.830     16841

    accuracy                          0.753     24955
   macro avg      0.646     0.584     0.608     24955
weighted avg      0.742     0.753     0.744     24955

              precision    recall  f1-score   support

     B-claim      0.057     0.324     0.096        37
     I-claim      0.112     0.580     0.188       350
     O-claim      0.992     0.916     0.952     21465

    accuracy                          0.910     21852
   macro avg      0.387     0.607     0.412     21852
weighted avg      0.976     0.910     0.939     21852



# LexSyn

In [23]:
wm1_lexsyn = pickle.load(open('../features/wm1_lexsyn.p','rb'))
wm2_lexsyn = pickle.load(open('../features/wm2_lexsyn.p','rb'))
wm_nr_lexsyn = pickle.load(open('../features/wm_nr_lexsyn.p','rb'))

In [20]:
# LexSyn only
# crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
crf = sklearn_crfsuite.CRF(algorithm='l2sgd', c2=0.1, max_iterations=1000, all_possible_transitions=True)
crf.fit(wm2_lexsyn, wm2_labels)

y_pred = crf.predict(wm1_lexsyn)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_lexsyn)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.584     0.301     0.397       707
     I-claim      0.703     0.379     0.493      7407
     O-claim      0.757     0.926     0.833     16841

    accuracy                          0.746     24955
   macro avg      0.681     0.535     0.574     24955
weighted avg      0.736     0.746     0.720     24955

              precision    recall  f1-score   support

     B-claim      0.057     0.243     0.092        37
     I-claim      0.095     0.406     0.154       350
     O-claim      0.989     0.931     0.959     21465

    accuracy                          0.921     21852
   macro avg      0.380     0.526     0.402     21852
weighted avg      0.973     0.921     0.944     21852



# BERT + LexSyn features

In [24]:
# x_lexsyn has both lexsyn and bert features after using merge
merge_features(wm1_features, wm1_lexsyn)
merge_features(wm2_features, wm2_lexsyn)
merge_features(wm_nr_features, wm_nr_lexsyn)

In [26]:
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
# crf = sklearn_crfsuite.CRF(algorithm='l2sgd', c2=0.1, max_iterations=1000, all_possible_transitions=True)
crf.fit(wm2_lexsyn, wm2_labels)

y_pred = crf.predict(wm1_lexsyn)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_lexsyn)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.572     0.337     0.424       707
     I-claim      0.718     0.478     0.574      7407
     O-claim      0.785     0.914     0.845     16841

    accuracy                          0.768     24955
   macro avg      0.692     0.576     0.614     24955
weighted avg      0.759     0.768     0.752     24955

              precision    recall  f1-score   support

     B-claim      0.050     0.216     0.082        37
     I-claim      0.117     0.494     0.189       350
     O-claim      0.990     0.932     0.960     21465

    accuracy                          0.924     21852
   macro avg      0.386     0.548     0.410     21852
weighted avg      0.975     0.924     0.947     21852



# All Discrete Features

In [4]:
wm1_all = pickle.load(open('../features/wm1_all.p','rb'))
wm2_all = pickle.load(open('../features/wm2_all.p','rb'))
wm_nr_all = pickle.load(open('../features/wm_nr_all.p','rb'))

In [38]:
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
# crf = sklearn_crfsuite.CRF(algorithm='l2sgd', c2=0.1, max_iterations=1000, all_possible_transitions=True)
crf.fit(wm2_all, wm2_labels)

y_pred = crf.predict(wm1_all)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.433     0.465     0.449       707
     I-claim      0.552     0.674     0.607      7407
     O-claim      0.829     0.745     0.785     16841

    accuracy                          0.716     24955
   macro avg      0.605     0.628     0.614     24955
weighted avg      0.735     0.716     0.722     24955



In [46]:
y_pred = crf.predict(wm2_all)
y_test_flat = [y for y_seq in wm2_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.523     0.587     0.553       951
     I-claim      0.636     0.772     0.697     10263
     O-claim      0.891     0.809     0.848     25332

    accuracy                          0.793     36546
   macro avg      0.683     0.723     0.700     36546
weighted avg      0.810     0.793     0.798     36546



In [32]:
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
# crf = sklearn_crfsuite.CRF(algorithm='l2sgd', c2=0.1, max_iterations=1000, all_possible_transitions=True)
crf.fit(wm2_all, wm2_labels)

y_pred = crf.predict(wm1_all)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_all)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.433     0.465     0.449       707
     I-claim      0.552     0.674     0.607      7407
     O-claim      0.829     0.745     0.785     16841

    accuracy                          0.716     24955
   macro avg      0.605     0.628     0.614     24955
weighted avg      0.735     0.716     0.722     24955

              precision    recall  f1-score   support

     B-claim      0.048     0.378     0.085        37
     I-claim      0.067     0.523     0.120       350
     O-claim      0.990     0.870     0.926     21465

    accuracy                          0.863     21852
   macro avg      0.369     0.590     0.377     21852
weighted avg      0.974     0.863     0.912     21852



# All Discrete + BERT

In [5]:
# x_all has both all and bert features after using merge
merge_features(wm1_features, wm1_all)
merge_features(wm2_features, wm2_all)
merge_features(wm_nr_features, wm_nr_all)

In [6]:
crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
# crf = sklearn_crfsuite.CRF(algorithm='l2sgd', c2=0.1, max_iterations=1000, all_possible_transitions=True)
crf.fit(wm2_all, wm2_labels)

y_pred = crf.predict(wm1_all)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

y_pred = crf.predict(wm_nr_all)
y_test_flat = [y for y_seq in wm_nr_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]
print(classification_report(y_test_flat, y_pred_flat, digits=3))

              precision    recall  f1-score   support

     B-claim      0.475     0.437     0.455       707
     I-claim      0.585     0.661     0.621      7407
     O-claim      0.828     0.783     0.805     16841

    accuracy                          0.737     24955
   macro avg      0.629     0.627     0.627     24955
weighted avg      0.746     0.737     0.741     24955

              precision    recall  f1-score   support

     B-claim      0.038     0.189     0.064        37
     I-claim      0.074     0.440     0.126       350
     O-claim      0.989     0.902     0.943     21465

    accuracy                          0.893     21852
   macro avg      0.367     0.510     0.378     21852
weighted avg      0.973     0.893     0.929     21852



In [14]:
y_pred = crf.predict(wm1_all)
y_test_flat = [y for y_seq in wm1_labels for y in y_seq]
y_pred_flat = [y for y_seq in y_pred for y in y_seq]

In [16]:
for i, (tokens, labels, preds) in enumerate(zip(wm1_tokens[:100], wm1_labels[:100],y_pred[:100])):
    print(i+1)
    print(tokens)
    print(labels)
    print(preds)
    print()

1
['Sharks', 'are', 'Not', 'the', 'Problem']
['O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim']
['O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim']

2
['You', 'know', 'what', 'I', "'", 'm', 'talking', 'about', ',', 'the', 'large', 'fish', 'that', 'can', 'kill', 'you', 'with', 'one', 'advance', 'at', 'your', 'limbs', '.']
['O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim']
['O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim']

3
['One', 'of', 'many', 'things', 'lurking', 'in', 'the', 'unknown', 'waters', 'of', 'the', 'ocean', 'ready', 'for', 'the', 'amazing', 'taste', 'of', 'your', '

In [27]:
# count short vs long sentences
count_long, count_comma, count_modal, count_multi_claim = 0, 0, 0, 0
for i, (tokens, labels, preds) in enumerate(zip(wm1_tokens[:100], wm1_labels[:100],y_pred[:100])):
    if len(tokens)> 15:
        count_long += 1
    if ',' in tokens:
        count_comma += 1
    modal = [token for token in tokens if token in ['should','would','can','could','must','will']]
    if len(modal)>0:
        count_modal+=1
    if len([label for label in labels if label=='B-claim'])>1:
        count_multi_claim+=1
        print(tokens, '\n', labels, '\n', preds, '\n')
        
print(count_long, count_comma, count_modal, count_multi_claim)

['Sharks', 'are', "n't", 'monsters', ',', 'in', 'reality', ',', 'they', 'are', 'very', 'special', 'animals', ',', 'what', 'I', 'mean', 'by', 'that', 'is', 'getting', 'rid', 'of', 'them', 'could', 'cause', 'the', 'whole', 'oceans', 'food', 'chain', 'to', 'collapse', '.'] 
 ['B-claim', 'I-claim', 'I-claim', 'I-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'B-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'B-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'O-claim'] 
 ['B-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'O-claim', 'B-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim', 'I-claim

In [28]:
bert_pred = open('/Users/talhindi/Documents/eval_preds_results_wm_argu_bert_train_wm_argu2_gre.txt').readlines()

In [33]:
sent_ends = [tokens[-2:] for tokens in wm1_tokens[:100]]

In [35]:
sent, bert_sent = [], []
prev_line, i = bert_pred[0], 0

for line in bert_pred[:1898]:
    
    if prev_line[0] == sent_ends[i][0] and line[0] == sent_ends[i][1]:
        bert_sent.append(sent)
        sent = []
        i +=1
    else:
        sent.append(line.split('\t'))
    
    prev_line = line

len(bert_sent)

0