In [5]:
import pickle
import numpy as np

from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from collections import Counter

In [2]:
def artcile_split_and_pred_para(sentences, predictions, add_one_pred=True):
    """Split sentence lines into articles and compute per-article label ratios.

    Parameters
    ----------
    sentences : sequence of str
        Tab-separated lines whose first field is the sentence text. A line
        equal to the ARTICLE_SPLIT_LINE marker closes the current article.
    predictions : sequence of str
        One integer label per line, as text (trailing whitespace is stripped
        before conversion).
    add_one_pred : bool
        When True, a placeholder label-0 entry is put in front of the
        predictions before pairing them with the sentences. This works on a
        copy, so the caller's list is left unchanged and the function can be
        called repeatedly on the same list.

    Returns
    -------
    article_splits : list of list of (str, int)
        For each closed article, its (sentence text, label) pairs.
    features : list of tuple
        The result of get_claim_premise_percentages for each closed article.

    Raises
    ------
    AssertionError
        If sentences and predictions differ in length after the optional
        placeholder has been added.

    Sentences that follow the last marker line are not returned.
    """
    if add_one_pred:
        # Build a new list rather than inserting into the caller's list.
        predictions = ['0\n'] + list(predictions)
    assert len(sentences) == len(predictions)

    article, article_splits, features = [], [], []
    for sent, pred in zip(sentences, predictions):
        if sent == 'ARTICLE_SPLIT_LINE\t0\n':
            article_splits.append(article)
            features.append(get_claim_premise_percentages(article))
            article = []
        else:
            article.append((sent.split('\t')[0], int(pred.rstrip())))

    return article_splits, features
def get_claim_premise_percentages(article):
    """Return the fractions of claim, premise and other sentences in an article.

    article is an iterable of (sentence, label) pairs. Label 1 counts as a
    claim, label 2 as a premise and every other label as "none".
    The result is (claim, premise, none) as fractions of the sentence count,
    or (0, 0, 0) when the article has no sentences.
    """
    labels = [label for _, label in article]
    total = len(labels)
    if total == 0:
        return 0, 0, 0

    n_claim = sum(1 for label in labels if label == 1)
    n_premise = sum(1 for label in labels if label != 1 and label == 2)
    n_none = total - n_claim - n_premise
    return n_claim / total, n_premise / total, n_none / total



def artcile_split_and_pred_emb(sentences, predictions):
    """Group (sentence text, predicted label) pairs by article.

    NOTE: predictions is modified in place -- a placeholder label-0 line is
    inserted at index 0 before the lines are paired with sentences, and the
    caller's list keeps that extra entry afterwards.

    A sentence line equal to the ARTICLE_SPLIT_LINE marker closes the current
    article; sentences after the last marker are not returned. Pairing stops
    at the shorter of the two inputs.
    """
    predictions.insert(0, '0\n')

    finished, current = [], []
    for line, label_text in zip(sentences, predictions):
        if line != 'ARTICLE_SPLIT_LINE\t0\n':
            text = line.split('\t')[0]
            current.append((text, int(label_text.rstrip())))
            continue
        # Marker line: close the running article and start a new one.
        finished.append(current)
        current = []
    return finished
def article_sent_labels_emb(article_sent_labels):
    """Return the labels of an article's (sentence, label) pairs, in order."""
    return [label for _, label in article_sent_labels]
def _labels_to_one_hot(labels):
    """One-hot encode non-negative integer labels as a float32 matrix."""
    label_ids = np.asarray(labels, dtype=int)
    n_classes = int(label_ids.max()) + 1 if label_ids.size else 0
    one_hot = np.zeros((label_ids.size, n_classes), dtype='float32')
    one_hot[np.arange(label_ids.size), label_ids] = 1
    return one_hot


def _rows_to_array(rows):
    """Turn per-article label lists into an array.

    Rows of equal length give a regular array. Rows of differing length are
    stored as a 1-D object array of lists, because recent NumPy releases raise
    instead of building an array from ragged nested lists implicitly.
    """
    if len({len(row) for row in rows}) <= 1:
        return np.array(rows)
    ragged = np.empty(len(rows), dtype=object)
    for position, row in enumerate(rows):
        ragged[position] = row
    return ragged


def prepare_data(articles, sentences, predictions , mode='not_categorical'):
    """Build per-article sentence-label sequences and article-level labels.

    Parameters
    ----------
    articles : sequence of str
        Tab-separated article lines; the second field is the integer label.
    sentences, predictions : list of str
        Passed to artcile_split_and_pred_emb, which prepends a placeholder
        entry to predictions in place.
    mode : str
        'categorical' returns the labels one-hot encoded; any other value
        returns them as a plain integer array.

    Returns
    -------
    (features, labels) : tuple of numpy.ndarray
        features holds one sequence of sentence labels per article.

    Articles and sentence groups are paired positionally; pairing stops at
    the shorter of the two.
    """
    arg = artcile_split_and_pred_emb(sentences, predictions)
    features, labels = [], []
    for article_arg, article_line in zip(arg, articles):
        features.append(article_sent_labels_emb(article_arg))
        labels.append(int(article_line.split('\t')[1].rstrip()))

    feature_array = _rows_to_array(features)
    if mode == 'categorical':
        # Encoded with NumPy: the notebook never imports np_utils, so the
        # previous np_utils.to_categorical call raised NameError.
        return feature_array, _labels_to_one_hot(labels)
    else: # mode == 'not_categorical'
        return feature_array, np.array(labels)


def get_desired_layer_as_feature(embeddings, layers, get_sum=True, dim=768):
    '''Combine selected layers of each item's first-token embedding into one feature vector.

    embeddings : iterable of dicts; the vector for layer id k is read from
        item['features'][0]['layers'][k-1]['values']
    layers : list of 1-based layer ids (the original note allows ids in [1,2,3,4])
    get_sum : True to sum the selected layers, False to average them
    dim : length of each layer vector (defaults to 768)

    Returns a list with one numpy array of length dim per item.
    Raises ValueError when an average over an empty layers list is requested,
    which would otherwise divide by zero and yield NaN features.
    '''
    if not get_sum and len(layers) == 0:
        raise ValueError('cannot average over an empty list of layers')

    features = []
    for item in embeddings:
        token_layers = item['features'][0]['layers']
        cls_sum = np.zeros(dim)
        for layer in layers:
            cls_sum += token_layers[layer - 1]['values']
        features.append(cls_sum if get_sum else cls_sum / len(layers))
    return features

In [3]:
def _read_lines(path):
    """Return all lines of a text file, closing the file afterwards."""
    with open(path) as handle:
        return handle.readlines()


def _load_pickle(path):
    """Unpickle one object from path and close the file.

    Unpickling can execute arbitrary code, so only use this on trusted files.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Raw article lines, sentence lines and sentence-level predictions per split.
train_articles = _read_lines('data/train.tsv')
train_sent = _read_lines('data/train_sent_fixed/dev.tsv')
train_pred = _read_lines('data/train_sent_fixed/predictions_editorial-claim-premise-bert.txt')

dev_articles = _read_lines('data/dev.tsv')
dev_sent = _read_lines('data/dev_sent_fixed/dev.tsv')
# NOTE(review): only this file is read from ../coling/data instead of data/ -- confirm this is intended.
dev_pred = _read_lines('../coling/data/dev_sent_fixed/predictions_editorial-claim-premise-bert.txt')

test_articles = _read_lines('data/test/dev.tsv')
test_sent = _read_lines('data/test_sent/dev.tsv')
test_pred = _read_lines('data/test_sent/predictions_editorial-claim-premise-bert.txt')

# Only the article-level labels are kept here. As a side effect, prepare_data
# (via artcile_split_and_pred_emb) prepends a placeholder entry to each
# *_pred list in place.
_, train_labels = prepare_data(train_articles, train_sent, train_pred)
_, dev_labels = prepare_data(dev_articles, dev_sent, dev_pred)
_, test_labels = prepare_data(test_articles, test_sent, test_pred)

# The *_pred lists already carry the placeholder added above, hence
# add_one_pred=False. The *_articles names are rebound from raw lines to the
# per-article (sentence, label) lists.
train_articles, train_features = artcile_split_and_pred_para(train_sent, train_pred, add_one_pred=False)
dev_articles, dev_features = artcile_split_and_pred_para(dev_sent, dev_pred, add_one_pred=False)
test_articles, test_features = artcile_split_and_pred_para(test_sent, test_pred, add_one_pred=False)


train_embeddings = _load_pickle('data/train.pkl')
dev_embeddings = _load_pickle('data/dev.pkl')
test_embeddings = _load_pickle('data/test/test.pkl')

# Use layer id 4 of the stored embeddings as the feature vector.
train_embeddings_features = np.array(get_desired_layer_as_feature(train_embeddings, [4]))
dev_embeddings_features = np.array(get_desired_layer_as_feature(dev_embeddings, [4]))
test_embeddings_features = np.array(get_desired_layer_as_feature(test_embeddings, [4]))

print(train_embeddings_features[0].shape, type(train_embeddings_features))
print(dev_embeddings_features[0].shape, type(dev_embeddings_features))
print(test_embeddings_features[0].shape, type(test_embeddings_features))

(768,) <class 'numpy.ndarray'>
(768,) <class 'numpy.ndarray'>
(768,) <class 'numpy.ndarray'>


In [4]:
# Baseline: linear SVM trained on the embedding features only.
model = LinearSVC()
model.fit(train_embeddings_features, list(train_labels))

# Report on train, dev and test, in that order.
for split_features, split_labels in (
    (train_embeddings_features, train_labels),
    (dev_embeddings_features, dev_labels),
    (test_embeddings_features, test_labels),
):
    pred = model.predict(split_features)
    print(classification_report(list(split_labels), pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3193
           1       1.00      1.00      1.00      3193

    accuracy                           1.00      6386
   macro avg       1.00      1.00      1.00      6386
weighted avg       1.00      1.00      1.00      6386

              precision    recall  f1-score   support

           0       0.99      0.92      0.96      3083
           1       0.58      0.93      0.72       353

    accuracy                           0.92      3436
   macro avg       0.79      0.93      0.84      3436
weighted avg       0.95      0.92      0.93      3436

              precision    recall  f1-score   support

           0       0.95      0.96      0.95      1387
           1       0.86      0.82      0.84       418

    accuracy                           0.93      1805
   macro avg       0.90      0.89      0.89      1805
weighted avg       0.92      0.93      0.93      1805





## Balanced Dev and Test sets

In [26]:
from sklearn.utils import shuffle


def _downsample_negatives(embeddings, features, labels, limit):
    """Keep every label-1 example and at most `limit` label-0 examples.

    Examples are taken in input order; any other label is dropped.
    Returns (embeddings, features, labels, n_negatives_kept), the first three
    as plain lists.
    """
    kept_emb, kept_features, kept_labels = [], [], []
    n_negatives = 0
    for emb, feat, label in zip(embeddings, features, labels):
        if label == 0:
            if n_negatives >= limit:
                continue
            n_negatives += 1
        elif label != 1:
            continue
        kept_emb.append(emb)
        kept_features.append(feat)
        kept_labels.append(label)
    return kept_emb, kept_features, kept_labels, n_negatives


# Shuffle with a fixed seed so the retained negatives are reproducible.
shuf_dev_emb, shuf_dev_features, shuf_dev_labels = shuffle(dev_embeddings_features, dev_features, list(dev_labels), random_state=293)
# Cap the negatives at the number of positives (353 on the recorded dev set)
# instead of hard-coding the count.
dev_limit = sum(1 for label in shuf_dev_labels if label == 1)
bal_dev_emb, bal_dev_features, bal_dev_labels, dev_count = _downsample_negatives(
    shuf_dev_emb, shuf_dev_features, shuf_dev_labels, dev_limit)


shuf_test_emb, shuf_test_features, shuf_test_labels = shuffle(test_embeddings_features, test_features, list(test_labels), random_state=293)
# Same rule for the test set (418 positives on the recorded test set).
test_limit = sum(1 for label in shuf_test_labels if label == 1)
bal_test_emb, bal_test_features, bal_test_labels, test_count = _downsample_negatives(
    shuf_test_emb, shuf_test_features, shuf_test_labels, test_limit)


In [27]:
# Evaluate the current model on the class-balanced dev and test subsets.
for bal_emb, bal_labels in (
    (bal_dev_emb, bal_dev_labels),
    (bal_test_emb, bal_test_labels),
):
    pred = model.predict(bal_emb)
    print(classification_report(list(bal_labels), pred))

              precision    recall  f1-score   support

           0       0.93      0.93      0.93       353
           1       0.93      0.93      0.93       353

    accuracy                           0.93       706
   macro avg       0.93      0.93      0.93       706
weighted avg       0.93      0.93      0.93       706

              precision    recall  f1-score   support

           0       0.84      0.96      0.90       418
           1       0.96      0.82      0.88       418

    accuracy                           0.89       836
   macro avg       0.90      0.89      0.89       836
weighted avg       0.90      0.89      0.89       836



In [31]:
# Append each article's (claim, premise, none) ratios to its embedding row.
train_combined_features = [[*emb, *arg] for emb, arg in zip(train_embeddings_features, train_features)]
dev_combined_features = [[*emb, *arg] for emb, arg in zip(dev_embeddings_features, dev_features)]
test_combined_features = [[*emb, *arg] for emb, arg in zip(test_embeddings_features, test_features)]

# Linear SVM on the combined embedding + argumentation features.
model = LinearSVC()
model.fit(train_combined_features, list(train_labels))

# Report on train, dev and test, in that order.
for combined_rows, gold_labels in (
    (train_combined_features, train_labels),
    (dev_combined_features, dev_labels),
    (test_combined_features, test_labels),
):
    pred = model.predict(combined_rows)
    print(classification_report(list(gold_labels), pred))



              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3193
           1       1.00      1.00      1.00      3193

    accuracy                           1.00      6386
   macro avg       1.00      1.00      1.00      6386
weighted avg       1.00      1.00      1.00      6386

              precision    recall  f1-score   support

           0       0.99      0.92      0.96      3083
           1       0.58      0.93      0.71       353

    accuracy                           0.92      3436
   macro avg       0.78      0.93      0.83      3436
weighted avg       0.95      0.92      0.93      3436

              precision    recall  f1-score   support

           0       0.95      0.96      0.95      1387
           1       0.85      0.82      0.83       418

    accuracy                           0.92      1805
   macro avg       0.90      0.89      0.89      1805
weighted avg       0.92      0.92      0.92      1805



In [32]:
# Combined embedding + argumentation rows for the balanced subsets.
bal_dev_combined_features = [[*emb, *arg] for emb, arg in zip(bal_dev_emb, bal_dev_features)]
bal_test_combined_features = [[*emb, *arg] for emb, arg in zip(bal_test_emb, bal_test_features)]

# Evaluate the current model on balanced dev, then balanced test.
for bal_rows, bal_gold in (
    (bal_dev_combined_features, bal_dev_labels),
    (bal_test_combined_features, bal_test_labels),
):
    pred = model.predict(bal_rows)
    print(classification_report(list(bal_gold), pred))

              precision    recall  f1-score   support

           0       0.93      0.93      0.93       353
           1       0.93      0.93      0.93       353

    accuracy                           0.93       706
   macro avg       0.93      0.93      0.93       706
weighted avg       0.93      0.93      0.93       706

              precision    recall  f1-score   support

           0       0.84      0.96      0.90       418
           1       0.95      0.82      0.88       418

    accuracy                           0.89       836
   macro avg       0.90      0.89      0.89       836
weighted avg       0.90      0.89      0.89       836



## Separate Dev/Test sets for Editorials and Letters

In [6]:
def _select_by_position(items, positions):
    """Return the items whose index is in positions, keeping their order."""
    return [item for idx, item in enumerate(items) if idx in positions]


# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open('dev_set_ids_edi_let.p', 'rb') as handle:
    dev_set_ids = pickle.load(handle)

# One index set per genre, built once instead of re-testing membership in the
# stored collections for every row.
# Assumes the stored ids are hashable integer positions -- TODO confirm.
dev_edi_idx = set(dev_set_ids['editorial_ids']) | set(dev_set_ids['editorial_news_ids'])
dev_let_idx = set(dev_set_ids['letter_ids']) | set(dev_set_ids['letter_news_ids'])

dev_features_edi = _select_by_position(dev_features, dev_edi_idx)
dev_emb_features_edi = _select_by_position(dev_embeddings_features, dev_edi_idx)
dev_labels_edi = _select_by_position(dev_labels, dev_edi_idx)

dev_features_let = _select_by_position(dev_features, dev_let_idx)
dev_emb_features_let = _select_by_position(dev_embeddings_features, dev_let_idx)
dev_labels_let = _select_by_position(dev_labels, dev_let_idx)

print(len(dev_emb_features_edi), len(dev_features_edi), len(dev_labels_edi), Counter(dev_labels_edi))
print(len(dev_emb_features_let), len(dev_features_let), len(dev_labels_let), Counter(dev_labels_let))


with open('test_set_ids_edi_let.p', 'rb') as handle:
    test_set_ids = pickle.load(handle)

# The test id file uses 'other_*' keys for what the *_let variables hold,
# where the dev file uses 'letter_*'.
test_edi_idx = set(test_set_ids['editorial_ids']) | set(test_set_ids['editorial_news_ids'])
test_let_idx = set(test_set_ids['other_ids']) | set(test_set_ids['other_news_ids'])

test_features_edi = _select_by_position(test_features, test_edi_idx)
test_emb_features_edi = _select_by_position(test_embeddings_features, test_edi_idx)
test_labels_edi = _select_by_position(test_labels, test_edi_idx)

test_features_let = _select_by_position(test_features, test_let_idx)
test_emb_features_let = _select_by_position(test_embeddings_features, test_let_idx)
test_labels_let = _select_by_position(test_labels, test_let_idx)

print(len(test_emb_features_edi), len(test_features_edi), len(test_labels_edi), Counter(test_labels_edi))
print(len(test_emb_features_let), len(test_features_let), len(test_labels_let), Counter(test_labels_let))

140 140 140 Counter({1: 70, 0: 70})
100 100 100 Counter({0: 50, 1: 50})
548 548 548 Counter({0: 274, 1: 274})
288 288 288 Counter({0: 144, 1: 144})


In [7]:
# Training rows: embedding followed by the article's argumentation ratios.
train_combined_features = [[*emb, *arg] for emb, arg in zip(train_embeddings_features, train_features)]

# Only the per-genre dev/test rows are built here; the full dev/test combined
# features are not rebuilt in this cell.
dev_combined_features_edi = [[*emb, *arg] for emb, arg in zip(dev_emb_features_edi, dev_features_edi)]
dev_combined_features_let = [[*emb, *arg] for emb, arg in zip(dev_emb_features_let, dev_features_let)]

test_combined_features_edi = [[*emb, *arg] for emb, arg in zip(test_emb_features_edi, test_features_edi)]
test_combined_features_let = [[*emb, *arg] for emb, arg in zip(test_emb_features_let, test_features_let)]

### Model 1: SVM (Emb)

In [8]:
# Model 1: linear SVM on the embedding features only.
model = LinearSVC()
model.fit(train_embeddings_features, list(train_labels))
# Train-set predictions are computed but their report is not printed.
pred = model.predict(train_embeddings_features)

# Reports, in order: dev editorials, dev letters, test editorials, test letters.
for genre_features, genre_labels in (
    (dev_emb_features_edi, dev_labels_edi),
    (dev_emb_features_let, dev_labels_let),
    (test_emb_features_edi, test_labels_edi),
    (test_emb_features_let, test_labels_let),
):
    pred = model.predict(genre_features)
    print(classification_report(list(genre_labels), pred))

              precision    recall  f1-score   support

           0       0.93      0.93      0.93        70
           1       0.93      0.93      0.93        70

    accuracy                           0.93       140
   macro avg       0.93      0.93      0.93       140
weighted avg       0.93      0.93      0.93       140

              precision    recall  f1-score   support

           0       0.96      1.00      0.98        50
           1       1.00      0.96      0.98        50

    accuracy                           0.98       100
   macro avg       0.98      0.98      0.98       100
weighted avg       0.98      0.98      0.98       100

              precision    recall  f1-score   support

           0       0.83      0.98      0.90       274
           1       0.97      0.80      0.88       274

    accuracy                           0.89       548
   macro avg       0.90      0.89      0.89       548
weighted avg       0.90      0.89      0.89       548

              preci



### Model 2: SVM (Emb+Arg)

In [9]:
# Model 2: linear SVM on embedding + argumentation features.
model = LinearSVC()
model.fit(train_combined_features, list(train_labels))
# Train-set predictions are computed but their report is not printed.
pred = model.predict(train_combined_features)

# Reports, in order: dev editorials, dev letters, test editorials, test letters.
for genre_features, genre_labels in (
    (dev_combined_features_edi, dev_labels_edi),
    (dev_combined_features_let, dev_labels_let),
    (test_combined_features_edi, test_labels_edi),
    (test_combined_features_let, test_labels_let),
):
    pred = model.predict(genre_features)
    print(classification_report(list(genre_labels), pred))

              precision    recall  f1-score   support

           0       0.93      0.93      0.93        70
           1       0.93      0.93      0.93        70

    accuracy                           0.93       140
   macro avg       0.93      0.93      0.93       140
weighted avg       0.93      0.93      0.93       140

              precision    recall  f1-score   support

           0       0.96      1.00      0.98        50
           1       1.00      0.96      0.98        50

    accuracy                           0.98       100
   macro avg       0.98      0.98      0.98       100
weighted avg       0.98      0.98      0.98       100

              precision    recall  f1-score   support

           0       0.82      0.98      0.89       274
           1       0.97      0.79      0.87       274

    accuracy                           0.89       548
   macro avg       0.90      0.89      0.88       548
weighted avg       0.90      0.89      0.88       548

              preci

