In [None]:
import json
import pandas as pd

label2id = {
    'NONE': 0,
    'EVIDENCE': 1,
    'CLAIM': 2}

def load_corpus(path, label_mapping=None):
    with open(path) as fp:
        corpus = json.load(fp)

    documents, texts, labels = [], [], []
    for abstract in corpus:
        documents.append(abstract)
        texts.append(corpus[abstract]['sentences'])
        if isinstance(label_mapping, dict):
            labels.append(
                [label_mapping[str(l).upper()]
                    for l in corpus[abstract]['labels']])
        else:
            labels.append([str(l).upper() for l in corpus[abstract]['labels']])

    assert len(texts) == len(labels)
    data = pd.DataFrame(
        zip(documents, texts, labels),
        columns=['document', 'sentences', 'labels'])

    return data

data_v1 = load_corpus('dataset.json') #, label_mapping=label2id)
data_v3= load_corpus('dataset_aueb_argument_v3.json') #, label_mapping=label2id)
print(f'Dataset length: {len(data_v1)} abstracts')
print(f'Dataset length: {len(data_v3)} abstracts')

Dataset length: 1669 abstracts
Dataset length: 1017 abstracts


##Split Documents
For the cases we want the sentences separated, the following splits the documents. I keep the same document index in a new column in order to re-group the sentences to a document (e.g., after predictions).

In [None]:
#@title Split to sentences
sentences_v1 = data_v1['sentences'].explode().reset_index().rename(
    columns={'index': 'doc_id', 'sentences': 'sentence'})

sentences_v3 = data_v3['sentences'].explode().reset_index().rename(
    columns={'index': 'doc_id', 'sentences': 'sentence'})




In [None]:
#@title and the corresponding labels
labels_v1 = data_v1['labels'].explode().reset_index().rename(
    columns={'index': 'doc_id', 'labels': 'label'})

labels_v3 = data_v3['labels'].explode().reset_index().rename(
    columns={'index': 'doc_id', 'labels': 'label'})

In [None]:
sentences_v1["label"]=labels_v1["label"]
sentences_v3["label"]=labels_v3["label"]

data = sentences_v1.append(sentences_v3, ignore_index=True)
data['label'] = data['label'].str.replace('NONE','NEITHER')
print(f'Dataset length: {len(data)} abstracts')
data.head()

Unnamed: 0,doc_id,sentence,label
0,0,Gender Differences in Anxiety and Depression b...,NEITHER
1,0,Abstract,NEITHER
2,0,Background/aims: The aim of this prospective s...,NEITHER
3,0,"Methods: AUD severity, state and trait anxiety...",NEITHER
4,0,Follow-up assessments were performed at 6 and ...,NEITHER


In [None]:
# splitting in train-validation-test sets in a stratified manner.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(data['sentence'],
                                                            data['label'],
                                                            test_size=0.35,
                                                            random_state=42,
                                                            stratify=data['label'])

In [None]:
#create two different dataframes for claim and evidence
from collections import Counter
df=pd.concat([X_train,y_train], axis=1)
d_claim=df[df['label']=='CLAIM']
d_evidence=df[df['label']=='EVIDENCE']
#find top 50 most common words for evidence
Counter(" ".join(d_evidence["sentence"]).split()).most_common(50)

[('the', 4182),
 ('and', 3788),
 ('of', 3352),
 ('in', 2894),
 ('to', 1715),
 ('with', 1359),
 ('was', 1198),
 ('for', 1108),
 ('a', 1023),
 ('were', 963),
 ('=', 946),
 ('The', 722),
 ('that', 639),
 ('group', 526),
 ('at', 523),
 ('patients', 518),
 ('by', 485),
 ('as', 421),
 ('than', 412),
 ('or', 410),
 ('on', 402),
 ('between', 397),
 ('from', 394),
 ('(P', 374),
 ('is', 374),
 ('significant', 353),
 ('significantly', 342),
 ('P', 333),
 ('<', 301),
 ('more', 299),
 ('not', 295),
 ('had', 282),
 ('compared', 265),
 ('an', 262),
 ('95%', 262),
 ('higher', 251),
 ('are', 240),
 ('months', 233),
 ('no', 229),
 ('but', 218),
 ('both', 214),
 ('associated', 204),
 ('survival', 201),
 ('CI', 201),
 ('In', 199),
 ('difference', 190),
 ('treatment', 189),
 ('after', 187),
 ('increased', 181),
 ('be', 173)]

In [None]:
#find top 50 most common words for claim
Counter(" ".join(d_claim["sentence"]).split()).most_common(50)

[('the', 2167),
 ('of', 2063),
 ('and', 1814),
 ('in', 1434),
 ('to', 1195),
 ('a', 803),
 ('with', 790),
 ('for', 630),
 ('that', 583),
 ('is', 508),
 ('The', 354),
 ('be', 344),
 ('as', 313),
 ('on', 275),
 ('are', 271),
 ('patients', 263),
 ('by', 242),
 ('was', 202),
 ('an', 198),
 ('not', 180),
 ('results', 175),
 ('from', 171),
 ('this', 170),
 ('This', 166),
 ('can', 161),
 ('at', 156),
 ('treatment', 155),
 ('or', 149),
 ('In', 142),
 ('may', 140),
 ('more', 140),
 ('study', 138),
 ('health', 129),
 ('were', 127),
 ('but', 122),
 ('associated', 121),
 ('between', 121),
 ('climate', 118),
 ('have', 116),
 ('than', 114),
 ('which', 105),
 ('cancer', 103),
 ('Conclusions:', 103),
 ('We', 94),
 ('These', 93),
 ('should', 91),
 ('quality', 90),
 ('effective', 87),
 ('risk', 86),
 ('has', 84)]

In [None]:
#words for classification
claim=['conclusions','reveal', 'provide' , 'confirm' , 'suggests','reveals', 'provides' , 'confirms' ,'altogether','overall']
evidence=['results','found','showed','finds','findings','shows','associated']

In [None]:
sentences=X_test.to_list()#convert pd.series to list
label_pred=[]
for sentence in sentences: 
    sentence=sentence.split()
    n=len(sentence)
    for word in sentence:
        if word.lower() in claim:
            label_pred.append('CLAIM')#classify the sentence as claim if there is a word in the list with claims
            break
        elif word.lower() in evidence:
            label_pred.append('EVIDENCE')#classify the sentence as claim if there is a word in the list with evidences
            break
        n=n-1
    if n==0:
        label_pred.append('NEITHER')

In [None]:
from sklearn import metrics
print(metrics.classification_report(y_test,label_pred))#print precision recall f1-score 
print()
print(metrics.confusion_matrix(y_test,label_pred))#print confusion matrix

              precision    recall  f1-score   support

       CLAIM       0.26      0.09      0.13      1197
    EVIDENCE       0.32      0.16      0.21      2173
     NEITHER       0.74      0.91      0.81      7832

    accuracy                           0.68     11202
   macro avg       0.44      0.39      0.39     11202
weighted avg       0.60      0.68      0.62     11202


[[ 108  241  848]
 [ 106  351 1716]
 [ 205  507 7120]]
