In [4]:
import pandas as pd
import nltk
from nltk import word_tokenize
import os

negation_cues = ['avoid','absence of','dishonest','involuntarily','without','nobody','lest','no', 'not', 'never', 'neither', 'nor', "can't", "won't", "don't", "didn't", "doesn't", "isn't", "aren't", "wasn't", "weren't", "haven't", "hasn't", "hadn't"]

# load the IBM Debater – Claim Stance Dataset
df = pd.read_csv('article_info.csv')

negation_cue_dict = {key: 0 for key in negation_cues}


def count_negation_cues(path):
    with open(path, 'r', encoding='utf-8') as file:
        text = file.read()
    tokens = word_tokenize(text)
    n_count = 0
    for token in tokens:
        if token.lower() in negation_cues:
            cue = token.lower()
            if cue in negation_cue_dict.keys():
                negation_cue_dict[cue] += 1
            n_count += 1
    return n_count


def get_least_frequent_negation_cues(d):
    min_count = min(d.values())
    return [k for k, v in d.items() if v == min_count]


df['negation_count'] = df['clean_file'].apply(lambda x: count_negation_cues(x))

print(df[['negation_count']].describe())

print(negation_cue_dict)

print("Most frequent Negation Cue:",max(negation_cue_dict , key=negation_cue_dict.get))
print("Least frequent Negation Cues:",get_least_frequent_negation_cues(negation_cue_dict))


       negation_count
count     1056.000000
mean        20.786932
std         22.930868
min          0.000000
25%          5.000000
50%         13.500000
75%         29.000000
max        256.000000
{'avoid': 406, 'absence of': 0, 'dishonest': 14, 'involuntarily': 11, 'without': 1870, 'nobody': 37, 'lest': 10, 'no': 4026, 'not': 14222, 'never': 728, 'neither': 263, 'nor': 364, "can't": 0, "won't": 0, "don't": 0, "didn't": 0, "doesn't": 0, "isn't": 0, "aren't": 0, "wasn't": 0, "weren't": 0, "haven't": 0, "hasn't": 0, "hadn't": 0}
Most frequent Negation Cue: not
Least frequent Negation Cues: ['absence of', "can't", "won't", "don't", "didn't", "doesn't", "isn't", "aren't", "wasn't", "weren't", "haven't", "hasn't", "hadn't"]


In [5]:
# load the Webis-Argument-Framing-19 dataset
df = pd.read_csv('Webis-argument-framing.csv')

negation_cue_dict = {key: 0 for key in negation_cues}


def count_negation_cues(text):
    tokens = word_tokenize(text)
    n_count = 0
    for token in tokens:
        if token.lower() in negation_cues:
            cue = token.lower()
            if cue in negation_cue_dict.keys():
                negation_cue_dict[cue] += 1
            n_count += 1
    return n_count


df['negation_count_premise'] = df['premise'].apply(count_negation_cues)

df['negation_count_conclusion'] = df['conclusion'].apply(count_negation_cues)


print(df[['argument_id', 'negation_count_premise','negation_count_conclusion']].describe())


print(negation_cue_dict)

print("Most frequent Negation Cue:",max(negation_cue_dict , key=negation_cue_dict.get))
print("Least frequent Negation Cues:",get_least_frequent_negation_cues(negation_cue_dict))


        argument_id  negation_count_premise  negation_count_conclusion
count  12326.000000            12326.000000               12326.000000
mean    6954.345692                0.784845                   0.189680
std     4061.683805                1.066830                   0.408085
min        0.000000                0.000000                   0.000000
25%     3383.250000                0.000000                   0.000000
50%     6979.500000                0.000000                   0.000000
75%    10410.750000                1.000000                   0.000000
max    14114.000000               10.000000                   3.000000
{'avoid': 222, 'absence of': 0, 'dishonest': 2, 'involuntarily': 4, 'without': 763, 'nobody': 53, 'lest': 6, 'no': 2250, 'not': 8075, 'never': 389, 'neither': 92, 'nor': 156, "can't": 0, "won't": 0, "don't": 0, "didn't": 0, "doesn't": 0, "isn't": 0, "aren't": 0, "wasn't": 0, "weren't": 0, "haven't": 0, "hasn't": 0, "hadn't": 0}
Most frequent Negation Cue: not

In [6]:
# load the subset dataset of the collected student essays by Stab and Gurevych
df = pd.read_csv('UKP-InsufficientArguments_v1.0/data-tokenized.tsv', sep='\t', encoding='iso-8859-1')

negation_cue_dict = {key: 0 for key in negation_cues}

df['negation_count'] = df['TEXT'].apply(count_negation_cues)


print(df[['ARGUMENT', 'negation_count']].describe())

print(negation_cue_dict)

print("Most frequent Negation Cue:",max(negation_cue_dict , key=negation_cue_dict.get))
print("Least frequent Negation Cues:",get_least_frequent_negation_cues(negation_cue_dict))

          ARGUMENT  negation_count
count  1029.000000     1029.000000
mean      1.842566        0.919339
std       0.794250        1.049507
min       1.000000        0.000000
25%       1.000000        0.000000
50%       2.000000        1.000000
75%       2.000000        1.000000
max       5.000000        6.000000
{'avoid': 17, 'absence of': 0, 'dishonest': 0, 'involuntarily': 0, 'without': 89, 'nobody': 5, 'lest': 0, 'no': 107, 'not': 678, 'never': 40, 'neither': 4, 'nor': 6, "can't": 0, "won't": 0, "don't": 0, "didn't": 0, "doesn't": 0, "isn't": 0, "aren't": 0, "wasn't": 0, "weren't": 0, "haven't": 0, "hasn't": 0, "hadn't": 0}
Most frequent Negation Cue: not
Least frequent Negation Cues: ['absence of', 'dishonest', 'involuntarily', 'lest', "can't", "won't", "don't", "didn't", "doesn't", "isn't", "aren't", "wasn't", "weren't", "haven't", "hasn't", "hadn't"]
