In [2]:
import pandas as pd
import numpy as np

## Covid

In [3]:
# Start from an empty frame that only fixes the column schema for the COVID
# claims; the per-split TSV files are concatenated onto it in a later cell.
df_covid = pd.DataFrame(
    columns=[
        'claim_type',
        'claim',
        'logical_fallacies',
    ]
)
df_covid.head()

Unnamed: 0,claim_type,claim,logical_fallacies


In [4]:
# Fallacy labels; each label is also the stem of a '<label>.tsv' file read by the
# loading cells below.
fallacies = [
    'Cherry Picking',
    'Evading the Burden of Proof',
    'False Analogy',
    'False Authority',
    'False Cause',
    'Hasty Generalization',
    'No Fallacy',
    'Post Hoc',
    'Red Herring',
    'Strawman',
    'Vagueness',
]

# Dataset splits, used as the suffix of the raw-data directory names.
# NOTE(review): this name shadows the builtin `set` for the rest of the notebook;
# it is kept because the loading cells iterate over `set`.
set = [
    'train',
    'test',
    'dev',
]

In [5]:
# Read every (split, fallacy) TSV once, tag its rows with the fallacy label, and
# collect the frames so they can be concatenated in a single call instead of
# re-copying the growing df_covid on every iteration.
covid_frames = []
for s in set:
    for f in fallacies:
        df_current = pd.read_csv(f'../data/rawdata/5_covid_{s}/{f}.tsv', sep='\t')
        df_current['logical_fallacies'] = f
        covid_frames.append(df_current)

# df_covid.iloc[:0] keeps the column schema (and column order) but discards any
# rows left over from a previous execution, so re-running this cell does not
# duplicate the data. Row labels are left as read from each file (no
# ignore_index), matching the previous result on a fresh kernel.
df_covid = pd.concat([df_covid.iloc[:0], *covid_frames])

In [6]:
# Summarise the combined COVID frame: row count, columns, non-null counts and dtypes.
df_covid.info()

<class 'pandas.core.frame.DataFrame'>
Index: 154 entries, 0 to 1
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   claim_type         154 non-null    object
 1   claim              154 non-null    object
 2   logical_fallacies  154 non-null    object
dtypes: object(3)
memory usage: 4.8+ KB


In [7]:
# List the distinct fallacy labels present after loading.
df_covid['logical_fallacies'].unique()

array(['Cherry Picking', 'Evading the Burden of Proof', 'False Analogy',
       'False Authority', 'False Cause', 'Hasty Generalization',
       'No Fallacy', 'Post Hoc', 'Red Herring', 'Strawman', 'Vagueness'],
      dtype=object)

In [8]:
# Count rows per fallacy label to inspect the class balance.
df_covid['logical_fallacies'].value_counts()

logical_fallacies
No Fallacy                     62
Evading the Burden of Proof    14
Cherry Picking                 13
Post Hoc                       13
False Authority                10
Red Herring                     9
False Analogy                   8
Hasty Generalization            7
Vagueness                       7
False Cause                     6
Strawman                        5
Name: count, dtype: int64

In [9]:
# Print the fallacy label and claim text of the first five rows as a quick
# sanity check. Columns are selected by name rather than by position, and
# head(5) simply yields fewer rows if the frame is shorter instead of raising
# IndexError.
preview = df_covid.head(5)
for fallacy, claim in zip(preview['logical_fallacies'], preview['claim']):
    print(fallacy, ':')
    print(claim)

Cherry Picking :
In November 2020, Ticketmaster announced that it planned to require event attendees to verify that they have tested negative for the novel coronavirus within a 72-hour time frame.
Cherry Picking :
Says that “the way to reach ‘herd immunity’ is for >70% of the population to get vaccinated.”
Cherry Picking :
The public won’t be able to claim compensation if unlicensed vaccines damage our health.
Cherry Picking :
“vaccinated children appear to be significantly less healthy than the unvaccinated”
Cherry Picking :
The WHO now says a child’s presence in school counts as “informed consent” for a vaccination and parental presence is not required.


In [17]:
# Write the combined COVID frame to CSV; index=False leaves the row labels out of the file.
df_covid.to_csv('../data/cleaned/5_covid.csv', index=False)

## Climate

In [10]:
# Start from an empty frame that only fixes the column schema for the climate
# data; the per-split TSV files are concatenated onto it in a later cell.
df_climate = pd.DataFrame(
    columns=[
        'fact_checked_segment',
        'comment_by_fact-checker',
        'article',
        'logical_fallacies',
    ]
)
df_climate.head()

Unnamed: 0,fact_checked_segment,comment_by_fact-checker,article,logical_fallacies


In [11]:
# Read every (split, fallacy) TSV once, tag its rows with the fallacy label, and
# collect the frames so they can be concatenated in a single call instead of
# re-copying the growing df_climate on every iteration.
climate_frames = []
for s in set:
    for f in fallacies:
        df_current = pd.read_csv(f'../data/rawdata/5_climate_{s}/{f}.tsv', sep='\t')
        df_current['logical_fallacies'] = f
        climate_frames.append(df_current)

# df_climate.iloc[:0] keeps the column schema (and column order) but discards any
# rows left over from a previous execution, so re-running this cell does not
# duplicate the data. Row labels are left as read from each file (no
# ignore_index), matching the previous result on a fresh kernel.
df_climate = pd.concat([df_climate.iloc[:0], *climate_frames])

In [12]:
# Summarise the combined climate frame: row count, columns, non-null counts and dtypes.
df_climate.info()

<class 'pandas.core.frame.DataFrame'>
Index: 685 entries, 0 to 11
Data columns (total 4 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   fact_checked_segment     683 non-null    object
 1   comment_by_fact-checker  684 non-null    object
 2   article                  374 non-null    object
 3   logical_fallacies        685 non-null    object
dtypes: object(4)
memory usage: 26.8+ KB


In [13]:
# Count rows per fallacy label to inspect the class balance.
df_climate['logical_fallacies'].value_counts()

logical_fallacies
No Fallacy                     208
Cherry Picking                 105
Vagueness                       74
Red Herring                     69
False Authority                 50
Evading the Burden of Proof     48
False Cause                     46
Strawman                        36
False Analogy                   27
Post Hoc                        14
Hasty Generalization             8
Name: count, dtype: int64

In [14]:
# Show the first two rows to eyeball the column contents.
df_climate.head(2)

Unnamed: 0,fact_checked_segment,comment_by_fact-checker,article,logical_fallacies
0,“climate economists see a positive externality...,This is cherry-picking at its worst. You can a...,article36.txt,Cherry Picking
1,The latest U.N. science compendium asserts tha...,The recent US National Climate Assessment1 fin...,article130.txt,Cherry Picking


In [15]:
# Print the fallacy label and fact-checked segment of the first five rows as a
# quick sanity check. Columns are selected by name rather than by position, and
# head(5) simply yields fewer rows if the frame is shorter instead of raising
# IndexError.
preview = df_climate.head(5)
for fallacy, segment in zip(preview['logical_fallacies'], preview['fact_checked_segment']):
    print(fallacy, ':')
    print(segment)

Cherry Picking :
“climate economists see a positive externality, not a negative one, from the human influence on climate. (In technical lingo, the so-called social cost of carbon would be negative.)”
Cherry Picking :
The latest U.N. science compendium asserts that the latter half-degree is at least half manmade.
Cherry Picking :
“‘If we are right, our study challenges decades of paleoclimate research,” said Anders Meibom, the head of EPFL’s Laboratory for Biological Geochemistry and a professor at the University of Lausanne.”
Cherry Picking :
“a killer analysis conducted by Craig Idso of all the studies which have been done on the effects of reduced pH levels on marine life.”
Cherry Picking :
“Next year or the year after that, I think it will be free of ice in summer and by that I mean the central Arctic will be ice-free. You will be able to cross over the north pole by ship. There will still be about a million square kilometres of ice in the Arctic in summer but it will be packed into

In [18]:
# Write the combined climate frame to CSV; index=False leaves the row labels out of the file.
df_climate.to_csv('../data/cleaned/5_climate.csv', index=False)

## Logic
<span style='color:red'>This dataset is the same as LogicClimate from Jin et al., so it is not processed separately here.</span>

## Argotario

In [19]:
# Load the Argotario TSV (first column used as the index), keep only the label
# and text columns, and rename them to the column names used for the COVID data.
# NOTE(review): the .info() output for this frame reports more rows than
# non-null values in both columns, so a few rows are missing the label and/or
# the text; confirm whether they should be dropped before export.
df_arg = (
    pd.read_csv('../data/rawdata/5_argotario.tsv', sep='\t', index_col=0)
    .loc[:, ['Intended Fallacy', 'Text']]
    .rename(columns={"Intended Fallacy": "logical_fallacies", "Text": "claim"})
)
df_arg.head()


Unnamed: 0_level_0,logical_fallacies,claim
Row number,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Appeal to Emotion,"No, imagine you were a teacher and everybody w..."
1,Red Herring,"The moon is so far away, we should focus on ou..."
2,Irrelevant Authority,"The green party in Germany has the opinion, th..."
3,Red Herring,"No, not at all. Nowadays kids spend all time p..."
4,No Fallacy,"Yes, whoever drinks and drives has a bad reac..."


In [20]:
# Summarise the Argotario frame: row count, columns, non-null counts and dtypes.
df_arg.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1344 entries, 0 to 1337
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   logical_fallacies  1338 non-null   object
 1   claim              1338 non-null   object
dtypes: object(2)
memory usage: 31.5+ KB


In [21]:
# List the distinct fallacy labels (missing labels show up as nan).
df_arg['logical_fallacies'].unique()

array(['Appeal to Emotion', 'Red Herring', 'Irrelevant Authority',
       'No Fallacy', 'Ad Hominem', 'Hasty Generalization', nan],
      dtype=object)

In [22]:
# Count rows per fallacy label to inspect the class balance.
df_arg['logical_fallacies'].value_counts()

logical_fallacies
No Fallacy              429
Appeal to Emotion       236
Red Herring             193
Ad Hominem              166
Irrelevant Authority    157
Hasty Generalization    157
Name: count, dtype: int64

In [23]:
# Print the fallacy label and claim text of the first five rows as a quick
# sanity check. Columns are selected by name rather than by position, and
# head(5) simply yields fewer rows if the frame is shorter instead of raising
# IndexError.
preview = df_arg.head(5)
for fallacy, claim in zip(preview['logical_fallacies'], preview['claim']):
    print(fallacy, ':')
    print(claim)

Appeal to Emotion :
No, imagine you were a teacher and everybody would just concentrate on their smartphones? How would that feel?
Red Herring :
The moon is so far away, we should focus on our society.
Irrelevant Authority :
The green party in Germany has the opinion, that nuclear reactors are bad for us. It is not ok
Red Herring :
No, not at all. Nowadays kids spend all time playing around with tablets and smartphones, which is kinda detrimental to their proper development.
No Fallacy :
Yes,  whoever drinks and drives has a bad reaction time. People might die.


In [24]:
# Write the Argotario frame to CSV; index=False leaves the row labels out of the file.
# NOTE(review): the output name is prefixed '7_' while the raw input and the other
# cleaned files in this notebook use '5_' — confirm this is intentional.
df_arg.to_csv('../data/cleaned/7_argotario.csv', index=False)