## This script corresponds to section 3.3 - Evaluation of data sets - of the paper "Neural Media Bias Detection Using Distant Supervision With BABE"


To run this script, you need the following files found in the /data directory:
- "raw_labels_MBIC.xlsx"
- "raw_labels_SG1.xlsx"
- "raw_labels_SG2.xlsx"
- "final_labels_MBIC.xlsx"
- "final_labels_SG1.xlsx"
- "final_labels_SG2.xlsx"

In [1]:
import pandas as pd
import statistics
import statsmodels
from statsmodels.stats import inter_rater
import krippendorff
import os
import ast

  import pandas.util.testing as tm


#### Load raw labels of all subgroups containing all individual annotations for agreement calculations

In [2]:
# Switch into the folder that holds the spreadsheets
# (replace the placeholder string with your own path).
os.chdir("Your directory containing the data files")

# Raw label sheets, one workbook per subgroup, read in the order MBIC, SG1, SG2.
MBIC_raw, SG1_raw, SG2_raw = (
    pd.read_excel(workbook)
    for workbook in (
        "raw_labels_MBIC.xlsx",
        "raw_labels_SG1.xlsx",
        "raw_labels_SG2.xlsx",
    )
)

In [3]:
#### preprocess labels --> encode string labels as int codes for the agreement calculations

# Bias labels. Only MBIC_raw is recoded for these; SG1/SG2 are later pivoted on a
# 'Label_bias_0-1' column, which presumably is numeric already -- TODO confirm.
_BIAS_CODES = {
    'Biased': 1,
    'Non-biased': 0,
}

# Opinion labels, spelled the way every other cell of this notebook spells them.
_OPINION_CODES = {
    'Expresses writer’s opinion': 2,
    'Somewhat factual but also opinionated': 1,
    'Entirely factual': 0,
}

# Extra spellings handled for SG1 only.
# The first entry is the acute-accent variant of the "writer's opinion" label.
# The last two are the literals this cell used before; they look like typos of the
# labels above ("wleter" / "factional") and are kept only so that nothing that was
# matched previously stops being matched.
_SG1_EXTRA_OPINION_CODES = {
    "Expresses writer´s opinion": 2,
    "Expresses wleter´s opinion": 2,
    'Somewhat factional but also opinionated': 1,
}

# Replace frame-wide and in place, one label at a time (same call as before).
# The replacement values are ints, so no replacement can feed into a later one.
for _frame, _codes in (
    (MBIC_raw, {**_BIAS_CODES, **_OPINION_CODES}),
    (SG1_raw, {**_SG1_EXTRA_OPINION_CODES, **_OPINION_CODES}),
    (SG2_raw, _OPINION_CODES),
):
    for _label, _code in _codes.items():
        _frame.replace(to_replace=_label, value=_code, inplace=True)

#### Bias agreement

In [4]:
# Wide tables for the bias-agreement step: rows are keyed by each sheet's id column
# (presumably one row per rater -- verify against the sheets), columns are the
# sentence texts, and every cell holds the bias label.
MBIC_bias = MBIC_raw.pivot(
    values='label_bias',
    index='survey_record_id',
    columns='text',
)
SG1_bias = SG1_raw.pivot(
    values='Label_bias_0-1',
    index='annotator_id',
    columns='text',
)
SG2_bias = SG2_raw.pivot(
    values='Label_bias_0-1',
    index='df_id',
    columns='text',
)

In [5]:
# Krippendorff's alpha on the pivoted bias labels, one value per subgroup,
# printed rounded to two decimals.

# MBIC
bias_alpha_MBIC = krippendorff.alpha(MBIC_bias)
_shown = round(bias_alpha_MBIC, 2)
print("Krippendorff's alpha for bias labels in MBIC = {}".format(_shown))

# SG1
bias_alpha_SG1 = krippendorff.alpha(SG1_bias)
_shown = round(bias_alpha_SG1, 2)
print("Krippendorff's alpha for bias labels in SG1 = {}".format(_shown))

# SG2
bias_alpha_SG2 = krippendorff.alpha(SG2_bias)
_shown = round(bias_alpha_SG2, 2)
print("Krippendorff's alpha for bias labels in SG2 = {}".format(_shown))

Krippendorff's alpha for bias labels in MBIC = 0.21
Krippendorff's alpha for bias labels in SG1 = 0.39
Krippendorff's alpha for bias labels in SG2 = 0.4


#### Opinion agreement

In [6]:
# Wide tables for the opinion-agreement step: same layout as the bias pivots
# (id column as row key, sentence text as column), but the cells hold 'label_opinion'.
MBIC_opin = MBIC_raw.pivot(
    values='label_opinion',
    index='survey_record_id',
    columns='text',
)
SG1_opin = SG1_raw.pivot(
    values='label_opinion',
    index='annotator_id',
    columns='text',
)
SG2_opin = SG2_raw.pivot(
    values='label_opinion',
    index='df_id',
    columns='text',
)

In [7]:
# Krippendorff's alpha on the pivoted opinion labels, one value per subgroup,
# printed rounded to two decimals.
# NOTE(review): alpha() is called without level_of_measurement, so the library
# default applies -- confirm that default suits the 0/1/2 opinion coding.

# MBIC
opin_alpha_MBIC = krippendorff.alpha(MBIC_opin)
_shown = round(opin_alpha_MBIC, 2)
print("Krippendorff's alpha for opinion labels in MBIC = {}".format(_shown))

# SG1
opin_alpha_SG1 = krippendorff.alpha(SG1_opin)
_shown = round(opin_alpha_SG1, 2)
print("Krippendorff's alpha for opinion labels in SG1 = {}".format(_shown))

# SG2
opin_alpha_SG2 = krippendorff.alpha(SG2_opin)
_shown = round(opin_alpha_SG2, 2)
print("Krippendorff's alpha for opinion labels in SG2 = {}".format(_shown))

Krippendorff's alpha for opinion labels in MBIC = 0.26
Krippendorff's alpha for opinion labels in SG1 = 0.46
Krippendorff's alpha for opinion labels in SG2 = 0.6


#### Load aggregated labels of all subgroups for calculation of descriptive statistics

In [8]:
# Aggregated (final) label sheets, one workbook per subgroup,
# read in the order MBIC, SG1, SG2.
MBIC, SG1, SG2 = (
    pd.read_excel(workbook)
    for workbook in (
        "final_labels_MBIC.xlsx",
        "final_labels_SG1.xlsx",
        "final_labels_SG2.xlsx",
    )
)

  warn(msg)


#### Number of Biased words per biased sentence

In [9]:
def _as_word_list(cell):
    """Turn the textual Python literal stored in a 'biased_words' cell into a list."""
    return list(ast.literal_eval(cell))


# The same three steps for MBIC, SG1 and SG2 (in that order):
#   1. parse the 'biased_words' strings into real lists,
#   2. store each list's length in 'num_biased_words',
#   3. average that length over the sentences having at least one biased word.
for _frame, _headline in (
    (MBIC, 'MBIC: Average number of biased words in the biased sentences:'),
    (SG1, 'SG1: Average number of biased words in the biased sentences:'),
    (SG2, 'SG2: Average number of biased words in the biased sentences:'),
):
    _frame["biased_words"] = _frame.biased_words.apply(_as_word_list)
    _frame['num_biased_words'] = _frame.biased_words.apply(len)
    # Rebound on every pass; after the loop it holds the SG2 subset.
    sent_with_biased_words = _frame[_frame['num_biased_words'] > 0]
    print(_headline, round(sent_with_biased_words.num_biased_words.mean(), 2))

MBIC: Average number of biased words in the biased sentences: 2.4
SG1: Average number of biased words in the biased sentences: 1.95
SG2: Average number of biased words in the biased sentences: 2.11


#### Total number of biased words

In [10]:
# Total number of whitespace-separated words across all sentences of a subgroup.
# Only SG1 and SG2 are counted: MBIC comprises the same sentences as SG1,
# so it has the same total word count and re-uses sum_words_SG1.
sum_words_SG1 = sum(len(sentence.split()) for sentence in SG1['text'])
sum_words_SG2 = sum(len(sentence.split()) for sentence in SG2['text'])

In [11]:
# Report, per subgroup, how many of all words were labeled as biased.

# MBIC -- the denominator is SG1's word total (sum_words_SG1)
biased_words_sum_MBIC = MBIC.num_biased_words.sum()
_pair = (biased_words_sum_MBIC, sum_words_SG1)
print("{} out of {} words are labeled as biased in MBIC".format(*_pair))

# SG1
biased_words_sum_SG1 = SG1.num_biased_words.sum()
_pair = (biased_words_sum_SG1, sum_words_SG1)
print("{} out of {} words are labeled as biased in SG1".format(*_pair))

# SG2
biased_words_sum_SG2 = SG2.num_biased_words.sum()
_pair = (biased_words_sum_SG2, sum_words_SG2)
print("{} out of {} words are labeled as biased in SG2".format(*_pair))

3283 out of 56826 words are labeled as biased in MBIC
1530 out of 56826 words are labeled as biased in SG1
3902 out of 116232 words are labeled as biased in SG2


#### Bias Label Distribution

In [12]:
def _bias_label_distribution(frame, display_order=('Biased', 'Non-biased', 'No agreement')):
    """Count sentences per 'label_bias' value and add each label's share in percent.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table providing the columns 'label_bias' and 'text'.
    display_order : sequence of str
        Labels in the order the rows should appear. Labels that are not listed
        are placed after the listed ones.

    Returns
    -------
    pandas.DataFrame
        Columns 'label_bias', 'num_sentences' and 'percentage ' (the trailing
        blank in the last name is kept from the earlier version of this cell),
        with a fresh 0..n-1 index.
    """
    counts = frame.groupby('label_bias')[['text']].count().reset_index()
    counts = counts.rename(columns={"text": "num_sentences"})

    # Order rows by label NAME. The earlier version assigned a fixed positional
    # list ([1, 3, 2]) to the grouped rows, which only works while exactly these
    # three labels are present and come out of groupby in alphabetical order.
    rank = {label: position for position, label in enumerate(display_order)}
    sort_key = counts['label_bias'].map(rank).fillna(len(rank))
    row_order = sort_key.sort_values(kind='mergesort').index  # stable sort
    counts = counts.loc[row_order].reset_index(drop=True)

    counts['percentage '] = counts['num_sentences'] / counts['num_sentences'].sum() * 100  # get percentage
    return counts


#MBIC
bias_obs_MBIC = _bias_label_distribution(MBIC)

#SG1
bias_obs_SG1 = _bias_label_distribution(SG1)

#SG2
bias_obs_SG2 = _bias_label_distribution(SG2)

print("MBIC Bias Label Distribution","\n",bias_obs_MBIC)
print("---------------------------------")
print("SG1 Bias Label Distribution","\n",bias_obs_SG1)
print("---------------------------------")
print("SG2 Bias Label Distribution","\n",bias_obs_SG2)

MBIC Bias Label Distribution 
      label_bias  num_sentences  percentage 
0        Biased           1018    59.882353
1    Non-biased            533    31.352941
2  No agreement            149     8.764706
---------------------------------
SG1 Bias Label Distribution 
      label_bias  num_sentences  percentage 
0        Biased            746    43.882353
1    Non-biased            800    47.058824
2  No agreement            154     9.058824
---------------------------------
SG2 Bias Label Distribution 
      label_bias  num_sentences  percentage 
0        Biased           1810    49.265106
1    Non-biased           1863    50.707676
2  No agreement              1     0.027218


#### Opinion Label Distribution

In [13]:
def _opinion_label_distribution(frame):
    """Count sentences per 'label_opinion' value, shorten the labels, add percentages.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table providing the columns 'label_opinion' and 'text'.

    Returns
    -------
    pandas.DataFrame
        Columns 'label_opinion', 'num_sentences' and 'percentage ' (the trailing
        blank in the last name is kept from the earlier version of this cell),
        with a fresh 0..n-1 index. Rows appear in the order Opinionated, Factual,
        Both, No agreement; any other label follows after these.
    """
    counts = frame.groupby('label_opinion')[['text']].count().reset_index()
    counts = counts.rename(columns={"text": "num_sentences"})

    # Order rows by label NAME. The earlier version assigned a fixed positional
    # list ([2, 1, 4, 3]) to the grouped rows, which only works while exactly these
    # four labels are present and come out of groupby in alphabetical order.
    display_order = (
        'Expresses writer’s opinion',
        'Entirely factual',
        'Somewhat factual but also opinionated',
        'No agreement',
    )
    rank = {label: position for position, label in enumerate(display_order)}
    sort_key = counts['label_opinion'].map(rank).fillna(len(rank))
    row_order = sort_key.sort_values(kind='mergesort').index  # stable sort
    counts = counts.loc[row_order].reset_index(drop=True)

    # Short display names, applied frame-wide exactly as before.
    for long_name, short_name in (
        ('Entirely factual', 'Factual'),
        ('Expresses writer’s opinion', 'Opinionated'),
        ('Somewhat factual but also opinionated', 'Both'),
    ):
        counts = counts.replace(long_name, short_name)

    counts['percentage '] = counts['num_sentences'] / counts['num_sentences'].sum() * 100  # get percentage
    return counts


#MBIC
opin_obs_MBIC = _opinion_label_distribution(MBIC)

#SG1
opin_obs_SG1 = _opinion_label_distribution(SG1)

#SG2
opin_obs_SG2 = _opinion_label_distribution(SG2)

print("MBIC Opinion Label Distribution","\n",opin_obs_MBIC)
print("---------------------------------")
print("SG1 Opinion Label Distribution","\n",opin_obs_SG1)
print("---------------------------------")
print("SG2 Opinion Label Distribution","\n",opin_obs_SG2)

MBIC Opinion Label Distribution 
   label_opinion  num_sentences  percentage 
0   Opinionated            521    30.647059
1       Factual            572    33.647059
2          Both            433    25.470588
3  No agreement            174    10.235294
---------------------------------
SG1 Opinion Label Distribution 
   label_opinion  num_sentences  percentage 
0   Opinionated            425    25.000000
1       Factual            639    37.588235
2          Both            453    26.647059
3  No agreement            183    10.764706
---------------------------------
SG2 Opinion Label Distribution 
   label_opinion  num_sentences  percentage 
0   Opinionated            858    23.353293
1       Factual           1600    43.549265
2          Both           1000    27.218291
3  No agreement            216     5.879151
