In [1]:
import numpy as np
from tqdm import tqdm

In [2]:
import pandas as pd
# Load the notes and ratings exports; both are tab-separated files.
notes = pd.read_csv(
    '../data/notes-00000.tsv',
    sep='\t',
)
ratings = pd.read_csv(
    '../data/ratings-00000.tsv',
    sep='\t',
)

In [3]:
## Note: this code snippet's results won't match the results of Birdwatch in production,
##   because this code snippet doesn't weight ratings by contributors' helpfulness scores.

# Per-rating helpfulness score in [0, 1]. The column is created as float (0.0, not 0)
# because 0.5 is assigned below; assigning a float into an integer column relies on an
# implicit dtype upcast that recent pandas versions deprecate.
ratings['helpfulScore'] = 0.0
# Binary 'helpful' flag counts as fully helpful.
ratings.loc[ratings['helpful']==1,'helpfulScore'] = 1
# The 'helpfulnessLevel' column, where set, overrides the binary flag (later assignments win).
ratings.loc[ratings['helpfulnessLevel']=='SOMEWHAT_HELPFUL','helpfulScore'] = 0.5
ratings.loc[ratings['helpfulnessLevel']=='HELPFUL','helpfulScore'] = 1

In [4]:
# Pair every rating with the note it rates (inner join on noteId). Columns that exist in
# both frames get a "_note" / "_rating" suffix. The suffixes must not contain a backslash:
# "\_note" is an invalid escape sequence and would produce column names such as
# "createdAtMillis\_note" instead of "createdAtMillis_note".
ratingsWithNotes = notes.set_index('noteId').join(ratings.set_index('noteId'), lsuffix="_note", rsuffix="_rating", how='inner')
# Constant 1 per rating row, so that summing per note yields the number of ratings.
ratingsWithNotes['numRatings'] = 1

In [5]:
def getScoredNotesForTweet(
    tweetId,
    minRatingsNeeded = 5,
    minHelpfulnessRatioNeededHelpful = 0.84,
    maxHelpfulnessRatioNeededNotHelpful = .29,
    minRatingsToGetTag = 2,
):
    """Score all rated notes of one tweet and assign each a rating status.

    Reads the module-level frames ``ratingsWithNotes`` (one row per note/rating pair)
    and ``notes``.

    Args:
        tweetId: value matched against ``ratingsWithNotes['tweetId']``.
        minRatingsNeeded: minimum number of ratings a note needs before it can leave
            the 'Needs More Ratings' status.
        minHelpfulnessRatioNeededHelpful: notes with enough ratings and a helpfulness
            ratio >= this value become 'Currently Rated Helpful'.
        maxHelpfulnessRatioNeededNotHelpful: notes with enough ratings and a helpfulness
            ratio <= this value become 'Currently Not Rated Helpful'.
        minRatingsToGetTag: minimum count a "why" tag needs to be eligible as a top tag.

    Returns:
        A DataFrame indexed by noteId holding the per-note sums of the numeric columns
        plus ``helpfulnessRatio``, ``ratingStatus``, ``firstTag``, ``secondTag``,
        ``summary``, ``orderWithinStatus`` and ``statusOrder``, sorted by
        ``statusOrder`` descending. If the tweet has no rated notes, the empty
        aggregate is returned as-is, without the derived columns.
    """
    ratingsWithNotesForTweet = ratingsWithNotes[ratingsWithNotes['tweetId']==tweetId]
    # numeric_only=True keeps only columns that can be summed. Older pandas dropped
    # non-numeric columns silently; newer versions would try to sum string columns as
    # well, which changes what the join with `summary` below produces.
    scoredNotes = ratingsWithNotesForTweet.groupby('noteId').sum(numeric_only=True)
    if scoredNotes.empty:
        return scoredNotes
    scoredNotes['helpfulnessRatio'] = scoredNotes['helpfulScore']/scoredNotes['numRatings']

    helpfulWhys = ['helpfulOther', 'helpfulInformative', 'helpfulClear',
                   'helpfulGoodSources', 'helpfulEmpathetic', 'helpfulUniqueContext']
    notHelpfulWhys = ['notHelpfulOther', 'notHelpfulOpinionSpeculationOrBias', 'notHelpfulSourcesMissingOrUnreliable',
                      'notHelpfulMissingKeyPoints', 'notHelpfulArgumentativeOrInflammatory', 'notHelpfulIncorrect',
                      'notHelpfulOffTopic', 'notHelpfulHardToUnderstand', 'notHelpfulSpamHarassmentOrAbuse', 'notHelpfulOutdated']

    # Default status; only notes with enough ratings can be promoted or demoted.
    hasEnoughRatings = scoredNotes['numRatings'] >= minRatingsNeeded
    scoredNotes['ratingStatus'] = 'Needs More Ratings'
    scoredNotes.loc[hasEnoughRatings & (scoredNotes['helpfulnessRatio'] >= minHelpfulnessRatioNeededHelpful), 'ratingStatus'] = 'Currently Rated Helpful'
    scoredNotes.loc[hasEnoughRatings & (scoredNotes['helpfulnessRatio'] <= maxHelpfulnessRatioNeededNotHelpful), 'ratingStatus'] = 'Currently Not Rated Helpful'
    scoredNotes['firstTag'] = np.nan
    scoredNotes['secondTag'] = np.nan

    def topWhys(row):
        """Set firstTag/secondTag on a rated note, or demote it when it lacks two eligible tags."""
        if row['ratingStatus']=='Currently Rated Helpful':
            candidateWhys = helpfulWhys
        elif row['ratingStatus']=='Currently Not Rated Helpful':
            candidateWhys = notHelpfulWhys
        else:
            # 'Needs More Ratings' notes get no tags.
            return row
        whyCounts = pd.DataFrame(row[candidateWhys])
        whyCounts.columns = ['tagCounts']
        whyCounts['tiebreakOrder'] = range(len(whyCounts))
        whyCounts = whyCounts[whyCounts['tagCounts'] >= minRatingsToGetTag]
        # Both keys are sorted descending, so among tags with equal counts the one
        # listed later in the "whys" list wins.
        topTags = whyCounts.sort_values(by=['tagCounts','tiebreakOrder'], ascending=False)[:2]
        if len(topTags) < 2:
            # Fewer than two tags reached minRatingsToGetTag: fall back to the default status.
            row['ratingStatus'] = 'Needs More Ratings'
        else:
            row['firstTag'] = topTags.index[0]
            row['secondTag'] = topTags.index[1]
        return row

    scoredNotes = scoredNotes.apply(topWhys, axis=1)
    # Attach the note text; the suffixes only apply if `summary` already exists on the left.
    scoredNotes = scoredNotes.join(notes[['noteId','summary']].set_index('noteId'), lsuffix="_note", rsuffix="_rating", how='inner')

    scoredNotes['orderWithinStatus'] = 'helpfulnessRatio'
    scoredNotes.loc[scoredNotes['ratingStatus']=='Needs More Ratings', 'orderWithinStatus'] = 'createdAtMillis_note'
    statusOrder = {'Currently Rated Helpful':2, 'Needs More Ratings':1, 'Currently Not Rated Helpful':0}
    scoredNotes['statusOrder'] = scoredNotes['ratingStatus'].map(statusOrder)
    # NOTE(review): 'orderWithinStatus' holds a column *name*, not that column's values,
    # so within one status the rows are not actually ordered by helpfulnessRatio or
    # creation time. Kept unchanged because downstream tie-breaking depends on the
    # current order - confirm the intended ordering before changing it.
    return scoredNotes.sort_values(by=['statusOrder','orderWithinStatus'], ascending=False)
    

# Birdwatch (BW) score vs. CR label

In [73]:
# For each tweet:
#  1. run the scoring function
#  2. keep the notes which do not need more ratings
#  3. either average the results, or map them to labels and take a majority vote
#     (helpful: ratio >= 0.84, not helpful: ratio <= 0.29)
#  4. compare the outcome with the golden label
#

In [7]:
# Load the merged corpus; its columns are listed in the output of the next cell.
merge_corpus = pd.read_csv("../data/merged3_mturk.csv")

In [8]:
# Inspect which columns the merged corpus provides.
merge_corpus.columns

Index(['Tweet', 'credibility', 'tweetId', 'CR Fact', 'noteId', 'summary',
       'classification', 'full_text'],
      dtype='object')

In [9]:
# Build one record per merge_corpus row:
# [tweetId, helpful note ids, "helpful/total" note count, classifications of the
#  helpful notes, credibility label, tweet text, CR fact].
BWS_CR = []

for index, row in merge_corpus.iterrows():
    score_df = getScoredNotesForTweet(row.tweetId)
    if not score_df.empty:
        # statusOrder 2 marks notes that are 'Currently Rated Helpful'.
        is_helpful = score_df['statusOrder'] == 2
        helpful_noteids = list(score_df[is_helpful].index)
        # Each entry is the (one-element) 'classification' Series of that note.
        classification_helpful_noteids = [
            notes[notes.noteId == note_id]['classification']
            for note_id in helpful_noteids
        ]
        record = [
            row.tweetId,
            helpful_noteids,
            f'{len(helpful_noteids)}/{len(score_df)}',
            classification_helpful_noteids,
            row.credibility,
            row.full_text,
            row['CR Fact'],
        ]
    else:
        # No rated notes for this tweet: placeholder record.
        record = [row.tweetId, [], None, [], row.credibility, '', '']
    BWS_CR.append(record)

In [10]:
# Turn the list of records into a DataFrame ('PHNIds' is the "helpful/total" note-count string).
# NOTE(review): this rebinds BWS_CR from a list to a DataFrame, so re-running this cell
# without re-running the loop that builds the list will not reproduce the same frame.
BWS_CR = pd.DataFrame(BWS_CR,columns=['tweetId','helpfulNoteIds','PHNIds','Classification','CR Label','full_text','CR Fact'])

In [11]:
# Keep only the rows that have at least one helpful note id (non-empty list).
# .copy() makes the result an independent frame, so adding columns to it later does not
# raise SettingWithCopyWarning; .map(bool) also works when the frame has no rows.
BWS_CR = BWS_CR[BWS_CR['helpfulNoteIds'].map(bool)].copy()

In [12]:
# (rows, columns) of BWS_CR after the filtering step.
BWS_CR.shape

(533, 7)

In [15]:
# Number of distinct tweet ids in the merged corpus.
len(set(merge_corpus.tweetId))

2208

In [16]:
from collections import Counter
def find_winner(x):
    """Return the most frequent value(s) of ``x``.

    Args:
        x: iterable of hashable values.

    Returns:
        A list with every value that reaches the highest count, in order of first
        appearance; several entries mean a tie. An empty input yields an empty list.
    """
    counts = Counter(x)
    if not counts:
        # max() of an empty sequence would raise ValueError.
        return []
    top_count = max(counts.values())
    return [value for value, count in counts.items() if count == top_count]

In [17]:
# Majority classification among each row's helpful notes. Every entry of
# 'Classification' is a Series, so its first value is unwrapped before counting.
# On a tie, the first winner returned by find_winner is used.
BW_winner = [
    find_winner([note_classification.values[0] for note_classification in classifications])[0]
    for classifications in BWS_CR['Classification']
]

In [18]:
# Store the winning classification per row; BW_winner was built by iterating over
# BWS_CR, so it is in the same row order.
BWS_CR['BW_winner']=BW_winner

In [19]:
# Contingency table: winning Birdwatch classification (rows) vs. CR label (columns).
pd.crosstab(BWS_CR['BW_winner'],BWS_CR['CR Label'])

CR Label,credible,mostly_credible,not_credible,not_verifiable,uncertain
BW_winner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MISINFORMED_OR_POTENTIALLY_MISLEADING,58,6,320,58,38
NOT_MISLEADING,8,3,39,0,3


In [22]:
# Sanity check: total over all crosstab cells.
pd.crosstab(BWS_CR['BW_winner'],BWS_CR['CR Label']).sum().sum()

533

In [23]:
# Share of rows where the winning Birdwatch classification agrees with the CR label:
#   MISINFORMED_OR_POTENTIALLY_MISLEADING <-> not_credible
#   NOT_MISLEADING                        <-> credible or mostly_credible
# Computed from BWS_CR instead of numbers copied by hand from the crosstab output,
# so the value cannot go stale when the data changes.
bw_label = BWS_CR['BW_winner']
cr_label = BWS_CR['CR Label']
agreement = (
    ((bw_label == 'MISINFORMED_OR_POTENTIALLY_MISLEADING') & (cr_label == 'not_credible'))
    | ((bw_label == 'NOT_MISLEADING') & cr_label.isin(['credible', 'mostly_credible']))
)
float(agreement.mean())

0.6210131332082551

In [24]:
# Show the tweets where Birdwatch says NOT_MISLEADING but the CR label is not_credible.
is_disagreement = (BWS_CR['BW_winner']=='NOT_MISLEADING') & (BWS_CR['CR Label']=='not_credible')
disagreements = BWS_CR[is_disagreement]
for tweet_text, cr_fact in zip(disagreements['full_text'], disagreements['CR Fact']):
    print(tweet_text)
    print(cr_fact)
    print('\n\n\n\n\n\n')

This is extremely difficult for me to say:

I once thought I truly knew Joe Biden and he helped me through pain and grief, for which I am grateful. 

This man on tv giving this speech, I do not recognize this man. God help our country. God help the Americans we have abandoned.
"['\u201cAnd don\u2019t forget Biden deserted you. He\u2019s not from Pennsylvania. I guess he was born here, but he left you, folks. He left you for another state. Remember that, please. I meant to say that. This guy talks about, \u2018I know Scranton,\u2019 \u2018I know\u2019 \u2014 well I know the places better. He left you for another state and he didn\u2019t take care of you because he didn\u2019t take care of your jobs. He let other countries come in and rip off America.\u201d']"







This is extremely difficult for me to say:

I once thought I truly knew Joe Biden and he helped me through pain and grief, for which I am grateful. 

This man on tv giving this speech, I do not recognize this man. God help o

# Birdwatch (BW) helpful notes

In [25]:
# Distinct tweetId values that appear in the notes table.
all_tweets=list(set(notes.tweetId))

In [26]:
# Score the notes of every tweet; one result frame per tweet.
BW_helpful = [getScoredNotesForTweet(tid) for tid in tqdm(all_tweets)]
# n: tweets whose result is empty, t: total number of scored notes.
n = sum(1 for scored in BW_helpful if scored.empty)
t = sum(len(scored) for scored in BW_helpful)


100%|██████████| 11871/11871 [03:51<00:00, 51.38it/s]


In [27]:
# Stack the per-tweet result frames into a single frame.
# NOTE(review): this rebinds BW_helpful from a list to a DataFrame, so this cell cannot
# simply be re-run without re-running the scoring loop first.
BW_helpful=pd.concat(BW_helpful)

In [28]:
# Same status -> rank mapping that getScoredNotesForTweet uses for its 'statusOrder'
# column; it serves as the legend for the counts in the cells below.
statusOrder = {'Currently Rated Helpful':2, 'Needs More Ratings':1, 'Currently Not Rated Helpful':0}

In [29]:
# Number of scored notes with statusOrder 2 ('Currently Rated Helpful').
len(BW_helpful[BW_helpful['statusOrder']==2])

1192

In [30]:
# Notes with statusOrder 1 ('Needs More Ratings') as a share of all rows in `notes`.
len(BW_helpful[BW_helpful['statusOrder']==1])/len(notes)

0.5226286824214956

In [31]:
# Number of scored notes with statusOrder 1 ('Needs More Ratings').
len(BW_helpful[BW_helpful['statusOrder']==1])

8072

In [32]:
# Notes with statusOrder 0 ('Currently Not Rated Helpful') as a share of all rows in `notes`.
len(BW_helpful[BW_helpful['statusOrder']==0])/len(notes)

0.04797669148591777

In [33]:
# Number of scored notes with statusOrder 0 ('Currently Not Rated Helpful').
len(BW_helpful[BW_helpful['statusOrder']==0])

741

In [34]:
# Notes with statusOrder 2 ('Currently Rated Helpful') as a share of all rows in `notes`.
len(BW_helpful[BW_helpful['statusOrder']==2])/len(notes)

0.07717707996115247

In [35]:
# Number of scored notes with statusOrder 2 ('Currently Rated Helpful'); the same
# expression is already evaluated in an earlier cell.
len(BW_helpful[BW_helpful['statusOrder']==2])

1192