In [36]:
import requests
from pathlib import Path
import plotly.graph_objects as go
from collections import defaultdict

from claimreview_collector.processing import utils

def get_data(url, timeout=30):
    """Fetch `url` with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address of the JSON resource to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests` may block indefinitely on an unresponsive host.

    Returns:
        The parsed JSON payload (whatever `response.json()` yields).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within `timeout`.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()
    return response.json()


In [2]:
# Dataset statistics, part 1: fact-checkers.
# Download the latest statistics from the MisinfoMe API.
latest_stats_url = 'https://misinfo.me/misinfo/api/data/latest/'
latest_stats = get_data(latest_stats_url)

In [3]:
# Manually collected counts for euvsdisinfo.eu, which is added on top of the
# per-fact-checker stats returned by the API.
euvsdisinfo_cnt = 14491 # from https://euvsdisinfo.eu/disinformation-cases/
euvsdisinfo_cnt_recollected = 14182 # from localhost mongo database claimreview_collector: db.getCollection('euvsdisinfo').count()
euvsdisinfo_stats = {'domain': 'euvsdisinfo.eu', 'before': euvsdisinfo_cnt, 'after': euvsdisinfo_cnt_recollected}

# Build a new list instead of appending to the one stored inside `latest_stats`:
# appending in place would add the euvsdisinfo entry again every time this cell
# is re-run, silently inflating the totals computed afterwards.
by_factchecker = [*latest_stats['claim_reviews']['recollection_stats'], euvsdisinfo_stats]

In [4]:
# Sum the 'before' and 'after' counts over all fact-checkers.
total_before = 0
total_after = 0
for entry in by_factchecker:
    total_before += entry['before']
    total_after += entry['after']
print('Total before:', total_before, 'Total after:', total_after)

Total before: 208229 Total after: 145238


In [15]:
# Bar chart of the 50 fact-checkers with the highest 'after' count, largest first.
# A previous `after > 100` filter was assigned and then immediately overwritten by
# this top-50 selection, so it never had any effect and has been removed.
by_factchecker_filtered = sorted(by_factchecker, key=lambda x: x['after'], reverse=True)[:50]

# Only the 'after' counts are plotted (one bar per fact-checker domain).
fig = go.Figure([
    go.Bar(
        x=[x['domain'] for x in by_factchecker_filtered],
        y=[x['after'] for x in by_factchecker_filtered],
        name='total',
    ),
])
fig.update_layout(title='Number of ClaimReview by fact-checker')
fig.update_yaxes(nticks=10)
fig.update_xaxes(nticks=50)
# Export the chart as a PDF, then leave `fig` as the last expression so the notebook renders it.
fig.write_image('figures/number_of_claimreview_by_fact_checker.pdf')
fig

In [21]:
# Load the collected ClaimReviews from the local JSON dump.
claim_reviews_path = 'data/latest/claim_reviews.json'
crs = utils.read_json(claim_reviews_path)
print('loaded')

loaded


In [24]:
# Count how many ClaimReviews carry each label.
by_label = defaultdict(int)
for review in crs:
    by_label[review['label']] += 1

# Drop the 'check_me' label and replace underscores with spaces for display.
by_label_filtered = {}
for raw_label, count in by_label.items():
    if raw_label == 'check_me':
        continue
    by_label_filtered[raw_label.replace('_', ' ')] = count

In [28]:
# Bar chart of the number of ClaimReviews per verdict label.
fig = go.Figure(go.Bar(x=list(by_label_filtered.keys()), y=list(by_label_filtered.values())))
# Fix the order of the bars on the x axis. The original list named 'uncertain'
# twice; the duplicate is removed here. Labels not listed are placed by plotly
# after the listed ones.
# NOTE(review): the duplicated entry was probably meant to be another label
# present in `by_label_filtered` - confirm which one and add it explicitly.
fig.update_layout(xaxis={'categoryorder':'array', 'categoryarray':['credible', 'mostly credible', 'uncertain', 'not credible']})
fig.update_yaxes(nticks=10)
# Export the chart as a PDF, then leave `fig` as the last expression so the notebook renders it.
fig.write_image('figures/number_of_claimreview_by_verdict.pdf')
fig

In [29]:
# Accumulator mapping a domain to the number of links that point to it.
# It lives in its own cell on purpose: the cell that fills it is meant to be run
# more than once, and re-creating the dict there would reset the counts.
by_domain = defaultdict(int)

In [33]:
# Count link appearances per misinforming domain.
# Run this cell once with the IFCN data and once with the euvsdisinfo data so
# that the counts from both accumulate in `by_domain`.
links_full = utils.read_json('data/latest/links_all_full.json')
for link in links_full:
    by_domain[link['misinforming_domain']] += 1

In [37]:
# Save the accumulated per-domain counts as by_domain.json under figures/.
figures_dir = Path('figures')
utils.write_json_with_path(by_domain, figures_dir, 'by_domain.json')

In [39]:
# Keep the 50 domains with the highest link count, largest first.
top_domain_counts = sorted(by_domain.items(), key=lambda pair: pair[1], reverse=True)[:50]
by_domain_filtered = [{'domain': name, 'count': total} for name, total in top_domain_counts]

# One bar per domain, in the ranked order computed above.
domain_bars = go.Bar(
    x=[row['domain'] for row in by_domain_filtered],
    y=[row['count'] for row in by_domain_filtered],
    name='total',
)
fig = go.Figure([domain_bars])
fig.update_layout(title='Top 50 websites pointed by ClaimReview')
fig.update_yaxes(nticks=10)
fig.update_xaxes(nticks=50)
# Export the chart as a PDF, then leave `fig` as the last expression so the notebook renders it.
fig.write_image('figures/top_50_websites_claimreview.pdf')
fig