In [8]:
!pip install Pillow

Collecting Pillow
  Downloading Pillow-8.1.0-cp38-cp38-macosx_10_10_x86_64.whl (2.2 MB)
[K     |████████████████████████████████| 2.2 MB 2.0 MB/s 
[?25hInstalling collected packages: Pillow
Successfully installed Pillow-8.1.0


## Import Results

In [63]:
import json

def _load_json(path):
    """Open the file at ``path`` and return its parsed JSON content."""
    with open(path) as json_file:
        return json.load(json_file)


# Folder holding one prediction file per classifier.
predictions_dir = "predictions/"

# content classifier
content_predictions = _load_json(predictions_dir+"content_predictions.json")

# advertisement agency classifier
agency_predictions = _load_json(predictions_dir+"advertisement_agency_predictions.json")

# title classifier
title_predictions = _load_json(predictions_dir+"title_predictions.json")

# topic "agonistic" classifier (spelling follows the file and variable names)
topic_agonistic_predictions = _load_json(predictions_dir+"topic_agonistic_predictions.json")

# raw snippet data, searched by get_snippet_by_id
raw_snippet_data = _load_json("raw_snippet_data.json")

In [64]:
def get_snippet_by_id(doc_id):
    """Return the first snippet whose ``"snippet_id"`` equals ``doc_id``.

    The entries of the module-level ``raw_snippet_data`` are scanned in order,
    and within each entry its ``"snippets"`` list is scanned in order.
    Returns ``None`` when no snippet matches.
    """
    matching = (
        candidate
        for entry in raw_snippet_data
        for candidate in entry["snippets"]
        if candidate["snippet_id"] == doc_id
    )
    # next() with a default stops at the first hit, like the early return did.
    return next(matching, None)

def save_snippet_by_json(json_obj, topic, doc_id):
    """Write ``json_obj`` as JSON to ``baseline/topics/<topic>/<doc_id>.json``.

    ``topic`` is converted with ``str``; ``doc_id`` is concatenated as-is and
    therefore has to be a string. The file is opened in write mode, so an
    existing file at that path is overwritten. The target directory is not
    created here.
    """
    target_path = "".join(("baseline/topics/", str(topic), "/", doc_id, ".json"))
    with open(target_path, 'w') as outfile:
        json.dump(json_obj, outfile)

In [65]:
from PIL import Image, ImageDraw

light_gray = "#dddddd"
def create_traffic_light(doc_id, color):
    """Draw a traffic-light image and save it as ``baseline/media/<doc_id>.jpeg``.

    The image is 100x300 pixels with a gray background and three stacked
    circles: red at the top, yellow in the middle, green at the bottom. Only
    the circle matching ``color`` is filled with that color; the other two are
    filled with ``light_gray``.

    For any ``color`` other than "green", "yellow" or "red" nothing is drawn
    and no file is written.
    """
    path = "baseline/media/"+doc_id+".jpeg"

    # Lamp colors in top-to-bottom order.
    lamps = ("red", "yellow", "green")
    if color not in lamps:
        return

    im = Image.new("RGB", (100, 300), "#ffffff")
    draw = ImageDraw.Draw(im)
    # gray housing covering the whole canvas
    draw.rectangle(((0, 0), (100, 300)), fill="gray")
    for slot, lamp in enumerate(lamps):
        # Each lamp is an 80px circle inside its own 100px-high slot.
        top = 10 + 100 * slot
        lamp_fill = lamp if lamp == color else light_gray
        draw.ellipse(((10, top), (90, top + 80)), fill=lamp_fill)
    im.save(path)

In [66]:
def save_snippet_in_baseline(warning_text,color,doc_id,topic):
    """Assemble the baseline record for one snippet and store it as JSON.

    The snippet is looked up with ``get_snippet_by_id(doc_id)``. Its title,
    URL and description are combined with ``warning_text`` and the image file
    name ``color + ".jpg"``, and the record is written through
    ``save_snippet_by_json(record, topic, doc_id)``.

    If the lookup returns ``None`` (unknown ``doc_id``), subscripting it
    raises ``TypeError``.
    """
    snippet = get_snippet_by_id(doc_id)
    record = {
        "title": snippet["snippet_title"],
        "url": snippet["snippet_url"],
        "content": snippet["snippet_description"],
        "color": color+".jpg",
        "warning": warning_text,
    }
    save_snippet_by_json(record, topic, doc_id)

In [67]:
def aspects_to_txt(aspects):
    """Join aspect names into a readable enumeration.

    Args:
        aspects: Sequence of strings (e.g. ``["content", "title"]``).

    Returns:
        ``""`` for an empty sequence, the single item for one aspect,
        ``"a, b"`` for two aspects, and ``"a, b, ... and z"`` for three or
        more. Sequences of any length are supported; previously everything
        after the fourth aspect was silently dropped.
    """
    if len(aspects) == 0:
        return ""
    if len(aspects) == 1:
        return aspects[0]
    if len(aspects) == 2:
        # Two aspects have always been joined with a comma (no "and");
        # kept unchanged so existing outputs stay identical.
        return aspects[0]+", "+aspects[1]
    # Three or more: comma-separate all but the last, then append " and <last>".
    return ", ".join(aspects[:-1])+" and "+aspects[-1]

## Show results for test documents

In [68]:
# Topic number per test document, addressed by the same running index as the
# test documents in the evaluation loop: six topics with eight documents each.
topics_y = [topic for topic in (9, 20, 25, 30, 47, 49) for _ in range(8)]

# ids of the test documents, one per line
with open("test_document_ids.txt","r") as f:
    test_document_ids = [line.rstrip() for line in f]

# assessment lines carry the labels used to evaluate the classifiers
file_path = "assessments_with_bl.txt"
with open(file_path) as f:
    assessments = f.readlines()

# Build test_y: one label per test document, taken from the first assessment
# line whose third space-separated field equals the document id. A fourth
# field of "2" means "real"; anything else means "fake".
test_y = []
individuals = []  # document ids that already received a label
for doc_id in test_document_ids:
    for doc in assessments:
        fields = doc.split(' ')
        if fields[2] != doc_id:
            continue
        label = "real" if fields[3] == "2" else "fake"
        if doc_id not in individuals:
            individuals.append(doc_id)
            test_y.append(label)
print(len(test_y))

# Running position of the current document; indexes test_y and topics_y.
index = 0

# Number of documents for which more than two classifiers agreed on a label.
total_count = 0

# Number of those agreements that match the label stored in test_y.
count = 0

# Minimum certainty for a content/title vote and for a topic-agonistic vote.
# 0.7 0.6
threshold = 0.6
threshold_agonistic = 0.6

num_of_hints = 0


def _vote_symbol(entry, confident):
    """Map one classifier result to "x" (fake), "o" (real) or "-" (no vote).

    ``entry["prediction"]`` is only consulted when ``confident`` is true.
    """
    if not confident:
        return "-"
    if entry["prediction"] == "real":
        return "o"
    if entry["prediction"] == "fake":
        return "x"
    return "-"


for doc_id in test_document_ids:
    warning_text = ""

    # One vote per classifier. The agency classifier uses a fixed, strict
    # bound (> 0.5); the others use the inclusive thresholds defined above.
    agency_entry = agency_predictions[doc_id]
    warning_ads = _vote_symbol(agency_entry, agency_entry["certainity"] > 0.5)
    content_entry = content_predictions[doc_id]
    warning_content = _vote_symbol(content_entry, content_entry["certainity"] >= threshold)
    title_entry = title_predictions[doc_id]
    warning_title = _vote_symbol(title_entry, title_entry["certainity"] >= threshold)
    agonistic_entry = topic_agonistic_predictions[doc_id]
    warning_topic_agonistic = _vote_symbol(
        agonistic_entry, agonistic_entry["certainity"] >= threshold_agonistic
    )

    # Aspect name shown to the user for each classifier, in output order.
    votes = (
        ("content", warning_content),
        ("title", warning_title),
        ("language", warning_topic_agonistic),
        ("advertisements", warning_ads),
    )
    fake_aspects = [aspect for aspect, symbol in votes if symbol == "x"]
    real_aspects = [aspect for aspect, symbol in votes if symbol == "o"]
    count_fake_assignments = len(fake_aspects)
    count_real_assignments = len(real_aspects)

    fake_aspects = aspects_to_txt(fake_aspects)
    real_aspects = aspects_to_txt(real_aspects)

    # Default is yellow; a hint is only issued when more than two
    # classifiers agree on the same label.
    color = "yellow"
    if count_fake_assignments > 2:
        color = "red"
        total_count += 1
        num_of_hints += 1
        warning_text = "Based on the "+fake_aspects+" on this page, there is a risk that this page is not credible!"
        print(warning_text)
        if test_y[index] == "fake":
            count += 1
    if count_real_assignments > 2:
        color = "green"
        total_count += 1
        num_of_hints += 1
        warning_text =  "Based on the "+real_aspects+" on this page, it can be assumed that this page is credible!"
        print(warning_text)
        if test_y[index] == "real":
            count += 1

    save_snippet_in_baseline(warning_text,color,doc_id,topics_y[index])
    # image generation is currently disabled:
    #create_traffic_light(doc_id, color)

    # id, true label, then the votes: content, title, ads, topic agonistic
    print(doc_id, test_y[index], warning_content,warning_title,warning_ads, warning_topic_agonistic)
    index += 1

# Print how many of the issued hints (red/green) match the label in test_y.
# When no hint was issued at all the ratio is undefined, so a message is
# printed instead of dividing by zero.
if total_count:
    print(count/total_count)
else:
    print("no hints issued - share of correct hints is undefined")

# Number of hints and the share of test snippets that received one. The
# denominator is the actual number of test documents, not a hard-coded 48.
num_test_snippets = len(test_document_ids)
hint_share = total_count / num_test_snippets if num_test_snippets else 0.0
print("num of hints: ", total_count, "Num of Snippets with a hint: ", hint_share)

48
c2e0da8e-0306-4a4b-b593-84c8a45395db real - o x -
cabe8e93-81be-4d48-8333-266f731af782 real x - x -
Based on the content, language and advertisements on this page, there is a risk that this page is not credible!
0ea1f2e6-b532-44c0-821f-70a6ef6735eb fake x o x x
Based on the content, title and advertisements on this page, there is a risk that this page is not credible!
cb5946b7-39a9-4d03-aa3a-9930a4c3d77e real x x x o
Based on the content, title, language and advertisements on this page, there is a risk that this page is not credible!
daee3dea-9939-4a41-a54c-8c48fd25f4b8 real x x x x
Based on the title, language and advertisements on this page, there is a risk that this page is not credible!
60781b1a-0a9c-4802-9e0f-eea97e20db36 fake - x x x
Based on the content, title and advertisements on this page, there is a risk that this page is not credible!
b9cbbf6a-43d8-4be3-aa6c-da60d1161ba8 fake x x x -
Based on the content, title, language and advertisements on this page, there is a risk t