## Import Results

In [31]:
import json

# content predictions
with open("predictions/"+"content_predictions.json") as json_file:
    content_predictions = json.load(json_file)

# advertisement agency predictions
with open("predictions/"+"advertisement_agency_predictions.json") as json_file:
    agency_predictions = json.load(json_file)

# title predictions
with open("predictions/"+"title_predictions.json") as json_file:
    title_predictions = json.load(json_file)

# load raw snippet data
with open("raw_snippet_data.json") as json_file:
    raw_snippet_data = json.load(json_file)

In [32]:
def get_snippet_by_id(doc_id):
  for obj in raw_snippet_data:
      for snippet in obj["snippets"]:
          if snippet["snippet_id"] == doc_id:
              return snippet
  return None

def save_snippet_by_json(json_obj, topic, doc_id):
    with open("baseline/topics/"+str(topic)+"/"+doc_id+".json", 'w') as outfile:
       json.dump(json_obj, outfile)

In [33]:
def save_snippet_in_baseline(warning_text,doc_id,topic):
    #print(warning_text,doc_id,topic)
    snippet_data = get_snippet_by_id(doc_id)
    title = snippet_data["snippet_title"]
    url = snippet_data["snippet_url"]
    content = snippet_data["snippet_description"]
    obj = {
        "title" : title,
        "url" : url,
        "content" : content,
        "warning" : warning_text
    }
    save_snippet_by_json(obj, topic, doc_id)
    

## Show results for test documents

In [34]:
topics_y = [9,9,9,9,9,9,9,9,20,20,20,20,20,20,20,20,25,25,25,25,25,25,25,25,30,30,30,30,30,30,30,30,47,47,47,47,47,47,47,47,49,49,49,49,49,49,49,49]

# first we need to load the ids of the test documents
with open("test_document_ids.txt","r") as f:
    test_document_ids = [line.rstrip() for line in f]

# load assessments txt file to load labels of test data to evaluate model
file_path = "assessments_with_bl.txt"
with open(file_path) as f:
    assessments = f.readlines()

# load / save labels of test data in test_y
test_y = []
individuals = []
for doc_id in test_document_ids:
    for doc in assessments:
        if doc.split(' ')[2] == doc_id:
            if doc.split(' ')[3] == "2" and (doc_id in individuals) == False:
                individuals.append(doc_id)
                test_y.append("real")
            elif (doc_id in individuals) == False:
                individuals.append(doc_id)
                test_y.append("fake")
print(len(test_y))    

# print current index while evaluating each document
index = 0

# count how many times all classifiers predict the same label
total_count = 0

# count how many times all classifiers predict the same label correctly
count = 0

threshold = 0.7

num_of_hints = 0
for doc_id in test_document_ids:
    warning_ads = "-"
    warning_content = "-"
    warning_title = "-"
    warning_text = ""
    # agency predictions for fake 
    if agency_predictions[doc_id]["certainity"] > 0.5 and agency_predictions[doc_id]["prediction"] == "fake":
        warning_ads = "x"
    # content predictions for fake     
    if content_predictions[doc_id]["certainity"] >= threshold and content_predictions[doc_id]["prediction"] == "fake":
        warning_content = "x"
    # title predictions for fake 
    if title_predictions[doc_id]["certainity"] >= threshold and title_predictions[doc_id]["prediction"] == "fake":
        warning_title = "x"
    # agency predictions for real 
    if agency_predictions[doc_id]["certainity"] > 0.5 and agency_predictions[doc_id]["prediction"] == "real":
        warning_ads = "o"
    # content predictions for real 
    if content_predictions[doc_id]["certainity"] >= threshold and content_predictions[doc_id]["prediction"] == "real":
        warning_content = "o"
    # title predictions for real 
    if title_predictions[doc_id]["certainity"] >= threshold and title_predictions[doc_id]["prediction"] == "real":
        warning_title = "o"
    # check if all three classifiers do predict the same label (fake)
    if warning_content == "x" and warning_title == "x" and warning_ads == "x":
        total_count += 1
        num_of_hints += 1
        warning_text = "Based on the title, text and advertising content on this page, there is a risk that this page is not credible!"
        print(warning_text)
        if test_y[index] == "fake":
            count += 1
    # check if all three classifiers do predict the same label (real)
    if warning_content == "o" and warning_title == "o" and warning_ads == "o":
        total_count += 1
        num_of_hints += 1
        warning_text =  "Based on the title, text and advertising content on this page, it can be assumed that this page is credible!"
        print(warning_text)
        if test_y[index] == "real":
            count += 1


    save_snippet_in_baseline(warning_text,doc_id,topics_y[index])
    # print results of evaluation
    print(doc_id, test_y[index], warning_content,warning_title,warning_ads)
    #print(test_y[index],content_predictions[doc_id]["prediction"], title_predictions[doc_id]["prediction"], agency_predictions[doc_id]["prediction"])
    index += 1

# Print how many of the 
print(count/total_count)
print("num of hints: ", total_count, "Num of Snippets with a hint: ", total_count / 48)

48
c2e0da8e-0306-4a4b-b593-84c8a45395db real - o x
cabe8e93-81be-4d48-8333-266f731af782 real x - x
0ea1f2e6-b532-44c0-821f-70a6ef6735eb fake x - x
cb5946b7-39a9-4d03-aa3a-9930a4c3d77e real - x x
Based on the title, text and advertising content on this page, there is a risk that this page is not credible!
daee3dea-9939-4a41-a54c-8c48fd25f4b8 real x x x
60781b1a-0a9c-4802-9e0f-eea97e20db36 fake - x x
Based on the title, text and advertising content on this page, there is a risk that this page is not credible!
b9cbbf6a-43d8-4be3-aa6c-da60d1161ba8 fake x x x
670f17e8-1fa3-4a8b-9dc7-272e07e833ee fake x - x
06595b02-6a27-481e-adc3-f4f49a7fc124 fake o o x
6cf338bc-8613-4077-8878-f95ff6d1c9eb fake - o o
Based on the title, text and advertising content on this page, it can be assumed that this page is credible!
11755246-3c48-4ba3-afa4-d26e038c2a55 real o o o
Based on the title, text and advertising content on this page, there is a risk that this page is not credible!
b3c912ec-f594-4fb8-92f0-555

TypeError: 'NoneType' object is not subscriptable