In [None]:
pip install requests



In [None]:
import os

# Prefer the FACT_CHECK_API_KEY environment variable so the real key never has
# to be saved inside the notebook; the placeholder remains the fallback.
API_KEY = os.environ.get("FACT_CHECK_API_KEY") or "<insert_your_api_key>"

In [None]:
import requests
import json

def query_fact_check_api(claim, timeout=10):
    """Queries the Google Fact Check Tools API for a given claim.

    Args:
        claim (str): The claim to search for fact checks.
        timeout (float | tuple): Seconds to wait for the server before giving
            up; passed straight to ``requests.get``. Defaults to 10.

    Returns:
        dict: The API response parsed as a JSON object.

    Raises:
        requests.exceptions.HTTPError: If the API answers with an error status.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout``.
    """

    url = "https://factchecktools.googleapis.com/v1alpha1/claims:search"
    params = {
        "key": API_KEY,
        "query": claim,
    }

    # Without a timeout, requests waits indefinitely on a stalled connection,
    # which would hang any loop that checks many claims in a row.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()  # Raise an exception for error HTTP statuses

    return response.json()

# if __name__ == "__main__":
#     claim_to_check = "The Earth is flat"  # Example claim
#     result = query_fact_check_api(claim_to_check)

#     if result.get("claims"):
#         for claim in result["claims"]:
#             print("Claim:", claim["text"])
#             print("Fact Check Results:")
#             for review in claim["claimReview"]:
#                 print(f"\tPublisher: {review['publisher']['name']}")
#                 print(f"\tURL: {review['url']}")
#                 print(f"\tRating: {review['textualRating']}\n")
#     else:
#         print("No fact checks found for this claim.")


In [None]:
# Example Usage

claim_to_check = "The Earth is round"
result = query_fact_check_api(claim_to_check)

if result.get("claims"):
    for claim in result["claims"]:
        print("Claim:", claim.get("text", "N/A"))
        print("Fact Check Results:")
        # Look fields up tolerantly: a review that lacks one of them should
        # print a placeholder instead of aborting the cell with a KeyError.
        for review in claim.get("claimReview", []):
            publisher_name = review.get("publisher", {}).get("name", "N/A")
            review_url = review.get("url", "N/A")
            textual_rating = review.get("textualRating", "N/A")
            print(f"\tPublisher: {publisher_name}")
            print(f"\tURL: {review_url}")
            print(f"\tRating: {textual_rating}\n")
else:
    print("No fact checks found for this claim.")

Claim: Pictures show the Earth is flat, and sea levels haven’t changed
Fact Check Results:
	Publisher: USA Today
	URL: https://www.usatoday.com/story/news/factcheck/2023/08/07/false-claim-the-earth-is-flat-and-sea-levels-arent-rising-fact-check/70533704007/
	Rating: False

Claim: The Earth is flat.
Fact Check Results:
	Publisher: Full Fact
	URL: https://fullfact.org/online/earth-is-spherical-not-flat/
	Rating: We have abundant evidence going back thousands of years that the Earth is roughly spherical.

Claim: Satellites are fake.
Fact Check Results:
	Publisher: Full Fact
	URL: https://fullfact.org/online/earth-is-spherical-not-flat/
	Rating: There are many man-made satellites currently in orbit, playing an essential role in common electronic devices.

Claim: Radar technology wouldn’t work if the Earth was a globe
Fact Check Results:
	Publisher: USA Today
	URL: https://www.usatoday.com/story/news/factcheck/2023/05/08/no-radar-technology-doesnt-prove-earth-is-flat-fact-check/11728472002/

In [None]:
# Another Example: show the unformatted response dictionary for a single claim
result = query_fact_check_api(claim="The earth is flat")
print(result)

{'claims': [{'text': 'The Earth is flat.', 'claimant': 'instagram user', 'claimDate': '2023-02-12T00:00:00Z', 'claimReview': [{'publisher': {'name': 'Full Fact', 'site': 'fullfact.org'}, 'url': 'https://fullfact.org/online/earth-is-spherical-not-flat/', 'title': 'The Earth is not flat', 'reviewDate': '2023-03-03T00:00:00Z', 'textualRating': 'We have abundant evidence going back thousands of years that the Earth is roughly spherical.', 'languageCode': 'en'}]}, {'text': 'Satellites are fake.', 'claimant': 'instagram user', 'claimDate': '2023-02-12T00:00:00Z', 'claimReview': [{'publisher': {'name': 'Full Fact', 'site': 'fullfact.org'}, 'url': 'https://fullfact.org/online/earth-is-spherical-not-flat/', 'title': 'The Earth is not flat', 'reviewDate': '2023-03-03T00:00:00Z', 'textualRating': 'There are many man-made satellites currently in orbit, playing an essential role in common electronic devices.', 'languageCode': 'en'}]}, {'text': 'Pictures show the Earth is flat, and sea levels haven’

In [None]:
def calculate_average_true_percentages(claims, doPrint=False):
    """Calculates, per claim, the share of decisive ratings that are 'True'.

    Every claim is looked up with ``query_fact_check_api`` and each review's
    ``textualRating`` is bucketed by exact string match: ``"True"``,
    ``"False"``, or anything else (counted as "unknown").

    Args:
        claims (list): A list of claims to check.
        doPrint (bool): When True, print a trace of every rating encountered.

    Returns:
        tuple: ``(percentages, raw_values)``, two lists aligned with ``claims``.
            ``percentages[i]`` is ``true / (true + false) * 100`` on a 0-100
            scale, or ``None`` when the claim has no "True"/"False" rating at
            all (no results, no reviews, or only unknown ratings).
            ``raw_values[i]`` is ``(true_count, false_count, unknown_count)``.
    """

    def debug_print(message):
        # Print the text verbatim; running it through str.format() would fail
        # on any claim or rating that happens to contain '{' or '}'.
        if doPrint:
            print(message)

    percentages = []
    raw_values = []

    for claim in claims:
        result = query_fact_check_api(claim)

        debug_print(f"Evaluating claim: '{claim}'")

        true_count = 0
        false_count = 0
        unknown_count = 0

        # A distinct loop name keeps the outer `claim` (the query) intact.
        for matched_claim in result.get("claims") or []:
            for review in matched_claim.get("claimReview") or []:
                rating = review.get("textualRating")
                debug_print(f"\trating: {rating}")
                if rating == "True":
                    true_count += 1
                    debug_print("\ttrue")
                elif rating == "False":
                    debug_print("\tfalse")
                    false_count += 1
                else:
                    debug_print("\tunknown")
                    unknown_count += 1

        decisive_count = true_count + false_count
        if decisive_count > 0:
            percentages.append((true_count / decisive_count) * 100)
        elif unknown_count > 0:
            # Only unknown ratings: no ratio can be formed (and dividing by
            # true + false would raise ZeroDivisionError).
            debug_print("\tNo True/False Ratings")
            percentages.append(None)
        elif result.get("claims"):
            debug_print("\tNo Ratings")
            percentages.append(None)  # No ratings
        else:
            debug_print("\tNo Ratings/Failed Query")
            percentages.append(None)  # No fact checks found
        raw_values.append((true_count, false_count, unknown_count))

    return percentages, raw_values

In [None]:
    # Hand-written sample claims, paired position by position with the expected verdicts.
    claims_list = [
        "The Earth is flat",
        "Vaccines cause autism",
        "Moon landing was a hoax",
        "The Earth is round",
        "Europe is a continent",
        "NASA has not found aliens",
    ]
    claims_answers = [False, False, False, True, True, True]
    assert len(claims_list) == len(claims_answers)

    results, raw_counts = calculate_average_true_percentages(claims_list, doPrint=True)


Evaluating claim: 'The Earth is flat'
	rating: We have abundant evidence going back thousands of years that the Earth is roughly spherical.
	unknown
	rating: There are many man-made satellites currently in orbit, playing an essential role in common electronic devices.
	unknown
	rating: False
	false
	rating: False
	false
	rating: False
	false
	rating: False
	false
	rating: False
	false
	rating: False
	false
	rating: Altered
	unknown
	rating: False
	false
Evaluating claim: 'Vaccines cause autism'
	rating: False
	false
	rating: Unsupported
	unknown
	rating: False
	false
	rating: False
	false
	rating: Incorrect
	unknown
	rating: Incorrect
	unknown
	rating: Misleading
	unknown
	rating: False. Studies show there is no notable difference in autism rates between vaccinated and unvaccinated children.
	unknown
	rating: Incorrect
	unknown
	rating: Incorrect
	unknown
Evaluating claim: 'Moon landing was a hoax'
	rating: False
	false
	rating: False. The initial images used by newspapers were taken f

In [None]:
for claim, percentage, raw_count in zip(claims_list, results, raw_counts):
    if percentage is not None:
        # `percentage` is on a 0-100 scale (it is printed with a '%' sign below),
        # so "mostly rated True" means above 50, not above 0.5.
        rating = "Real" if percentage > 50 else "Fake"
        print(f"Claim: {claim}\n\tRating: {rating}\n\tPercentage Real: {percentage:.2f}%\n\tNumber of Unknown Ratings: {raw_count[2]}")
    else:
        print(f"Claim: {claim}\n\tNo fact checks found")

Claim: The Earth is flat
	Rating: Fake
	Percentage Real: 0.00%
	Number of Unknown Ratings: 3
Claim: Vaccines cause autism
	Rating: Fake
	Percentage Real: 0.00%
	Number of Unknown Ratings: 7
Claim: Moon landing was a hoax
	Rating: Fake
	Percentage Real: 0.00%
	Number of Unknown Ratings: 3
Claim: The Earth is round
	Rating: Fake
	Percentage Real: 0.00%
	Number of Unknown Ratings: 2
Claim: Europe is a continent
	No fact checks found
Claim: NASA has not found aliens
	Rating: Fake
	Percentage Real: 0.00%
	Number of Unknown Ratings: 0


In [None]:
# Evaluation
def evaluate(claims_list, results, raw_counts, claims_answers):
    """Prints how well the rating percentages agree with the expected answers.

    The four arguments are parallel sequences. A claim is guessed to be true
    when more than half of its decisive ratings were "True"; the guess is then
    compared with the expected answer and a per-claim line plus an overall
    summary are printed.

    Args:
        claims_list (list): The claim strings.
        results (list): Per-claim "True" percentage on a 0-100 scale, or None
            when no evaluation could be produced for the claim.
        raw_counts (list): Per-claim ``(true, false, unknown)`` rating counts.
        claims_answers (list): Expected truth value (bool) of each claim.

    Returns:
        None. All results are written to standard output.
    """
    n_incorrect = 0
    n_correct = 0
    n_unknown_evals = 0  # Ratings that were neither "True" nor "False", so they could not be used
    n_total_evals = 0  # All ratings (true + false + unknown) of the claims that were evaluated
    n_no_evals = 0  # Claims for which no evaluation could be produced

    for claim, percentage, raw_count, answer in zip(claims_list, results, raw_counts, claims_answers):
        if percentage is None:
            n_no_evals += 1
            print(f"[FAILED]!; Could not produce an evaluation;\t\t\tclaim='{claim}'")
            continue

        # The percentage runs from 0 to 100, so a "True" majority is > 50.
        guess = percentage > 50

        n_unknown_evals += raw_count[2]
        n_total_evals += raw_count[0] + raw_count[1] + raw_count[2]

        if guess == answer:
            n_correct += 1
            print(f"[OKAY]  !; nTrueEvals={raw_count[0]}; nFalseEvals={raw_count[1]}; nUnknownEvals={raw_count[2]};\tclaim='{claim}'")
        else:
            n_incorrect += 1
            print(f"[FAILED]!; nTrueEvals={raw_count[0]}; nFalseEvals={raw_count[1]}; nUnknownEvals={raw_count[2]};\tclaim='{claim}'")

    n_evaluated = n_correct + n_incorrect
    if n_evaluated == 0:
        print("All evaluations failed!")
    else:
        # Unknown ratings are reported as a share of all ratings, not of the
        # number of claims, so the figure stays within 0-100%.
        unknown_share = round(n_unknown_evals / n_total_evals * 100) if n_total_evals else 0
        print(f"\n\nOverall success rate is {round(n_correct / n_evaluated * 100)}%")
        print(
            f"Number of unknown evaluations for all claims combined: {n_unknown_evals} "
            f"which is {unknown_share}% of all evals "
            "(successful or unknown; this number should be as small as possible) \\| "
            "(means we didn't know if the evaluation was true or false)"
        )

    print(f"n_correct={n_correct}; n_incorrect={n_incorrect}; total successful evaluations: {n_evaluated}")
    print(f"Number of claims we couldn't evaluate: {n_no_evals}")

In [None]:
# Compare the ratings gathered for the claims with their expected answers and print a summary.
evaluate(claims_list, results, raw_counts, claims_answers)

[OKAY]  !; nTrueEvals=0; nFalseEvals=7; nUnknownEvals=3;	claim='The Earth is flat
[OKAY]  !; nTrueEvals=0; nFalseEvals=3; nUnknownEvals=7;	claim='Vaccines cause autism
[OKAY]  !; nTrueEvals=0; nFalseEvals=7; nUnknownEvals=3;	claim='Moon landing was a hoax
[FAILED]!; nTrueEvals=0; nFalseEvals=8; nUnknownEvals=2;	claim='The Earth is round
[FAILED]!; Could not produce an evaluation;			claim='Europe is a continent
[FAILED]!; nTrueEvals=0; nFalseEvals=1; nUnknownEvals=0;	claim='NASA has not found aliens


Overall success rate is 60%
Number of unknown evaluations for all claims combined: 15 which is 300% of all evals (successful or unknown; this number should be as small as possible) \| (means we didn't know if the evaluation was true or false)
n_correct=3; n_incorrect=2; total successful evaluations: 5
Number of claims we couldn't evaluate: 1


## Evaluation!

Load the dataset first.

In [None]:
# Shell command: extract the dataset archive (expected in the current working directory).
!unzip fake_news_detection_dataset.zip

import pandas as pd
import re

Archive:  fake_news_detection_dataset.zip
  inflating: News _dataset/Fake.csv  
  inflating: News _dataset/True.csv  


In [None]:
# ------------------------
#  Configuration
# ------------------------
dataset_path_fake = "News _dataset/Fake.csv"
dataset_path_real = "News _dataset/True.csv"
# Fixed seed so the shuffle below produces the same row order on every run
RANDOM_SEED = 42

# ------------------------
# Load and Preprocess Dataset
# ------------------------
df_real = pd.read_csv(dataset_path_real)
df_fake = pd.read_csv(dataset_path_fake)

# ------------------------
# Create the 'is fake or real' column
# ------------------------
# Labels are kept as the strings '1' (real) and '0' (fake)
df_real['news_type'] = '1'
df_fake['news_type'] = '0'

# ------------------------
# Combine the dataframes into one and shuffle it
# ------------------------
df_real_and_fake = pd.concat([df_real, df_fake], ignore_index=True)
df_real_and_fake = df_real_and_fake.sample(frac=1, random_state=RANDOM_SEED)

# ------------------------
# Cleanup function if desired
# ------------------------
# Basic text cleaning for classifier
def clean_text(text):
    """Return ``text`` with every character that is not an ASCII letter, digit or space replaced by a space.

    Each offending character becomes exactly one space; the length of the
    string is unchanged and existing spaces are kept as they are.
    """
    # Lowercasing is currently disabled:
    # text = text.lower()
    # Further cleaning steps could be added here if needed.
    cleaned = re.sub("[^A-Za-z0-9 ]", " ", text)
    return cleaned

# ------------------------
# Clean the titles
# ------------------------
df_real_and_fake['cleaned_title'] = df_real_and_fake['title'].apply(clean_text)

# ------------------------
# Reduce the dataset size for testing models
# ------------------------
REDUCED_SIZE = 100

# Count rows with len(): DataFrame.size is rows * columns, which would shrink
# the sample by a factor equal to the number of columns.
n_rows = len(df_real_and_fake)
n_sampled = min(REDUCED_SIZE, n_rows)
fraction = n_sampled / n_rows if n_rows else 0.0

# Draw exactly n_sampled rows. sample() already returns them in random order,
# so no further shuffle is needed; the fixed seed makes the draw repeatable
# for a given row order.
df_real_and_fake_reduced = df_real_and_fake.sample(n=n_sampled, random_state=42)

print(f"Used fraction=={fraction}; New number of dataframe items: {len(df_real_and_fake_reduced)}")

# ------------------------
# Create tuple of cleaned titles and is real/fake
# ------------------------
titles_to_analyze = df_real_and_fake_reduced["cleaned_title"].tolist()  # Extract titles
titles_isReal = df_real_and_fake_reduced["news_type"].tolist()  # Extract 0 and 1 (fake and real) information
titles_isReal = [bool(int(i)) for i in titles_isReal]  # Convert 0 to False and 1 to True

assert len(titles_isReal) == len(titles_to_analyze)

# # Create a list of tuple values in the form [(title1, True), (title2, False)]
# dataset = []
# for title, answer in zip(titles_to_analyze, titles_isReal):
#   dataset.append((title, answer))

# assert len(dataset) == len(titles_to_analyze)


Used fraction==0.0003712117837468633; New number of dataframe items: 102


In [None]:
# Preview the first seven (title, label) pairs as a quick sanity check.
for t, e in zip(titles_to_analyze[:7], titles_isReal[:7]):
    print(f"isTrue={e};\ttitle={t}")

isTrue=False;	title= Racist TX High Schoolers Spray Paint  Whites Only  Near Drinking Fountain  IMAGE 
isTrue=True;	title=Vatican treasurer to face March court hearing in Australia over historical sex charges
isTrue=False;	title=HOW MEMPHIS TEEN DEFIED OBAMA S  Black Victim  Narrative Used Positive Attitude And Strong Work Ethic  Not Violence or Freebies To Make A Better Life  VIDEO 
isTrue=False;	title= Whoopi Goldberg Gets PISSED  Gives Trump The Reality Check We ve All Been Waiting For  VIDEO 
isTrue=False;	title= Read Gabby Giffords  Beautiful Letter In Solidarity With Democratic Sit In
isTrue=False;	title=Hillary s Secret Weapon  Evan McMullin is CIA Goldman Sachs candidate  backed by Mitt Romney s Wall Street Machine
isTrue=False;	title=White House Sinks To New Low  Spokesperson Makes Fun of Trump s  Outrageous Appearance  and Hair


Run the evaluation on the sampled titles.

In [None]:
# Use the sampled titles as claims and their real/fake labels as the expected answers.
claims_list, claims_answers = titles_to_analyze, titles_isReal
assert len(claims_list) == len(claims_answers)

print(f"Number of claims=={len(claims_list)}")

# Look up every title (with the per-rating trace enabled), then score the outcome.
results, raw_counts = calculate_average_true_percentages(claims_list, doPrint=True)

print("\n---------------------------------------------------------\n")

evaluate(claims_list, results, raw_counts, claims_answers)


Number of claims==17
Evaluating claim: ' Racist TX High Schoolers Spray Paint  Whites Only  Near Drinking Fountain  IMAGE '
	No Ratings/Failed Query
Evaluating claim: 'Vatican treasurer to face March court hearing in Australia over historical sex charges'
	No Ratings/Failed Query
Evaluating claim: 'HOW MEMPHIS TEEN DEFIED OBAMA S  Black Victim  Narrative Used Positive Attitude And Strong Work Ethic  Not Violence or Freebies To Make A Better Life  VIDEO '
	No Ratings/Failed Query
Evaluating claim: ' Whoopi Goldberg Gets PISSED  Gives Trump The Reality Check We ve All Been Waiting For  VIDEO '
	No Ratings/Failed Query
Evaluating claim: ' Read Gabby Giffords  Beautiful Letter In Solidarity With Democratic Sit In'
	No Ratings/Failed Query
Evaluating claim: 'Hillary s Secret Weapon  Evan McMullin is CIA Goldman Sachs candidate  backed by Mitt Romney s Wall Street Machine'
	No Ratings/Failed Query
Evaluating claim: 'White House Sinks To New Low  Spokesperson Makes Fun of Trump s  Outrageous 