# Automated Fact-Checking System - Demo

This notebook demonstrates the full fact-checking pipeline:
1. **Triplet Extraction** - Extract (subject, predicate, object) from claims
2. **Entity Linking** - Map entities to DBpedia URIs
3. **Knowledge Base Query** - Verify claims against DBpedia
4. **Neural Classification** - BERT-based verdict prediction
5. **Final Verdict** - SUPPORTED / REFUTED / NOT ENOUGH INFO
6. **Explainability** - T5-generated explanations, KB reasoning chains, attention analysis, confidence decomposition

In [None]:
# Put the parent directory first on the import path so the `src` package below
# can be imported.
# NOTE(review): assumes the notebook is run from a directory one level below
# the repository root — confirm.
import sys
sys.path.insert(0, '..')

# Restrict logging to WARNING and above for the whole notebook.
import logging
logging.basicConfig(level=logging.WARNING)

# Project pipeline components. These imports must stay after the sys.path
# change above, otherwise `src` may not be resolvable.
from src.triplet_extractor import TripletExtractor
from src.entity_linker import EntityLinker
from src.knowledge_query import KnowledgeQuery
from src.fact_checker import FactChecker, format_result

# Simple confirmation that every import above succeeded.
print('All modules loaded successfully!')

## 1. Triplet Extraction

We use spaCy dependency parsing to extract (subject, predicate, object) triplets from English sentences.

In [None]:
# A single extractor instance is reused for all demo sentences.
extractor = TripletExtractor()

sentences = [
    "Paris is the capital of France",
    "Barack Obama was born in Hawaii",
    "Albert Einstein developed the theory of relativity",
    "The Eiffel Tower is located in Paris",
    "Tokyo is the capital of Japan",
]

# Print each sentence (quoted, preceded by a blank line) followed by the
# subject / predicate / object of every triplet extracted from it.
for sentence_text in sentences:
    triplets = extractor.extract(sentence_text)
    print(f'\n"{sentence_text}"')
    for subj_text, pred_text, obj_text in triplets:
        print(
            f'  Subject: {subj_text}\n'
            f'  Predicate: {pred_text}\n'
            f'  Object: {obj_text}'
        )

## 2. Entity Linking

Map extracted entities to their DBpedia URIs using the DBpedia Lookup API.

In [None]:
linker = EntityLinker()

entities = [
    "Paris",
    "France",
    "Barack Obama",
    "Hawaii",
    "Eiffel Tower",
    "Albert Einstein",
    "Tokyo",
    "Japan",
]

# One line per entity: the name padded to 20 columns, then whatever the linker returned.
for entity_name in entities:
    uri = linker.link(entity_name)
    print('{} -> {}'.format(entity_name.ljust(20), uri))

## 3. Knowledge Base Query

Verify relations between entities using DBpedia SPARQL and JSON endpoints.

In [None]:
kq = KnowledgeQuery()

# (subject URI, object URI) pairs whose relation is looked up in the knowledge base.
pairs = [
    ("http://dbpedia.org/resource/Paris", "http://dbpedia.org/resource/France"),
    ("http://dbpedia.org/resource/Barack_Obama", "http://dbpedia.org/resource/Hawaii"),
    ("http://dbpedia.org/resource/Eiffel_Tower", "http://dbpedia.org/resource/Paris"),
    ("http://dbpedia.org/resource/Tokyo", "http://dbpedia.org/resource/Japan"),
]

for subject_uri, object_uri in pairs:
    kb_result = kq.verify_triplet(subject_uri, object_uri)

    # Human-readable names: last URI path segment with underscores turned into spaces.
    subj_name, obj_name = (
        resource.rsplit('/', 1)[-1].replace('_', ' ')
        for resource in (subject_uri, object_uri)
    )

    print(f'\n{subj_name} <-> {obj_name}')
    print(f'  Found: {kb_result["found"]} (via {kb_result["method"]})')

    # Show at most the first three predicates, by their last path segment only.
    for predicate_uri in kb_result['predicates'][:3]:
        print(f'  Predicate: {predicate_uri.rsplit("/", 1)[-1]}')

## 4. Full Pipeline - Fact Checking

Run the complete pipeline on 10 example claims.

In [None]:
# Build the base pipeline: the neural stage is switched on only when a model
# directory/file exists at `model_path`; otherwise the checker runs KB-only.
import os

model_path = '../models/fact_checker'
use_neural = os.path.exists(model_path)

if use_neural:
    checker = FactChecker(model_path=model_path, use_neural=True)
    neural_status = 'enabled'
else:
    checker = FactChecker(model_path=None, use_neural=False)
    neural_status = 'disabled - KB only'

print(f'Pipeline loaded (neural model: {neural_status})')

In [None]:
# Every demo claim is kept next to the verdict expected for it, so the two
# lists derived below cannot drift out of step when claims are added or removed.
labelled_claims = [
    # True claims
    ("Paris is the capital of France", "SUPPORTED"),
    ("Barack Obama was born in Hawaii", "SUPPORTED"),
    ("The Eiffel Tower is located in Paris", "SUPPORTED"),
    ("Albert Einstein developed the theory of relativity", "SUPPORTED"),
    ("Tokyo is the capital of Japan", "SUPPORTED"),
    # False claims
    ("The Earth is flat", "REFUTED"),
    ("Napoleon was born in England", "REFUTED"),
    ("Mars is the largest planet in the solar system", "REFUTED"),
    # Ambiguous claims
    ("Chocolate causes acne", "NOT ENOUGH INFO"),
    ("Dogs can sense earthquakes before they happen", "NOT ENOUGH INFO"),
]

# Parallel lists consumed by the rest of the notebook.
claims = [claim_text for claim_text, _label in labelled_claims]
expected = [_label for _claim_text, _label in labelled_claims]

# Run the base pipeline over every claim in order, keeping each result for the
# metrics cells and printing its formatted form under a separator line.
separator = '=' * 60
results = []
for claim_text in claims:
    outcome = checker.check(claim_text)
    results.append(outcome)
    print(separator)
    print(format_result(outcome), end='\n\n')

## 5. Metrics

Evaluate the pipeline's performance against expected verdicts.

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report

# Predicted verdicts, in the same order as `claims` / `expected`.
predicted = [item['verdict'] for item in results]

# One shared row layout for the header and for every claim line:
# claim text, expected verdict, predicted verdict, hit/miss marker.
row_template = '{:50s} {:18s} {:18s} {}'

print('Claim-by-claim results:')
print(row_template.format('Claim', 'Expected', 'Predicted', 'Match'))
print('-' * 100)
for claim_text, wanted, got in zip(claims, expected, predicted):
    print(row_template.format(claim_text, wanted, got, 'OK' if wanted == got else 'MISS'))

# Aggregate metrics over the three verdict classes.
labels = ['SUPPORTED', 'REFUTED', 'NOT ENOUGH INFO']
acc = accuracy_score(expected, predicted)
print('\n' + '=' * 50)
print(f'Accuracy: {acc:.2%}')
print('\nClassification Report:')
print(classification_report(expected, predicted, labels=labels, zero_division=0))

In [None]:
# Confidence distribution
# For each verdict class, summarise the confidence values of the base-pipeline
# results that were predicted as that class (average, minimum, maximum, count).
print('\nConfidence distribution by verdict:')
for verdict in labels:
    # Confidences of the results whose predicted verdict is this class.
    confs = [r['confidence'] for r in results if r['verdict'] == verdict]
    if confs:
        avg_conf = sum(confs) / len(confs)
        print(f'  {verdict:18s}: avg={avg_conf:.3f}, min={min(confs):.3f}, max={max(confs):.3f} (n={len(confs)})')
    else:
        # Report empty classes explicitly rather than dropping the line, so a
        # class that was never predicted is visible in the output.
        print(f'  {verdict:18s}: (no predictions)')

## 6. Full Pipeline with Explainability

Load the pipeline with the T5-based explainer to generate natural language explanations for each verdict.

In [None]:
# Build a second checker with the explainer switched on whenever something
# exists at `explainer_path`; the neural settings mirror the base pipeline.
explainer_path = '../models/explainer'
use_explainer = os.path.exists(explainer_path)

full_pipeline_options = {
    'model_path': model_path if use_neural else None,
    'use_neural': use_neural,
    'use_explainer': use_explainer,
    'explainer_model_path': explainer_path,
}
checker_full = FactChecker(**full_pipeline_options)

neural_flag = 'ON' if use_neural else 'OFF'
explainer_flag = 'ON' if use_explainer else 'OFF'
print(f'Full pipeline loaded (neural: {neural_flag}, explainer: {explainer_flag})')

In [None]:
# Check every claim again with the explainer-enabled pipeline, keeping each
# result and printing its formatted form under a separator line.
separator_full = '=' * 60
results_full = []
for claim_text in claims:
    outcome_full = checker_full.check(claim_text)
    results_full.append(outcome_full)
    print(separator_full)
    print(format_result(outcome_full), end='\n\n')

## 7. Performance Comparison: Base vs Full Pipeline

Compare the base pipeline (KB + BERT, or KB only when no trained model is found) against the full pipeline (the base pipeline plus the Explainer, when the explainer model is available).

In [None]:
# Verdicts predicted by each pipeline, in the same order as `claims`.
predicted_base = [r['verdict'] for r in results]
predicted_full = [r['verdict'] for r in results_full]

acc_base = accuracy_score(expected, predicted_base)
acc_full = accuracy_score(expected, predicted_full)

# Size the columns from the data instead of using fixed widths: several claims
# are longer than 42 characters and 'NOT ENOUGH INFO' is longer than 10, which
# previously pushed the row cells out of line with the header.
mark_w = len('MISS')
claim_w = max([len('Claim')] + [len(c) for c in claims])
verdict_w = max(len(v) for v in ['Expected', *expected, *predicted_base, *predicted_full])
pipeline_w = verdict_w + 1 + mark_w  # a pipeline column holds "<verdict> <OK|MISS>"
table_w = claim_w + 1 + verdict_w + 2 * (1 + pipeline_w)
rule_w = max(80, table_w)  # keep the banner at least as wide as the table

print('=' * rule_w)
print(f'{"PERFORMANCE COMPARISON":^{rule_w}}')
print('=' * rule_w)

# Claim-by-claim comparison
print(f'\n{"Claim":{claim_w}s} {"Expected":{verdict_w}s} {"Base":{pipeline_w}s} {"Full"}')
print('-' * table_w)
for claim_text, exp, pb, pf in zip(claims, expected, predicted_base, predicted_full):
    base_mark = 'OK' if exp == pb else 'MISS'
    full_mark = 'OK' if exp == pf else 'MISS'
    print(
        f'{claim_text:{claim_w}s} {exp:{verdict_w}s} '
        f'{pb:{verdict_w}s} {base_mark:{mark_w}s} '
        f'{pf:{verdict_w}s} {full_mark}'
    )

print(f'\n{"=" * rule_w}')
print(f'Base pipeline accuracy:  {acc_base:.2%}')
print(f'Full pipeline accuracy:  {acc_full:.2%}')
print(f'{"=" * rule_w}')

In [None]:
# Print one classification report per pipeline, each under its own heading.
# The second heading starts with a newline to leave a gap after the first report.
report_inputs = (
    ('BASE PIPELINE - Classification Report', predicted_base),
    ('\nFULL PIPELINE (with Explainer) - Classification Report', predicted_full),
)
for heading, verdicts in report_inputs:
    print(heading)
    print(classification_report(expected, verdicts, labels=labels, zero_division=0))

In [None]:
# Per-pipeline confidence summary: for every verdict class show the average,
# minimum and maximum confidence of the results predicted as that class.
print('CONFIDENCE DISTRIBUTION')
print('=' * 70)

pipelines = {'Base': results, 'Full': results_full}
for pipeline_name, pipeline_results in pipelines.items():
    print(f'\n--- {pipeline_name} Pipeline ---')
    for verdict in labels:
        scores = [entry['confidence'] for entry in pipeline_results if entry['verdict'] == verdict]
        if not scores:
            # Nothing was predicted as this class by this pipeline.
            print(f'  {verdict:18s}: (no predictions)')
            continue
        mean_score = sum(scores) / len(scores)
        print(f'  {verdict:18s}: avg={mean_score:.3f}, min={min(scores):.3f}, max={max(scores):.3f} (n={len(scores)})')

## 8. Detailed Explanation Showcase

Full multi-layered explanations for 3 representative claims, one for each expected verdict: SUPPORTED, REFUTED, and NOT ENOUGH INFO.

In [None]:
from src.explainer import FactExplainer

# Use the explainer from the full checker if available, otherwise create one
if checker_full.explainer is not None:
    explainer_obj = checker_full.explainer
else:
    explainer_obj = FactExplainer(use_t5=use_explainer, t5_model_path=explainer_path, use_attention=True)

# Showcase the first claim of each expected verdict class, in the order given by
# `labels`. Deriving the positions from `expected` (instead of hard-coding them)
# keeps the selection correct if the claim list is edited; for the current
# claims this yields [0, 5, 8]. Classes with no expected claim are skipped.
showcase_indices = [expected.index(label) for label in labels if label in expected]

for idx in showcase_indices:
    r = results_full[idx]
    # Banner with the claim and the verdict the full pipeline actually predicted.
    print('#' * 70)
    print(f'# CLAIM: {r["claim"]}')
    print(f'# VERDICT: {r["verdict"]} (confidence: {r["confidence"]:.2f})')
    print('#' * 70)

    explanation = r.get('explanation')
    if explanation is None:
        # Generate explanation if not already in the result
        explanation = explainer_obj.explain(r, classifier=checker_full.classifier)

    print(explainer_obj.format_explanation(explanation))
    print()