# Phase 4 — Verification & Scoring

This notebook tests and debugs the verification and scoring pipeline:
- NLI/Stance model → SUPPORTED / REFUTED / UNCLEAR
- Show 3 citations (with highlights)
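- Confidence badge: 🟢 Likely True (SUPPORTED, confidence ≥ 0.7) | 🔴 Likely False (REFUTED, confidence ≥ 0.7) | 🟡 Unclear (UNCLEAR verdict, or 0.4 ≤ confidence < 0.7) | ⚫ Insufficient Evidence (confidence < 0.4)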

## Step 1: Setup and Dependencies

In [None]:
import json
import os
import re
import sys
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple

# Make the directory two levels above the working directory importable.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(''))))

import numpy as np
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# Define verdict types
class Verdict(Enum):
    """Possible outcomes of checking a claim against a piece of evidence.

    Each member's value is its own name as a string, so ``Verdict.X.value``
    is what the rest of the notebook stores in result dicts and uses as
    dictionary keys.
    """
    SUPPORTED = "SUPPORTED"  # mapped from the NLI label ENTAILMENT
    REFUTED = "REFUTED"  # mapped from the NLI label CONTRADICTION
    UNCLEAR = "UNCLEAR"  # mapped from the NLI label NEUTRAL (and used as fallback)

# Test data from Phase 3: one claim plus the evidence sources to check it against.
# Each evidence dict carries the fields read later in this notebook:
#   'title' / 'source_name'  -> copied into the stance result for display
#   'content'                -> main evidence text fed to the NLI step
#   'key_snippets'           -> extra sentences appended to 'content' before inference
#   'credibility_score'      -> used to boost confidence and to weight votes
test_claim_evidence = {
    'claim': 'The new COVID vaccine causes severe side effects in 80% of patients',
    'evidence_sources': [
        {
            'title': 'PIB Fact Check: No Evidence of 80% Severe Side Effects from COVID Vaccines',
            'content': 'PIB fact-checking unit debunks false claims about COVID vaccine side effects. Clinical trials show side effects occur in less than 5% of patients and are mostly mild.',
            'source_name': 'Press Information Bureau (PIB)',
            'credibility_score': 0.95,
            'key_snippets': ['Clinical trials show side effects occur in less than 5% of patients and are mostly mild']
        },
        {
            'title': 'WHO Updates COVID-19 Vaccine Safety Guidelines',
            'content': 'WHO releases new guidelines on COVID-19 vaccine safety monitoring. Studies confirm vaccines are safe with rare serious adverse events.',
            'source_name': 'World Health Organization',
            'credibility_score': 0.95,
            'key_snippets': ['Studies confirm vaccines are safe with rare serious adverse events']
        }
    ]
}

# Quick sanity output so the cell visibly confirms the fixture was built.
print("Dependencies loaded successfully!")
print(f"Test claim: {test_claim_evidence['claim']}")
print(f"Evidence sources: {len(test_claim_evidence['evidence_sources'])}")

## Step 2: Natural Language Inference (NLI) Setup

In [None]:
def load_nli_model(model_name: str = "microsoft/DialoGPT-medium"):
    """Return a stand-in NLI model handle for the demo.

    No model is actually loaded: ``model_name`` is accepted but never used,
    and the function hands back the placeholder string ``"mock_nli_model"``.

    Returns:
        ``"mock_nli_model"`` normally, or ``None`` if announcing the load
        raises an exception.
    """
    # The demo relies on the rule-based mock_nli_inference instead of a model.
    # Candidate MNLI checkpoints for a real implementation:
    #   - "facebook/bart-large-mnli"
    #   - "roberta-large-mnli"
    # NOTE(review): the default "microsoft/DialoGPT-medium" appears to be a
    # dialogue-generation checkpoint rather than an NLI classifier - confirm
    # before wiring in a real model.
    try:
        print("Loading NLI model (mocked for demo)...")
    except Exception as e:
        print(f"Error loading NLI model: {str(e)}")
        return None
    else:
        return "mock_nli_model"

def mock_nli_inference(premise: str, hypothesis: str) -> Dict[str, float]:
    """Keyword-based stand-in for an NLI model.

    Both texts are lower-cased and searched for hard-coded keywords by plain
    substring matching. Each keyword group that fires on both sides (claim
    term in ``hypothesis``, opposing term in ``premise``) adds 0.3 to a
    contradiction score, which selects one of four fixed score tables.

    Args:
        premise: Evidence text.
        hypothesis: Claim being checked.

    Returns:
        A dict with the keys ``'CONTRADICTION'``, ``'ENTAILMENT'`` and
        ``'NEUTRAL'``. Key order differs per branch and is kept stable because
        callers that break ties by dict order depend on it.
    """
    evidence_lc = premise.lower()
    claim_lc = hypothesis.lower()

    # (terms expected in the claim, opposing terms expected in the evidence)
    opposing_terms = (
        (('80%', 'severe'), ('5%', 'mild', 'rare')),
        (('causes', 'harmful'), ('safe', 'effective')),
        (('dangerous', 'toxic'), ('approved', 'beneficial')),
    )

    def mentions(haystack, needles):
        return any(needle in haystack for needle in needles)

    contradiction_score = sum(
        0.3
        for claim_terms, evidence_terms in opposing_terms
        if mentions(claim_lc, claim_terms) and mentions(evidence_lc, evidence_terms)
    )

    # Two or more groups fired: strong contradiction.
    if contradiction_score > 0.5:
        return {'CONTRADICTION': 0.8, 'ENTAILMENT': 0.1, 'NEUTRAL': 0.1}

    # Exactly one group fired: weak contradiction, tied with neutral.
    if contradiction_score > 0.2:
        return {'CONTRADICTION': 0.4, 'ENTAILMENT': 0.2, 'NEUTRAL': 0.4}

    # No contradiction signal: count generic "supporting" words in the evidence.
    support_hits = sum(
        keyword in evidence_lc
        for keyword in ('confirms', 'shows', 'proves', 'evidence', 'study')
    )
    if support_hits * 0.2 > 0.4:
        return {'ENTAILMENT': 0.7, 'NEUTRAL': 0.2, 'CONTRADICTION': 0.1}

    return {'NEUTRAL': 0.6, 'ENTAILMENT': 0.2, 'CONTRADICTION': 0.2}

# Load NLI model and test.
# The returned handle is only stored here; mock_nli_inference takes just the
# two texts and does not receive it.
nli_model = load_nli_model()

# Test NLI inference on a premise/hypothesis pair.
# Argument order matters: the evidence sentence is the premise and the claim
# is the hypothesis.
test_premise = "Clinical trials show side effects occur in less than 5% of patients and are mostly mild"
test_hypothesis = "The new COVID vaccine causes severe side effects in 80% of patients"

nli_result = mock_nli_inference(test_premise, test_hypothesis)
print("\nNLI inference test:")
print(f"Premise: {test_premise}")
print(f"Hypothesis: {test_hypothesis}")
print(f"Results: {nli_result}")

## Step 3: Evidence-Claim Stance Detection

In [None]:
def analyze_claim_evidence_stance(claim: str, evidence: Dict) -> Dict[str, Any]:
    """Score one evidence item against a claim and map it to a verdict.

    The evidence text is the item's ``'content'`` followed by its
    ``'key_snippets'`` (space-joined). That text is passed to
    ``mock_nli_inference`` as the premise with ``claim`` as the hypothesis,
    and the highest-scoring NLI label is translated to a ``Verdict``.

    Args:
        claim: The claim being verified.
        evidence: Evidence dict; ``'content'``, ``'key_snippets'``,
            ``'title'``, ``'source_name'`` and ``'credibility_score'`` are
            read, all optional. Missing or ``None`` values fall back to empty
            text, ``'Unknown'`` or a credibility of 0.5.

    Returns:
        Dict with ``'evidence_title'``, ``'evidence_source'``, ``'stance'``
        (a ``Verdict`` value string), ``'confidence'``, ``'nli_scores'``,
        ``'credibility_score'`` and ``'evidence_text'`` (cut to 200
        characters plus ``'...'`` when longer).
    """
    # `or` also covers keys that are present but explicitly set to None.
    text_parts = [evidence.get('content') or '']
    text_parts.extend(evidence.get('key_snippets') or [])
    evidence_text = ' '.join(text_parts)

    # Evidence is the premise, the claim is the hypothesis.
    nli_scores = mock_nli_inference(evidence_text, claim)

    # max() keeps the first of several equal scores, so ties follow the
    # key order of the returned score dict.
    top_label, top_score = max(nli_scores.items(), key=lambda item: item[1])

    # Map NLI labels to our verdict system; unknown labels count as UNCLEAR.
    stance_mapping = {
        'ENTAILMENT': Verdict.SUPPORTED,
        'CONTRADICTION': Verdict.REFUTED,
        'NEUTRAL': Verdict.UNCLEAR
    }
    verdict = stance_mapping.get(top_label, Verdict.UNCLEAR)

    credibility = evidence.get('credibility_score')
    if credibility is None:
        credibility = 0.5

    # Confidence = winning NLI score plus 20% of the source credibility,
    # capped at 1.0.
    final_confidence = min(top_score + credibility * 0.2, 1.0)

    if len(evidence_text) > 200:
        display_text = evidence_text[:200] + '...'
    else:
        display_text = evidence_text

    return {
        'evidence_title': evidence.get('title', 'Unknown'),
        'evidence_source': evidence.get('source_name', 'Unknown'),
        'stance': verdict.value,
        'confidence': final_confidence,
        'nli_scores': nli_scores,
        'credibility_score': credibility,
        'evidence_text': display_text
    }

# Test stance detection.
# This cell defines the module-level names `claim` and `stance_results`,
# which later cells in the notebook read.
print("Stance detection results:")
claim = test_claim_evidence['claim']
print(f"Claim: {claim}\n")

# One stance result per evidence source, in the same order as the fixture.
stance_results = []
for evidence in test_claim_evidence['evidence_sources']:
    stance_result = analyze_claim_evidence_stance(claim, evidence)
    stance_results.append(stance_result)
    
    print(f"Evidence: {stance_result['evidence_title']}")
    print(f"  Source: {stance_result['evidence_source']}")
    print(f"  Stance: {stance_result['stance']}")
    print(f"  Confidence: {stance_result['confidence']:.3f}")
    print(f"  NLI Scores: {stance_result['nli_scores']}")
    print()

## Step 4: Aggregate Verdict Calculation

In [None]:
def calculate_aggregate_verdict(stance_results: List[Dict]) -> Dict[str, Any]:
    """Combine per-evidence stances into one weighted verdict.

    Each result votes for its ``'stance'`` with weight
    ``confidence * credibility_score``. Votes are normalised to sum to 1 and
    the stance with the largest share wins; its share is the confidence.
    Ties go to the first of SUPPORTED, REFUTED, UNCLEAR.

    Args:
        stance_results: Dicts with ``'stance'`` (a ``Verdict`` value string),
            ``'confidence'`` and ``'credibility_score'``.

    Returns:
        Dict with ``'verdict'``, ``'confidence'``, ``'weighted_votes'``,
        ``'evidence_count'``, ``'reasoning'`` and ``'total_sources'``. The
        same keys are present when there is no evidence, so consumers can
        read them unconditionally.

    Raises:
        KeyError: If a result carries a stance that is not a ``Verdict`` value.
    """
    weighted_votes = {
        Verdict.SUPPORTED.value: 0.0,
        Verdict.REFUTED.value: 0.0,
        Verdict.UNCLEAR.value: 0.0
    }

    if not stance_results:
        return {
            'verdict': Verdict.UNCLEAR.value,
            'confidence': 0.0,
            'weighted_votes': weighted_votes,
            'evidence_count': {},
            'reasoning': 'No evidence found',
            'total_sources': 0
        }

    total_weight = 0.0
    evidence_count = {}

    for result in stance_results:
        stance = result['stance']

        # A source counts for more when it is both confident and credible.
        weight = result['confidence'] * result['credibility_score']
        weighted_votes[stance] += weight
        total_weight += weight

        evidence_count[stance] = evidence_count.get(stance, 0) + 1

    if total_weight > 0:
        for stance in weighted_votes:
            weighted_votes[stance] /= total_weight
        final_verdict = max(weighted_votes, key=weighted_votes.get)
        final_confidence = weighted_votes[final_verdict]
    else:
        # Every vote has zero weight: picking the maximum would just return
        # the first label (SUPPORTED), so report UNCLEAR explicitly.
        final_verdict = Verdict.UNCLEAR.value
        final_confidence = 0.0

    reasoning_parts = [
        f"{count} source(s) {stance.lower()}"
        for stance, count in evidence_count.items()
    ]
    reasoning = "Based on " + ", ".join(reasoning_parts) + " the claim."

    return {
        'verdict': final_verdict,
        'confidence': final_confidence,
        'weighted_votes': weighted_votes,
        'evidence_count': evidence_count,
        'reasoning': reasoning,
        'total_sources': len(stance_results)
    }

def get_confidence_badge(verdict: str, confidence: float) -> Dict[str, str]:
    """Pick the display badge for a verdict at a given confidence.

    * ``confidence >= 0.7``: green "Likely True" for SUPPORTED, red
      "Likely False" for REFUTED, yellow "Unclear" for anything else.
    * ``0.4 <= confidence < 0.7``: yellow "Unclear" whatever the verdict.
    * otherwise: gray "Insufficient Evidence".

    Returns:
        A new dict with the keys ``'emoji'``, ``'label'`` and ``'color'``.
    """
    if confidence >= 0.7:
        # Only a confident verdict gets a decisive colour.
        decisive_badges = {
            Verdict.SUPPORTED.value: {'emoji': '🟢', 'label': 'Likely True', 'color': 'green'},
            Verdict.REFUTED.value: {'emoji': '🔴', 'label': 'Likely False', 'color': 'red'},
        }
        undecided = {'emoji': '🟡', 'label': 'Unclear', 'color': 'yellow'}
        return decisive_badges.get(verdict, undecided)

    if confidence >= 0.4:
        return {'emoji': '🟡', 'label': 'Unclear', 'color': 'yellow'}

    return {'emoji': '⚫', 'label': 'Insufficient Evidence', 'color': 'gray'}

# Test aggregate verdict calculation on the stance results built in the
# stance-detection cell.
aggregate_result = calculate_aggregate_verdict(stance_results)
# The badge here uses the raw aggregate confidence (before calibration).
confidence_badge = get_confidence_badge(aggregate_result['verdict'], aggregate_result['confidence'])

print("Aggregate verdict calculation:")
print(f"Final Verdict: {aggregate_result['verdict']}")
print(f"Confidence: {aggregate_result['confidence']:.3f}")
print(f"Badge: {confidence_badge['emoji']} {confidence_badge['label']}")
print(f"Reasoning: {aggregate_result['reasoning']}")
print(f"Weighted Votes: {aggregate_result['weighted_votes']}")
print(f"Evidence Count: {aggregate_result['evidence_count']}")

## Step 5: Citation Extraction and Highlighting

In [None]:
def extract_citations(stance_results: List[Dict], max_citations: int = 3,
                      claim: Optional[str] = None) -> List[Dict]:
    """Build the top citations, ranked by ``confidence * credibility_score``.

    Args:
        stance_results: Per-evidence stance dicts carrying
            ``'evidence_title'``, ``'evidence_source'``, ``'stance'``,
            ``'confidence'``, ``'credibility_score'`` and ``'evidence_text'``.
        max_citations: Maximum number of citations to return.
        claim: Claim whose terms are highlighted in each snippet. When
            omitted, the module-level ``claim`` variable is used if it exists
            (the previous behaviour); otherwise nothing is highlighted.

    Returns:
        Citation dicts in descending weight order, with ids
        ``citation_1``, ``citation_2``, ...
    """
    if claim is None:
        # Backward compatibility: older callers relied on a global `claim`.
        # Prefer passing the claim explicitly so highlights cannot silently
        # come from a different claim.
        claim = globals().get('claim') or ''

    def weight_of(result: Dict) -> float:
        return result['confidence'] * result['credibility_score']

    # sorted() is stable, so equally weighted results keep their input order.
    ranked = sorted(stance_results, key=weight_of, reverse=True)[:max_citations]

    return [
        {
            'id': f"citation_{rank}",
            'title': result['evidence_title'],
            'source': result['evidence_source'],
            'stance': result['stance'],
            'confidence': result['confidence'],
            'credibility': result['credibility_score'],
            'text_snippet': result['evidence_text'],
            'highlighted_text': highlight_relevant_text(result['evidence_text'], claim),
            'weight': weight_of(result)
        }
        for rank, result in enumerate(ranked, start=1)
    ]

def highlight_relevant_text(text: str, claim: str) -> str:
    """Wrap occurrences of the claim's key terms in ``text`` with ``**...**``.

    Key terms are the claim's ``\\w+`` tokens longer than three characters,
    lower-cased. Matching is case-insensitive and substring-based, and each
    match is upper-cased inside the markers (``vaccines`` becomes
    ``**VACCINE**s`` for the term ``vaccine``).

    All terms are applied in a single pass, longest term first, so a match is
    never re-highlighted by a shorter term it contains and the output does
    not depend on set iteration order.

    Args:
        text: Evidence text to annotate.
        claim: Claim supplying the terms to highlight.

    Returns:
        The annotated text, or ``text`` unchanged when the claim has no term
        longer than three characters.
    """
    claim_words = {
        word.lower() for word in re.findall(r'\w+', claim) if len(word) > 3
    }
    if not claim_words:
        # An empty alternation would match at every position.
        return text

    # Longest first so "patients" wins over "patient"; alphabetical as a
    # deterministic tie-break.
    ordered_words = sorted(claim_words, key=lambda word: (-len(word), word))
    pattern = re.compile('|'.join(re.escape(word) for word in ordered_words),
                         re.IGNORECASE)

    return pattern.sub(lambda match: f'**{match.group(0).upper()}**', text)

def format_citation_summary(citations: List[Dict]) -> Dict[str, Any]:
    """Convert citation dicts into display-ready entries plus totals.

    Args:
        citations: Dicts with ``'id'``, ``'title'``, ``'source'``,
            ``'stance'``, ``'confidence'``, ``'credibility'``,
            ``'highlighted_text'`` and ``'weight'``.

    Returns:
        Dict with ``'count'``, ``'citations'`` (formatted entries) and
        ``'total_weight'``. All three keys are present for an empty input
        too, so callers can read them unconditionally.
    """
    if not citations:
        return {'count': 0, 'citations': [], 'total_weight': 0.0}

    stance_icons = {
        'SUPPORTED': '✅',
        'REFUTED': '❌',
        'UNCLEAR': '❓'
    }
    max_snippet_chars = 150

    formatted_citations = []
    for citation in citations:
        # Unknown stances get the same icon as UNCLEAR.
        stance_icon = stance_icons.get(citation['stance'], '❓')

        # Only add an ellipsis when text was actually cut off.
        highlighted = citation['highlighted_text']
        if len(highlighted) > max_snippet_chars:
            snippet = highlighted[:max_snippet_chars] + '...'
        else:
            snippet = highlighted

        formatted_citations.append({
            'id': citation['id'],
            'display_title': f"{stance_icon} {citation['title']}",
            'source': citation['source'],
            'stance': citation['stance'],
            'confidence_percent': f"{citation['confidence']*100:.1f}%",
            'credibility_score': citation['credibility'],
            'highlighted_snippet': snippet,
            'weight_score': citation['weight']
        })

    return {
        'count': len(citations),
        'citations': formatted_citations,
        'total_weight': sum(c['weight'] for c in citations)
    }

# Test citation extraction.
# No claim is passed here: the highlighting inside extract_citations uses the
# module-level `claim` set in the stance-detection cell.
citations = extract_citations(stance_results)
citation_summary = format_citation_summary(citations)

print("Citation extraction results:")
print(f"Total citations: {citation_summary['count']}")
print(f"Total weight: {citation_summary['total_weight']:.3f}\n")

# Note: `citation` below is a formatted entry from the summary, not a raw
# citation dict.
for citation in citation_summary['citations']:
    print(f"Citation {citation['id']}:")
    print(f"  Title: {citation['display_title']}")
    print(f"  Source: {citation['source']}")
    print(f"  Stance: {citation['stance']} (Confidence: {citation['confidence_percent']})")
    print(f"  Highlighted: {citation['highlighted_snippet']}")
    print()

## Step 6: Confidence Score Calibration

In [None]:
def _classify_source_type(source_name: str) -> str:
    """Bucket a source name as government, international, encyclopedia or media."""
    name = source_name.lower()
    words = set(re.findall(r'\w+', name))

    if 'government' in name or 'pib' in name or 'ministry' in name:
        return 'government'
    # "who" is matched as a whole word: as a substring it also hits names such
    # as "Wholesale ..." while missing the spelled-out organisation name.
    if 'who' in words or 'world health organization' in name or 'cdc' in name:
        return 'international'
    if 'wikipedia' in name:
        return 'encyclopedia'
    return 'media'


def calibrate_confidence_score(aggregate_result: Dict, citations: List[Dict]) -> Dict[str, Any]:
    """Adjust the aggregate confidence using source count, diversity,
    consensus and credibility.

    Adjustments added to ``aggregate_result['confidence']``:

    * source count: 0.05 per source, capped at 0.15
    * diversity: 0.03 per distinct source type among ``citations``
    * consensus: 0.1 times the gap between the two largest vote shares
    * credibility: 0.2 times (average citation credibility - 0.5); the
      average defaults to 0.5 when there are no citations

    The sum is clamped to ``[0, 1]``.

    Args:
        aggregate_result: Dict with ``'confidence'`` and, optionally,
            ``'total_sources'`` and ``'weighted_votes'``. Missing optional
            keys are treated as 0 sources / no votes, so a bare "no evidence"
            aggregate is accepted.
        citations: Dicts with ``'source'`` and ``'credibility'``.

    Returns:
        Dict with ``'calibrated_confidence'``, ``'confidence_change'``,
        ``'calibration_factors'``, ``'source_diversity'`` (sorted list of
        source types) and ``'consensus_strength'``.
    """
    base_confidence = aggregate_result['confidence']

    # Factor 1: number of sources (max 15% bonus).
    source_count = aggregate_result.get('total_sources', 0)
    source_bonus = min(source_count * 0.05, 0.15)

    # Factor 2: diversity of source types (3% per type).
    source_types = {_classify_source_type(citation['source']) for citation in citations}
    diversity_bonus = len(source_types) * 0.03

    # Factor 3: consensus strength = lead of the top vote share over the runner-up.
    ranked_votes = sorted(aggregate_result.get('weighted_votes', {}).values(), reverse=True)
    max_vote = ranked_votes[0] if ranked_votes else 0
    second_max_vote = ranked_votes[1] if len(ranked_votes) > 1 else 0
    consensus_strength = max_vote - second_max_vote
    consensus_bonus = consensus_strength * 0.1

    # Factor 4: credibility above 0.5 raises confidence, below 0.5 lowers it.
    if citations:
        avg_credibility = sum(c['credibility'] for c in citations) / len(citations)
    else:
        avg_credibility = 0.5
    credibility_adjustment = (avg_credibility - 0.5) * 0.2

    calibrated_confidence = (
        base_confidence + source_bonus + diversity_bonus + consensus_bonus + credibility_adjustment
    )
    calibrated_confidence = max(0.0, min(1.0, calibrated_confidence))  # Clamp to [0, 1]

    calibration_factors = {
        'base_confidence': base_confidence,
        'source_count_bonus': source_bonus,
        'diversity_bonus': diversity_bonus,
        'consensus_bonus': consensus_bonus,
        'credibility_adjustment': credibility_adjustment,
        'final_confidence': calibrated_confidence
    }

    return {
        'calibrated_confidence': calibrated_confidence,
        'confidence_change': calibrated_confidence - base_confidence,
        'calibration_factors': calibration_factors,
        # Sorted so the output is stable from run to run.
        'source_diversity': sorted(source_types),
        'consensus_strength': consensus_strength
    }

def get_final_verdict_with_confidence(aggregate_result: Dict, calibration_result: Dict) -> Dict[str, Any]:
    """Attach the calibrated confidence, badge and certainty label to the verdict.

    Args:
        aggregate_result: Dict providing ``'verdict'``.
        calibration_result: Dict providing ``'calibrated_confidence'``.

    Returns:
        Dict with ``'verdict'``, ``'confidence'``, ``'confidence_badge'``,
        ``'certainty_level'`` and a one-line ``'verdict_summary'``.
    """
    final_confidence = calibration_result['calibrated_confidence']
    verdict = aggregate_result['verdict']

    # The badge is recomputed because calibration may have moved the
    # confidence across a badge threshold.
    confidence_badge = get_confidence_badge(verdict, final_confidence)

    # Lower bounds, checked from highest to lowest; anything below 0.3 is
    # 'Very Low'.
    certainty_levels = (
        (0.8, 'Very High'),
        (0.7, 'High'),
        (0.5, 'Medium'),
        (0.3, 'Low'),
    )
    certainty = next(
        (label for lower_bound, label in certainty_levels if final_confidence >= lower_bound),
        'Very Low',
    )

    return {
        'verdict': verdict,
        'confidence': final_confidence,
        'confidence_badge': confidence_badge,
        'certainty_level': certainty,
        'verdict_summary': f"{confidence_badge['emoji']} {verdict} ({certainty} Confidence)"
    }

# Test confidence calibration using the aggregate result and citations
# produced by the earlier cells.
calibration_result = calibrate_confidence_score(aggregate_result, citations)
final_verdict = get_final_verdict_with_confidence(aggregate_result, calibration_result)

print("Confidence calibration results:")
print(f"Original confidence: {aggregate_result['confidence']:.3f}")
print(f"Calibrated confidence: {calibration_result['calibrated_confidence']:.3f}")
# '+' in the format spec shows the sign of the change explicitly.
print(f"Confidence change: {calibration_result['confidence_change']:+.3f}")
print(f"\nCalibration factors:")
for factor, value in calibration_result['calibration_factors'].items():
    print(f"  {factor}: {value:.3f}")

print(f"\nFinal verdict: {final_verdict['verdict_summary']}")
print(f"Certainty level: {final_verdict['certainty_level']}")
print(f"Source diversity: {calibration_result['source_diversity']}")

## Step 7: Complete Phase 4 Pipeline

In [None]:
def phase4_pipeline(claim_evidence_data: Dict) -> Dict[str, Any]:
    """Run the complete Phase 4 flow: stance analysis, aggregation, citations,
    calibration and final verdict.

    Args:
        claim_evidence_data: Dict with a ``'claim'`` string and an
            ``'evidence_sources'`` list of evidence dicts.

    Returns:
        Dict with ``'phase'``, ``'input_claim'``, ``'steps'`` (one entry per
        completed step) and ``'status'``. On success ``'status'`` is
        ``'success'`` and ``'final_output'`` holds the payload for Phase 5.
        On any exception, including a missing ``'claim'`` or
        ``'evidence_sources'`` key, ``'status'`` is ``'failed'`` and
        ``'error'`` holds ``str(exception)``; steps completed before the
        failure remain in ``'steps'``.
    """
    pipeline_result = {
        'phase': 'Phase 4 - Verification & Scoring',
        # .get so that a missing claim is reported through 'status'/'error'
        # below instead of raising before the try block.
        'input_claim': claim_evidence_data.get('claim'),
        'steps': []
    }

    try:
        claim = claim_evidence_data['claim']
        evidence_sources = claim_evidence_data['evidence_sources']

        # Step 1: Analyze stance for each evidence source
        stance_results = [
            analyze_claim_evidence_stance(claim, evidence)
            for evidence in evidence_sources
        ]

        stance_counts = {}
        for result in stance_results:
            stance = result['stance']
            stance_counts[stance] = stance_counts.get(stance, 0) + 1

        pipeline_result['steps'].append({
            'step': 'stance_analysis',
            'result': {
                'evidence_analyzed': len(stance_results),
                'stance_distribution': stance_counts
            }
        })

        # Step 2: Calculate aggregate verdict
        aggregate_result = calculate_aggregate_verdict(stance_results)
        pipeline_result['steps'].append({
            'step': 'aggregate_verdict',
            'result': aggregate_result
        })

        # Step 3: Extract citations
        citations = extract_citations(stance_results)
        # extract_citations is not given this run's claim here, so recompute
        # the highlights against it; otherwise they could reflect whatever
        # module-level claim happens to be defined.
        for citation in citations:
            citation['highlighted_text'] = highlight_relevant_text(
                citation['text_snippet'], claim
            )
        citation_summary = format_citation_summary(citations)
        pipeline_result['steps'].append({
            'step': 'citation_extraction',
            'result': citation_summary
        })

        # Step 4: Calibrate confidence
        calibration_result = calibrate_confidence_score(aggregate_result, citations)
        pipeline_result['steps'].append({
            'step': 'confidence_calibration',
            'result': calibration_result
        })

        # Step 5: Generate final verdict
        final_verdict = get_final_verdict_with_confidence(aggregate_result, calibration_result)
        pipeline_result['steps'].append({
            'step': 'final_verdict',
            'result': final_verdict
        })

        if stance_results:
            avg_credibility = (
                sum(r['credibility_score'] for r in stance_results) / len(stance_results)
            )
        else:
            avg_credibility = 0

        # Final output for Phase 5
        pipeline_result['final_output'] = {
            'claim': claim,
            'verdict': final_verdict['verdict'],
            'confidence': final_verdict['confidence'],
            'confidence_badge': final_verdict['confidence_badge'],
            'certainty_level': final_verdict['certainty_level'],
            'citations': citation_summary['citations'],
            'evidence_summary': {
                'total_sources': len(evidence_sources),
                'stance_distribution': stance_counts,
                'avg_credibility': avg_credibility
            },
            'ready_for_phase5': True
        }
        pipeline_result['status'] = 'success'

    except Exception as e:
        # Top-level boundary of the pipeline: report the failure in the result
        # rather than propagating it.
        pipeline_result['error'] = str(e)
        pipeline_result['status'] = 'failed'

    return pipeline_result

# Test complete Phase 4 pipeline
print("=== Testing Complete Phase 4 Pipeline ===")
phase4_result = phase4_pipeline(test_claim_evidence)

# Print summary
print(f"Status: {phase4_result['status']}")

# 'final_output' only exists when the pipeline succeeded; on failure the
# result carries 'error' instead, so report that rather than hitting KeyError.
final_output = phase4_result.get('final_output')
if final_output is None:
    print(f"Pipeline failed: {phase4_result.get('error', 'unknown error')}")
else:
    print(f"\nClaim: {final_output['claim']}")
    print(f"Verdict: {final_output['verdict']}")
    print(f"Confidence: {final_output['confidence']:.3f}")
    print(f"Badge: {final_output['confidence_badge']['emoji']} {final_output['confidence_badge']['label']}")
    print(f"Certainty: {final_output['certainty_level']}")

    print("\nEvidence Summary:")
    print(f"  Total sources: {final_output['evidence_summary']['total_sources']}")
    print(f"  Stance distribution: {final_output['evidence_summary']['stance_distribution']}")
    print(f"  Average credibility: {final_output['evidence_summary']['avg_credibility']:.3f}")

    print("\nTop Citations:")
    for citation in final_output['citations']:
        print(f"  {citation['display_title']}")
        print(f"    Source: {citation['source']} (Confidence: {citation['confidence_percent']})")
        print(f"    Snippet: {citation['highlighted_snippet']}")

    print(f"\nReady for Phase 5: {final_output['ready_for_phase5']}")