In [12]:
from urllib.parse import urlparse
import tldextract
from typing import Dict, Union

In [13]:


def evaluate_reference_credibility(url: str) -> Dict[str, Union[float, str]]:
    """
    Basic URL credibility evaluation based on domain type and known reliable sources.

    Scoring starts at a neutral 0.5. If the URL's public suffix is a trusted
    domain type, the score is replaced by that type's score. If the host is
    (or is a subdomain of) a recognized trusted source, the score is then
    replaced by that source's score, so a source match always wins over the
    suffix match.

    Args:
        url (str): The URL to evaluate

    Returns:
        dict: Contains 'score' (float) and 'explanation' (str). If anything
        raises while evaluating, 'score' is 0.0 and 'explanation' carries the
        error message instead of propagating the exception.
    """
    try:
        # Parse the URL into subdomain / domain / public suffix
        extracted = tldextract.extract(url)

        # Normalize case so matching does not depend on how the URL was typed.
        subdomain = extracted.subdomain.lower()
        domain_label = extracted.domain.lower()
        suffix = extracted.suffix.lower()

        # Full host including any subdomain. Matching on the registered
        # domain alone (e.g. "google.com") would make subdomain-specific
        # entries such as "scholar.google.com" unreachable.
        host = ".".join(label for label in (subdomain, domain_label, suffix) if label)

        # Define trusted domains and their scores
        trusted_domains = {
            'edu': 0.9,    # Educational institutions
            'gov': 0.9,    # Government websites
            'org': 0.7,    # Non-profit organizations
        }

        # Define trusted sources
        trusted_sources = {
            'nature.com': 0.9,
            'science.org': 0.9,
            'scholar.google.com': 0.8,
        }

        # Basic scoring logic
        score = 0.5  # Default score
        explanation = []

        # Check domain type
        if suffix in trusted_domains:
            score = trusted_domains[suffix]
            explanation.append(f"Trusted domain type ({suffix})")

        # Check for trusted sources: the host either is the source itself or
        # one of its subdomains (www.nature.com -> nature.com).
        matching_sources = [
            source for source in trusted_sources
            if host == source or host.endswith("." + source)
        ]
        if matching_sources:
            # Prefer the most specific (longest) entry if several apply.
            source = max(matching_sources, key=len)
            score = trusted_sources[source]
            explanation.append(f"Recognized trusted source ({source})")

        return {
            "score": score,
            "explanation": " | ".join(explanation) if explanation else "Basic domain evaluation"
        }

    except Exception as e:
        # Deliberate best-effort: callers always get a result dict back.
        return {
            "score": 0.0,
            "explanation": f"Error evaluating URL: {str(e)}"
        }

In [14]:
def evaluate_fact_check(url: str) -> Dict[str, Union[float, str]]:
    """
    Basic fact-checking evaluation based on known fact-checking sources and scientific journals.

    Scoring starts at a neutral 0.5. If the URL's host is (or is a subdomain
    of) a known fact-checking or scientific source, the score is replaced by
    that source's score. A path containing a scientific-article marker adds
    0.1, capped at 1.0.

    Args:
        url (str): The URL to evaluate

    Returns:
        dict: Contains 'fact_check_score' (float) and 'explanation' (str). If
        anything raises while evaluating, 'fact_check_score' is 0.0 and
        'explanation' carries the error message instead of propagating.
    """
    try:
        # Parse the URL
        parsed_url = urlparse(url)
        if not parsed_url.netloc:
            # Scheme-less input such as "snopes.com/page" parses with an empty
            # netloc; re-parse it as a network location so the host is found.
            parsed_url = urlparse("//" + url)

        # hostname (unlike netloc) drops any "user:pass@" prefix and ":port"
        # suffix and is already lower-cased; it is None when no host exists.
        domain = (parsed_url.hostname or "").rstrip('.')

        # Remove 'www.' if present
        if domain.startswith('www.'):
            domain = domain[4:]

        # Define fact-checking sources and their scores
        fact_check_sources = {
            'snopes.com': 0.9,
            'factcheck.org': 0.9,
            'politifact.com': 0.85,
            'reuters.com': 0.85,
            'apnews.com': 0.85,
            'nature.com': 0.95,       # Peer-reviewed scientific journal
            'science.org': 0.95,      # Peer-reviewed scientific journal
            'thelancet.com': 0.95,    # Medical journal
        }
        # Sources described as peer-reviewed rather than fact-checking.
        scientific_sources = ('nature.com', 'science.org', 'thelancet.com')

        # Basic scoring logic
        score = 0.5  # Default score
        explanation = []

        # Check if it's a known fact-checking or scientific source
        for source, source_score in fact_check_sources.items():
            if domain == source or domain.endswith('.' + source):
                score = source_score
                if source in scientific_sources:
                    explanation.append(f"Peer-reviewed scientific source ({source})")
                else:
                    explanation.append(f"Recognized fact-checking source ({source})")
                break

        # Additional check for scientific article patterns
        path = parsed_url.path.lower()
        if any(x in path for x in ['/article/', '/research/', '/study/', '/paper/']):
            score = min(score + 0.1, 1.0)  # Cap at 1.0
            explanation.append("Contains scientific article indicators")

        return {
            "fact_check_score": score,
            "explanation": " | ".join(explanation) if explanation else "Basic fact-check evaluation"
        }

    except Exception as e:
        # Deliberate best-effort: callers always get a result dict back.
        return {
            "fact_check_score": 0.0,
            "explanation": f"Error evaluating URL: {str(e)}"
        }

In [15]:
"""
Demo cell: run both evaluate_reference_credibility and evaluate_fact_check
on a sample URL and print the two result dicts.
"""

url = "https://www.nature.com/articles/s41586-020-2649-2"

# Score the same URL with each evaluator.
result_domain = evaluate_reference_credibility(url)
result_fact_check = evaluate_fact_check(url)

# One result dict per line: domain credibility first, fact-check second.
print(result_domain, result_fact_check, sep="\n")


{'score': 0.9, 'explanation': 'Recognized trusted source (nature.com)'}
{'fact_check_score': 0.95, 'explanation': 'Peer-reviewed scientific source (nature.com)'}
