In [9]:
from urllib.parse import urlparse
import tldextract
from typing import Dict, Union

In [10]:


# Scores keyed by the leading label of the public suffix, so both "edu" and
# country-scoped forms such as "edu.au" or "gov.uk" resolve to the same entry.
_TRUSTED_SUFFIX_SCORES = {
    'edu': 0.9,    # Educational institutions
    'gov': 0.9,    # Government websites
    'org': 0.7,    # Non-profit organizations
}

# Scores for specific hosts. An entry matches the host itself and any of its
# subdomains; entries may include a subdomain (e.g. scholar.google.com).
_TRUSTED_SOURCE_SCORES = {
    'nature.com': 0.9,
    'science.org': 0.9,
    'scholar.google.com': 0.8,
}


def evaluate_reference_credibility(url: str) -> Dict[str, Union[float, str]]:
    """
    Basic URL credibility evaluation based on domain type and known reliable sources.

    The score starts at a neutral 0.5. A trusted suffix type (edu/gov/org,
    including multi-label suffixes such as ``gov.uk``) replaces it, and a
    recognized trusted source replaces it again, so the most specific rule
    wins. Every rule that matched is listed in the explanation.

    Args:
        url (str): The URL to evaluate

    Returns:
        dict: Contains 'score' (float) and 'explanation' (str). When the URL
        cannot be evaluated (extraction raises, or no domain is found) the
        score is 0.0 and the explanation starts with "Error evaluating URL:".
    """
    try:
        # Parse the URL and normalize case so "MIT.EDU" and "mit.edu" agree.
        extracted = tldextract.extract(url)
        subdomain = extracted.subdomain.lower()
        registered_name = extracted.domain.lower()
        suffix = extracted.suffix.lower()

        # Without a domain there is nothing to rate; report it through the
        # same error-shaped result instead of a misleading neutral score.
        if not registered_name:
            return {
                "score": 0.0,
                "explanation": "Error evaluating URL: no domain could be extracted"
            }

        # Full host including any subdomain, e.g. "scholar.google.com".
        host = ".".join(part for part in (subdomain, registered_name, suffix) if part)

        score = 0.5  # Default score
        explanation = []

        # Check domain type using the leading suffix label ("gov" in "gov.uk").
        suffix_type = suffix.split(".", 1)[0]
        if suffix_type in _TRUSTED_SUFFIX_SCORES:
            score = _TRUSTED_SUFFIX_SCORES[suffix_type]
            explanation.append(f"Trusted domain type ({suffix})")

        # Check for trusted sources against the full host, not just the
        # registered domain; otherwise subdomain-qualified entries such as
        # scholar.google.com could never match.
        matching_sources = [
            source for source in _TRUSTED_SOURCE_SCORES
            if host == source or host.endswith("." + source)
        ]
        if matching_sources:
            # Prefer the most specific (longest) matching entry.
            source = max(matching_sources, key=len)
            score = _TRUSTED_SOURCE_SCORES[source]
            explanation.append(f"Recognized trusted source ({source})")

        return {
            "score": score,
            "explanation": " | ".join(explanation) if explanation else "Basic domain evaluation"
        }

    except Exception as e:
        # Deliberate best-effort: any failure yields a zero score, not a crash.
        return {
            "score": 0.0,
            "explanation": f"Error evaluating URL: {e}"
        }

In [11]:
# Example run: score a single nature.com article URL and print the resulting
# dict (keys 'score' and 'explanation').
url = "https://www.nature.com/articles/s41586-020-2649-2"
result = evaluate_reference_credibility(url)
print(result)


{'score': 0.9, 'explanation': 'Recognized trusted source (nature.com)'}
