In [1]:
import os
import requests
from dotenv import load_dotenv
from transformers import pipeline

# Load environment variables from .env file
load_dotenv()

# Load News API Key
NEWS_API_KEY = os.getenv('NEWS_API_KEY')
if not NEWS_API_KEY:
    raise ValueError("NEWS_API_KEY is not set in the environment variables")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Pipelines for sentiment analysis, summarization, and bias detection
sentiment_analysis = pipeline(
    'sentiment-analysis', 
    model="nlptown/bert-base-multilingual-uncased-sentiment"
)
summarizer = pipeline(
    'summarization', 
    model="facebook/bart-large-cnn"
)
bias_detector = pipeline(
    "zero-shot-classification", 
    model="facebook/bart-large-mnli"
)

Device set to use cpu
Device set to use cpu
Device set to use cpu


In [3]:
def fetch_news(domain):
    """Fetch news articles from NewsAPI for a given domain."""
    url = f'https://newsapi.org/v2/top-headlines?category={domain}&apiKey={NEWS_API_KEY}'
    response = requests.get(url)
    return response.json()

def analyze_bias(article_text):
    """Analyze bias using zero-shot classification with candidate labels."""
    labels = ["left-leaning", "right-leaning", "neutral"]
    result = bias_detector(article_text, candidate_labels=labels)
    return result

def compute_credibility_score(article_text):
    """
    Compute a credibility score based on:
      - Bias neutrality (weighted at 90%).
      - Sentiment balance (weighted at 10%).
    This adjustment increases the overall score if the article is neutral.
    The score is scaled to a percentage.
    """
    bias_result = bias_detector(article_text, candidate_labels=["left-leaning", "right-leaning", "neutral"])
    neutral_score = 0
    for label, score in zip(bias_result['labels'], bias_result['scores']):
        if label.lower() == "neutral":
            neutral_score = score
            break

    sentiment_result = sentiment_analysis(article_text)[0]
    try:
        # Expecting a label like "4 stars"
        star_rating = int(sentiment_result['label'][0])
    except (ValueError, IndexError):
        star_rating = 3  # Default to neutral if parsing fails

    # Calculate how close the rating is to neutral (3 stars)
    sentiment_balance = 1 - (abs(star_rating - 3) / 2)  # Normalized between 0 and 1

    # Increase weight on neutrality to boost credibility for neutral reporting
    credibility = (neutral_score * 0.9 + sentiment_balance * 0.1) * 100
    return round(credibility, 2)

def compare_articles(domain):
    """
    For each article in the specified domain:
      - Summarize its content if it's at least 50 words long.
      - Analyze bias.
      - Compute a credibility score.
    Returns a list of dictionaries with these details.
    """
    news = fetch_news(domain)
    articles = news.get('articles', [])
    comparison = []
    
    for article in articles:
        # Use article 'content' if available, otherwise 'description'
        text = article.get('content') or article.get('description') or ""
        if not text:
            continue
        
        # Check if the article text is less than 50 words
        word_count = len(text.split())
        
        if word_count < 50:
            summary = text  # Use the text as it is
        else:
            try:
                summary = summarizer(text, max_length=50, min_length=25, do_sample=False)[0]['summary_text']
            except Exception:
                summary = "Summary not available."
        
        bias = analyze_bias(text)
        credibility = compute_credibility_score(text)
        
        comparison.append({
            "title": article.get("title"),
            "source": article.get("source", {}).get("name"),
            "summary": summary,
            "bias": bias,
            "credibility_score": credibility
        })
    
    return comparison

def aggregate_and_summarize_articles(domain, credibility_threshold=50):
    """
    Aggregates articles from multiple sources in the given domain.
      - Filters out articles with a credibility score below the threshold (to discard fake news).
      - Concatenates the remaining articles into one aggregated text.
      - Summarizes the aggregated text to produce a single, balanced summary.
      - Computes overall bias and an average credibility score.
    """
    news = fetch_news(domain)
    articles = news.get("articles", [])
    valid_texts = []
    for article in articles:
        text = article.get("content") or article.get("description") or ""
        if not text:
            continue
        cred = compute_credibility_score(text)
        if cred >= credibility_threshold:
            valid_texts.append(text)
    if not valid_texts:
        return {"error": "No credible articles found."}
    
    # Aggregate valid articles into one combined text
    aggregated_text = " ".join(valid_texts)
    try:
        summary = summarizer(aggregated_text, max_length=150, min_length=100, do_sample=False)[0]['summary_text']
    except Exception as e:
        summary = "Summary not available: " + str(e)
    
    overall_bias = analyze_bias(aggregated_text)
    avg_credibility = sum([compute_credibility_score(text) for text in valid_texts]) / len(valid_texts)
    return {
        "aggregated_summary": summary,
        "overall_bias": overall_bias,
        "average_credibility_score": round(avg_credibility, 2),
        "articles_count": len(valid_texts)
    }

def news_app_interaction(action, domain="general", article_text=None, credibility_threshold=50):
    """
    Main interface for testing actions:
      - fetch_news: Retrieves raw articles from a specified domain.
      - summarize: Summarizes the provided article text.
      - bias: Performs bias analysis on the provided article text.
      - credibility: Computes the credibility score for the provided article text.
      - compare: Provides summaries, bias, and credibility for each article.
      - aggregate: Aggregates articles from multiple sources, filters out low credibility (fake) news,
                   and produces a final aggregated summary.
    """
    if action == "fetch_news":
        return fetch_news(domain)
    elif action == "summarize":
        if not article_text:
            return {"error": "Article text is required for summarization"}
        summary = summarizer(article_text, max_length=100, min_length=50, do_sample=False)
        return {"summary": summary[0]['summary_text']}
    elif action == "bias":
        if not article_text:
            return {"error": "Article text is required for bias detection"}
        return {"bias": analyze_bias(article_text)}
    elif action == "credibility":
        if not article_text:
            return {"error": "Article text is required for credibility scoring"}
        return {"credibility_score": compute_credibility_score(article_text)}
    elif action == "compare":
        return {"comparison": compare_articles(domain)}
    elif action == "aggregate":
        return {"aggregated_result": aggregate_and_summarize_articles(domain, credibility_threshold)}
    else:
        return {"error": "Invalid action specified"}



In [4]:
# 1. Fetch and display news for a given domain (e.g., technology)
domain = "technology"
news_data = news_app_interaction("fetch_news", domain=domain)
print("Fetched News:")
print(news_data)

Fetched News:
{'status': 'ok', 'totalResults': 17, 'articles': [{'source': {'id': None, 'name': '9to5google.com'}, 'author': 'Ben Schoon', 'title': 'Nothing Phone (3a) and Phone (3a) Pro leak in full [Gallery] - 9to5Google', 'description': 'Images of the Nothing Phone (3a) and Phone (3a) Pro have hit the web, revealing the new design in full of both devices.', 'url': 'http://9to5google.com/2025/02/20/nothing-phone-3a-series-images-leak/', 'urlToImage': 'https://i0.wp.com/9to5google.com/wp-content/uploads/sites/4/2025/02/nothing-phone-3a-series-ah-leak-1.jpg?resize=1200%2C628&quality=82&strip=all&ssl=1', 'publishedAt': '2025-02-21T02:01:00Z', 'content': 'Ahead of the March 4 launch, a full collection of images of the both the Nothing Phone (3a) and Phone (3a) Pro have hit the web, seemingly revealing the full design and core specs.\r\nNothing has been … [+1958 chars]'}, {'source': {'id': None, 'name': '9to5Mac'}, 'author': 'Michael Burkhardt', 'title': 'Here’s how the iPhone 16e compare

In [5]:
# 2. Summarize a sample text
sample_text = (
    "Artificial intelligence is rapidly advancing, transforming industries and creating new opportunities. "
    "Experts believe AI will play a crucial role in the future of technology."
)
summarized_text = news_app_interaction("summarize", article_text=sample_text)
print("\nSummarized Text:")
print(summarized_text)

Your max_length is set to 100, but your input_length is only 29. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)



Summarized Text:
{'summary': 'Artificial intelligence is rapidly advancing, transforming industries and creating new opportunities. Experts believe AI will play a crucial role in the future of technology. Artificial intelligence will be used in a variety of industries, including finance, medicine, business and education.'}


In [6]:
# 3. Analyze bias for the sample text
bias_result = news_app_interaction("bias", article_text=sample_text)
print("\nBias Analysis:")
print(bias_result)



Bias Analysis:
{'bias': {'sequence': 'Artificial intelligence is rapidly advancing, transforming industries and creating new opportunities. Experts believe AI will play a crucial role in the future of technology.', 'labels': ['right-leaning', 'neutral', 'left-leaning'], 'scores': [0.47262051701545715, 0.3575516939163208, 0.16982783377170563]}}


In [7]:
# 4. Compute credibility score for the sample text
credibility_result = news_app_interaction("credibility", article_text=sample_text)
print("\nCredibility Score:")
print(credibility_result)


Credibility Score:
{'credibility_score': 32.18}


In [8]:
# 5. Multi-source comparison: summarize, analyze bias, and compute credibility for each article
comparison_result = news_app_interaction("compare", domain=domain)
print("\nMulti-Source Comparison:")
print(comparison_result)


Multi-Source Comparison:
{'comparison': [{'title': 'Nothing Phone (3a) and Phone (3a) Pro leak in full [Gallery] - 9to5Google', 'source': '9to5google.com', 'summary': 'Ahead of the March 4 launch, a full collection of images of the both the Nothing Phone (3a) and Phone (3a) Pro have hit the web, seemingly revealing the full design and core specs.\r\nNothing has been … [+1958 chars]', 'bias': {'sequence': 'Ahead of the March 4 launch, a full collection of images of the both the Nothing Phone (3a) and Phone (3a) Pro have hit the web, seemingly revealing the full design and core specs.\r\nNothing has been … [+1958 chars]', 'labels': ['right-leaning', 'neutral', 'left-leaning'], 'scores': [0.468912810087204, 0.3357163965702057, 0.19537073373794556]}, 'credibility_score': 30.21}, {'title': 'Here’s how the iPhone 16e compares to iPhone 12, 13, and more - 9to5Mac', 'source': '9to5Mac', 'summary': 'Brought to you by Uniq:\xa0Check out the new rugged cases from Uniq including the glow in the d

In [10]:
# 6. Aggregate articles, filter out low-credibility ones, and generate a final summary.
aggregated_result = news_app_interaction("aggregate", domain=domain, credibility_threshold=50)
print("\nAggregated and Summarized Articles:")
print(aggregated_result)



Aggregated and Summarized Articles:
{'aggregated_result': {'error': 'No credible articles found.'}}
