In [9]:
#@title Ensure Dependancies Installed
#%pip install requests beautifulsoup4

#Import libraries
import requests
from bs4 import BeautifulSoup
import time


In [10]:
#Define Utility Functions

#Article Scrapper
def scrape_article_text(url):
    # Send a GET request to the URL
    response = requests.get(url)
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')
        # Find elements containing text - this part may need customization
        # For example, assuming article text is within <p> tags
        article_text = ' '.join(p.get_text() for p in soup.find_all('p'))
        return article_text
    else:
        return "Error: Unable to fetch the webpage."

#Query for LLM
def query(payload, headers):
    response = requests.post(API_URL, headers=headers, json=payload)
    if response.status_code == 200:
        return response.json()  # Expecting this to always be a list for simplicity
    else:
        return [{"error": response.text}]

#Political Bias Classification using LLM 
def reduce_and_query(initial_text, llm_instructions, reduction_fraction, headers):
    text = initial_text
    while True:
        payload = {"inputs": llm_instructions + text}
        response = query(payload, headers)
        # Since we expect a list, check the first item for an 'error' key
        if "error" in response[0]:
            error_message = response[0]["error"]
            if "token" in error_message.lower():
                # Reduce the text by the specified fraction if it's a token limit error
                new_length = int(len(text) * (1 - reduction_fraction))
                text = text[:new_length]
            else:
                # Return the error message directly if it's a different kind of error
                return error_message
        else:
            # If there's no 'error' key, assume the response is successful
            return response

In [6]:
#Test case
# Change URL to get article text
url = "https://www.foxnews.com/politics/biden-running-time-fulfill-2020-campaign-pledge-abolish-federal-death-penalty"
article_text = scrape_article_text(url)
print(article_text)



      This material may not be published, broadcast, rewritten,
      or redistributed. ©2024 FOX News Network, LLC. All rights reserved.
      Quotes displayed in real-time or delayed by at least 15 minutes. Market data provided by
      Factset. Powered and implemented by
      FactSet Digital Solutions.
      Legal Statement. Mutual Fund and ETF data provided by
      Refinitiv Lipper.
     President Biden mistakenly referred to his "2020 agenda" going into 2024 during an interview on Monday. President Biden is running out of time to fulfill his 2020 presidential campaign promise to abolish the death penalty, leaving an opening for his opposition to capitalize on the issue. During Biden’s 2020 campaign, he vowed to end the federal death penalty and, after taking office three years ago, his incoming administration considered several possible options to do so. However, none came to fruition, and there are about eight months until the November election. While Biden has yet to act on c

In [21]:
api_token_hug = 


# Following URL is the URL of the LLM being utilized from HuggingFace
API_URL = "https://api-inference.huggingface.co/models/google/flan-t5-xxl"
headers = {"Authorization": f"Bearer {api_token_hug}"}

# Instructions sent to the LLM in front of article text
system_input = "Score the following article from -1 to 1 on its party stance where -1 is Republican messaging and 1 is Democratic messaging and 0 is neutral: "

#reduce by 10% until it fits query
reduction_fraction = 0.10



result = reduce_and_query(article_text, system_input, reduction_fraction, headers)
print(result)

second_article = "https://www.cnn.com/2024/03/19/politics/texas-immigration-law-blocked-appeals/index.html"
second_result = reduce_and_query(second_article, system_input, reduction_fraction, headers)
print(second_result)

third_article = "https://www.cnn.com/videos/world/2024/03/20/israel-gaza-west-bank-settler-movement-clarissa-ward-pkg-intl-ldn-vpx.cnn"
third_result = reduce_and_query(third_article, system_input, reduction_fraction, headers)
print(third_result)

fourth_article = "https://www.cnn.com/2024/03/19/politics/trump-bond-deadline-panic/index.html"
fourth_result = reduce_and_query(fourth_article, system_input, reduction_fraction, headers)
print(fourth_result)

fifth_article = "https://www.foxnews.com/live-news/joe-biden-gop-impeachment-inquiry-hearing-hunter-biden-business-dealings"
fifth_result = reduce_and_query(fifth_article, system_input, reduction_fraction, headers)
print(fifth_result)

sixth_article = "https://www.msnbc.com/deadline-white-house/deadline-legal-blog/trump-supreme-court-immunity-appeal-delay-rcna144155"
sixth_result = reduce_and_query(sixth_article, system_input, reduction_fraction, headers)
print(sixth_result)

seventh_article = "https://www.msnbc.com/rachel-maddow-show/maddowblog/biden-white-house-reason-celebrate-falling-crime-rates-rcna144215"
seventh_result = reduce_and_query(seventh_article, system_input, reduction_fraction, headers)
print(seventh_result)



[{'generated_text': '0'}]
[{'generated_text': '0'}]
[{'generated_text': '-1'}]
[{'generated_text': '-1'}]
[{'generated_text': '-1'}]
[{'generated_text': '0'}]
[{'generated_text': '1'}]
