In [None]:
%pip install joblib nltk requests bs4 ipywidgets xgboost pandas numpy seaborn scikit-learn

In [1]:
import ipywidgets as widgets
from IPython.display import display

# Text box in which the user types or pastes the article URL
link_input = widgets.Text(description='Link:', placeholder='Enter article link')

# Button the user clicks to submit whatever is currently in the text box
fetch_button = widgets.Button(description='Fetch Article')

# Module-level slot for the submitted URL; None means nothing has been saved yet
article_link = None

# Define a function to save the link to the variable
def save_article_link(b):
    """Click handler: copy the URL typed into ``link_input`` into ``article_link``.

    Parameters
    ----------
    b
        The clicked button, supplied by ``on_click``; not used.

    Side effects
    ------------
    Rebinds the module-level ``article_link``. Surrounding whitespace is
    removed, and a blank entry is stored as None so that later
    ``article_link is None`` checks treat it as "no link provided".
    """
    global article_link
    # Pasted URLs often carry stray spaces or a trailing newline; an entry that
    # is empty after stripping is recorded as None rather than ''.
    article_link = link_input.value.strip() or None

# Register save_article_link as the click handler: the URL is captured only when
# the button is pressed (typing in the text box alone does not set article_link)
fetch_button.on_click(save_article_link)

# Render the text box and the button as this cell's output
display(link_input, fetch_button)


Text(value='', description='Link:', placeholder='Enter article link')

Button(description='Fetch Article', style=ButtonStyle())

In [61]:
import requests
from bs4 import BeautifulSoup

# Define a function to fetch and process the article
def fetch_and_process_article():
    """Download the page at ``article_link`` and print its title and first paragraph.

    Reads the module-level ``article_link``. The text of the first ``<h1>`` is
    printed as the title and the text of the first ``<p>`` as the article text;
    a "not found" placeholder is printed for whichever element is missing.
    Every outcome (result, missing link, or error) is reported with ``print``;
    the function always returns None.
    """
    # A blank or whitespace-only entry is as unusable as no entry at all, so it
    # gets the same guidance instead of an "invalid URL" error from requests.
    if not (article_link or "").strip():
        print("Please enter the article link and click 'Fetch Article' in the previous cell.")
        return

    try:
        # Without a timeout, requests can wait indefinitely on a silent server.
        response = requests.get(article_link.strip(), timeout=10)
        # Report 4xx/5xx responses as errors instead of scraping the error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # First <h1> on the page is taken as the headline.
        title_element = soup.find("h1")
        title = title_element.text.strip() if title_element is not None else "Title not found"

        # Only the first <p> is read, not the full article body.
        content_element = soup.find("p")
        text = content_element.text.strip() if content_element is not None else "Text not found"

        print(f"Title: {title}")
        print(f"Text: {text}")
    except Exception as e:
        # Best-effort cell: show the problem rather than raising a traceback.
        print(f"Error: {e}")

# Run the fetch for the link currently stored in article_link; the function
# prints its findings (or a prompt/error message) and returns nothing
fetch_and_process_article()


Title: Australia: Sydney bans fires - including barbecues - as hot and dry conditions bring wildfire risk
Text: A complete ban on fires is declared for the Greater Sydney area and coastal communities to the south, where 20 schools have been closed.


In [62]:
import requests
from bs4 import BeautifulSoup
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import joblib
import xgboost
from sklearn.feature_extraction.text import TfidfVectorizer

# Fetch and process the article
def fetch_article(article_link, timeout=10):
    """Fetch a web page and return its headline.

    Parameters
    ----------
    article_link : str
        URL of the article page.
    timeout : float, optional
        Passed to ``requests.get`` as ``timeout`` so an unresponsive server
        cannot block the notebook indefinitely. Defaults to 10.

    Returns
    -------
    str or None
        The whitespace-stripped text of the first ``<h1>`` on the page, or
        None when the request fails, the server answers with an error status,
        or the page has no non-empty ``<h1>``. The reason for a None result is
        printed.
    """
    try:
        response = requests.get(article_link, timeout=timeout)
        # Treat 4xx/5xx as failures rather than reading the error page's <h1>.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        title_element = soup.find("h1")
        title = title_element.text.strip() if title_element is not None else ""
    except Exception as e:
        print(f"Error: {e}")
        # A single None (not a tuple) so that callers' `is not None` checks
        # actually detect the failure.
        return None

    if not title:
        # Returning a placeholder string here would get it classified as if it
        # were a real headline, so a missing title is reported as a failure.
        print("Error: no <h1> title found on the page.")
        return None

    return title

# Preprocess text using NLTK
def preprocess_text(text):
    """Normalise a piece of text into a space-separated string of lemmas.

    The text is tokenized with ``word_tokenize``, tokens whose lower-cased form
    is an English stop word are dropped, each remaining token is passed through
    ``WordNetLemmatizer.lemmatize``, and the results are joined with single
    spaces. Tokens keep their original casing; lower-casing is used only for
    the stop-word comparison.
    """
    words = word_tokenize(text)
    english_stops = set(stopwords.words("english"))
    lemmatize = WordNetLemmatizer().lemmatize

    # Filter and lemmatize in one pass, preserving token order.
    return " ".join(
        lemmatize(word) for word in words if word.lower() not in english_stops
    )

# Load the saved vectorizer (path is relative to the notebook's working directory).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
# Presumably this is the vectorizer fitted when the classifier was trained; verify.
vectorizer = joblib.load("onion_vectorizer.pkl")

# Load the saved classifier that is passed to classify_article at the end of this cell
classifier = joblib.load("onion_xgboost_classifier.pkl")

def classify_article(article_link, classifier, tfidf_vectorizer=None):
    """Fetch an article's headline and classify it as fake or real.

    Parameters
    ----------
    article_link : str
        URL handed to ``fetch_article``.
    classifier
        Fitted model; its ``predict`` is called on the vectorized headline.
        A predicted label of 0 is reported as fake, anything else as real.
    tfidf_vectorizer : optional
        Object whose ``transform`` turns a list of preprocessed strings into
        the classifier's input. Defaults to the module-level ``vectorizer``.

    Returns
    -------
    str
        "The article is classified as FAKE.", "The article is classified as
        REAL.", or "Article processing failed." when no headline was obtained.
    """
    title = fetch_article(article_link)

    # Anything other than a non-blank string means no headline was obtained.
    # Checking the type (not just `is not None`) keeps a failure value such as
    # a tuple from reaching the tokenizer and raising there.
    if not isinstance(title, str) or not title.strip():
        return "Article processing failed."

    if tfidf_vectorizer is None:
        tfidf_vectorizer = vectorizer

    # Apply the preprocessing, then the vectorizer, to the headline.
    preprocessed_article = preprocess_text(title)
    input_tfidf = tfidf_vectorizer.transform([preprocessed_article])

    prediction = classifier.predict(input_tfidf)

    # Label convention: 0 for fake, 1 for real.
    if prediction[0] == 0:
        return "The article is classified as FAKE."
    return "The article is classified as REAL."

# article_link is the value saved by the 'Fetch Article' button in the widget
# cell; it is still None if that button has not been clicked in this kernel session
result = classify_article(article_link, classifier)
print(result)


The article is classified as REAL.
