In [1]:
# Install newspaper3k for article extraction
!pip install newspaper3k

# Install Hugging Face Transformers for text classification
!pip install transformers

# Install scikit-learn for TF-IDF vectorization
!pip install scikit-learn

# (Optional) Install Streamlit (useful locally, but not in Colab directly)
# !pip install streamlit

# If you plan to run a Streamlit app in Colab, you can use this (optional workaround):
# !pip install pyngrok

!pip install lxml_html_clean



In [2]:
from newspaper import Article

def extract_text_from_url(url):
    """Download the page at ``url`` and return its main article text.

    Args:
        url: Address of the news article to fetch.

    Returns:
        The article body as a string. If downloading or parsing fails, the
        error is logged as a warning and an empty string is returned, so
        callers can detect failure with a plain truthiness check
        (``if article_text:``) instead of mistaking an error message for
        article content.
    """
    import logging  # local import keeps this cell self-contained

    try:
        # Article wraps the page: download() fetches the HTML and parse()
        # extracts the body, which is then exposed as `.text`.
        article = Article(url)
        article.download()
        article.parse()
        return article.text
    except Exception as e:
        # Best-effort helper: report the problem but never raise into the
        # notebook/UI code that calls us.
        logging.getLogger(__name__).warning("Error extracting article: %s", e)
        return ""

# Example usage: fetch one live article and print the extracted body text.
# `url` and `article_text` are notebook-level variables; `article_text` is
# consumed again by the TF-IDF cell further down.
url = "https://www.bbc.com/news/live/c70wjz2j04wt"
article_text = extract_text_from_url(url)
print(article_text)

Trump threatens tariffs on Russia. But what does that mean?

On Sunday, Trump threatened "secondary tariffs" on Russia if it doesn't agree to a ceasefire with Ukraine. Let's unpack that a little.

What did Trump say?

"If Russia and I are unable to make a deal on stopping the bloodshed in Ukraine, and if I think it was Russia's fault , which it might not be... I am going to put secondary tariffs... on all oil coming out of Russia," the US president told NBC News.

"That would be that if you buy oil from Russia, you can’t do business in the United States," Trump added. "There will be a 25% tariff on all oil, a 25 to 50-point tariff on all oil."

He also described it as a "25% tariff on oil and other products sold in the United States, secondary tariffs".

What is a secondary tariff?

When a government imposes trade tariffs it means it is taxing the importer of goods. It's effectively an entrance fee for products, usually set at a percentage of their value.

This is not the first time Tr

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer

def preprocess_text(text):
    """Turn a single document into its TF-IDF representation.

    A fresh vectorizer is fitted on this one document only: English stop
    words (e.g. "and", "the") are dropped and the vocabulary is capped at
    5000 terms.

    Args:
        text: The document to vectorize.

    Returns:
        The result of ``TfidfVectorizer.fit_transform`` for the one-document
        corpus (a sparse matrix).
    """
    tfidf = TfidfVectorizer(max_features=5000, stop_words='english')
    # fit_transform expects an iterable of documents, hence the one-element list.
    documents = [text]
    return tfidf.fit_transform(documents)

# Vectorize the article text extracted earlier in the notebook.
# NOTE(review): no later cell shown here reads `preprocessed_text`; the
# classifier cells work on the raw text instead.
preprocessed_text = preprocess_text(article_text)

In [5]:
# from transformers import pipeline
# fake_news_detector = pipeline("text-classification", model=model_name, tokenizer=model_name)

# Add this immediately after
# Translate the classifier's raw output labels into user-facing verdicts.
# NOTE(review): assumes LABEL_0 means "fake" and LABEL_1 means "real" for the
# model in use — confirm against that model's card.
label_map = dict(
    LABEL_0="❌ Fake",
    LABEL_1="✅ Real",
)

In [6]:
# STEP 1: Install dependencies (-q keeps pip's output quiet)
!pip install -q transformers huggingface_hub

# STEP 2: Authenticate with your Hugging Face token (paste it when prompted)
# NOTE: login() is interactive. The Hub message captured in this cell's output
# states that authentication is recommended but optional for public models.
from huggingface_hub import login
login()  # You'll paste your Hugging Face token here (from https://huggingface.co/settings/tokens)

# STEP 3: Import the pipeline
from transformers import pipeline

# STEP 4: Load the fake news detection model (public + accessible)
# The same Hub repo id is passed for both the model weights and the tokenizer.
# `fake_news_detector` is a notebook-level name used by the cells below.
model_name = "Pulk17/Fake-News-Detection"
fake_news_detector = pipeline("text-classification", model=model_name, tokenizer=model_name)

# STEP 5: Create a function to predict if the news is fake or real
def predict_fake_news(text, max_chars=1000):
    """Classify ``text`` as fake or real news.

    Args:
        text: Article text (or headline) to classify.
        max_chars: Only the first ``max_chars`` characters are sent to the
            classifier, which keeps long articles within the model's input
            limit. Defaults to 1000, the cut-off this notebook has always
            used.

    Returns:
        The human-readable verdict from ``label_map``; if the model emits a
        label that has no mapping, the raw label is returned unchanged.
    """
    result = fake_news_detector(text[:max_chars])
    # The pipeline returns one dict per input; a single string was passed,
    # so the prediction is the first element.
    raw_label = result[0]['label']
    return label_map.get(raw_label, raw_label)

# STEP 6: Test it with an example headline
example_text = "Trump Orders All Children Born Under Biden To Be Renamed After Confederate Generals"
prediction = predict_fake_news(example_text)
print("Prediction:", prediction)  # Prints the mapped verdict from label_map (e.g. "❌ Fake"), or the raw label if it has no mapping

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cpu


Prediction: ❌ Fake


In [None]:
!pip install streamlit



In [7]:
import streamlit as st

# NOTE: this UI only works when the script is launched with `streamlit run`;
# executed as a plain notebook cell, Streamlit just emits the warning shown
# in this cell's output.
st.title("Fake News Detection App")
st.subheader("Paste a news article link to check if it's fake or real")

url = st.text_input("Enter News Article URL:")

# The button returns True only on the rerun triggered by the click.
if st.button("Analyze"):
    # Ignore surrounding whitespace so a blank-looking field counts as empty.
    url = url.strip()
    if not url:
        # Give explicit feedback instead of silently doing nothing.
        st.warning("Please enter a news article URL first.")
    else:
        article_text = extract_text_from_url(url)
        if article_text:
            # Reuse the notebook's helper so truncation and label mapping
            # stay identical to the non-UI code path.
            label = predict_fake_news(article_text)
            st.subheader(f"Prediction: {label}")
        else:
            st.error("Could not extract text from the URL. Try another link.")

2025-03-31 13:59:16.367 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-03-31 13:59:16.402 Session state does not function when running a script without `streamlit run`


In [None]:
!pip install -q streamlit pyngrok newspaper3k transformers

In [8]:
# Source of the standalone Streamlit app. It is written to app.py below so it
# can be launched with `streamlit run app.py`.
# NOTE(review): this app loads "jy46604790/Fake-News-Bert-Detect", while the
# interactive cells earlier in the notebook load "Pulk17/Fake-News-Detection"
# — confirm which model is intended.
code = '''
import streamlit as st
from newspaper import Article
from transformers import pipeline

# Function to extract article text from URL
def extract_text_from_url(url):
    """Return the article body for url, or None when it cannot be fetched or parsed."""
    try:
        article = Article(url)
        article.download()
        article.parse()
        return article.text
    except Exception:
        return None

# Load Hugging Face fake news model
model_name = "jy46604790/Fake-News-Bert-Detect"

@st.cache_resource
def load_detector():
    """Build the classifier once; Streamlit reruns this script on every interaction."""
    return pipeline("text-classification", model=model_name, tokenizer=model_name)

fake_news_detector = load_detector()

# 🧠 Add label mapping
label_map = {
    "LABEL_0": "❌ Fake",
    "LABEL_1": "✅ Real"
}

st.title("📰 Fake News Detection App")
st.subheader("Paste a news article link to check if it's Fake or Real")

url = st.text_input("Enter News Article URL:")

if st.button("Analyze"):
    # Ignore surrounding whitespace so a blank-looking field counts as empty.
    url = url.strip()
    if not url:
        st.warning("Please enter a news article URL first.")
    else:
        article_text = extract_text_from_url(url)
        if article_text:
            # Truncate at the tokenizer level so long articles cannot exceed
            # the model's 512-token input limit.
            prediction = fake_news_detector(article_text, truncation=True, max_length=512)
            raw_label = prediction[0]['label']
            label = label_map.get(raw_label, raw_label)  # Convert LABEL_0/1 → Fake/Real
            st.subheader(f"Prediction: {label}")
        else:
            st.error("Could not extract text from the URL. Try another link.")
'''

# The source contains emoji, so write it explicitly as UTF-8 rather than
# relying on the platform's default encoding.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(code)

In [9]:
from pyngrok import ngrok
import time
import threading

# (Only needed once — if you haven't run this before)
ngrok.set_auth_token("2v4euSkzqsc1ud1u94EeDOkf56o_VzgWakVyZ4KTH4V2vvxS")  # Replace with your actual token

# Start the Streamlit app in a background thread
def run_streamlit():
    !streamlit run app.py

thread = threading.Thread(target=run_streamlit)
thread.start()

# Wait for Streamlit to launch
time.sleep(10)

# ✅ Connect to localhost:8501 instead of using port=
public_url = ngrok.connect("http://localhost:8501")
print("✅ Streamlit app is running at:", public_url)


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.55.90.78:8501[0m
[0m
✅ Streamlit app is running at: NgrokTunnel: "https://ca98-34-55-90-78.ngrok-free.app" -> "http://localhost:8501"


In [None]:
# Stop the background Streamlit server. `pkill -f` matches against the full
# command line, so every process whose command line contains "streamlit" is
# terminated.
# NOTE(review): the ngrok tunnel opened earlier is not closed by this cell —
# confirm whether it should be shut down here as well.
!pkill -f streamlit