In [2]:
from __future__ import annotations

Search Engine Code: 

<script async src="https://cse.google.com/cse.js?cx=952f538231ed24a44">
</script>
<div class="gcse-search"></div>

In [3]:
import os

# The Custom Search API key is a secret, so it is read from the environment
# instead of being committed to the notebook. Export GOOGLE_API_KEY before
# starting the kernel.
# NOTE(review): the key that used to be hard-coded here has been exposed in the
# notebook history and should be revoked/rotated.
API_KEY = os.environ.get("GOOGLE_API_KEY", "")
if not API_KEY:
    print("WARNING: GOOGLE_API_KEY is not set; Custom Search requests will fail.")

# Programmable Search Engine ID (same value as the `cx` in the embed snippet above).
CX = "952f538231ed24a44"

In [16]:
from __future__ import annotations
import requests
import re
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from dataclasses import dataclass
from typing import List, Dict, Any
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# ============================
# CONFIG
# ============================
# Hugging Face checkpoint that NLIChecker loads for premise/hypothesis scoring.
MODEL_NAME = "roberta-large-mnli"
# Run the model on the GPU when torch can see one, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Minimum (max-over-both-directions) entailment probability for a SUPPORTS verdict.
TAU_ENTAIL = 0.65
# Minimum (max-over-both-directions) contradiction probability for a CONTRADICTS verdict.
TAU_CONTRA = 0.65
SIMILARITY_THRESHOLD = 0.15  # minimum TF-IDF cosine similarity for relevance_filter to keep an article
# NOTE(review): TOP_K is not referenced anywhere else in this cell — confirm it is still needed.
TOP_K = 10


# ============================
# INPUT
# ============================
user_text = input("Enter headline: ")

# Hand the query to requests via `params` so it is percent-encoded. Building the
# URL by hand with replace(" ", "+") let characters such as "&", "#" or "+" in
# the headline corrupt or truncate the query string.
url = "https://www.googleapis.com/customsearch/v1"
query = user_text
http_response = requests.get(
    url,
    params={"q": query, "cx": CX, "key": API_KEY},
    timeout=30,  # do not hang the kernel on a stalled connection
)
if not http_response.ok:
    # An API failure (bad key, quota, ...) must not be reported as a verdict.
    # raise_for_status() is avoided on purpose: its message contains the full
    # request URL, which would print the API key into the cell output.
    raise RuntimeError(
        f"Custom Search request failed with HTTP {http_response.status_code}"
    )
response = http_response.json()
items = response.get("items", [])

# One evidence string per hit: "<title> <snippet>", skipping hits with neither.
search_results = []
for item in items:
    combined = f"{item.get('title','')} {item.get('snippet','')}"
    if combined.strip():
        search_results.append(combined)

if not search_results:
    print("FINAL VERDICT: NOT ENOUGH INFO")
    # SystemExit stops this cell only; IPython's exit() helper can shut the
    # whole kernel down and is not defined in every Python environment.
    raise SystemExit


# ============================
# UTILITIES
# ============================
def normalize(s: str) -> str:
    """Return `s` lower-cased, trimmed, and with every whitespace run collapsed to one space."""
    # split() with no argument drops leading/trailing whitespace and treats any
    # run of whitespace as a single separator; joining restores single spaces.
    return " ".join(s.lower().split())


def relevance_filter(claim: str, articles: List[str]) -> List[str]:
    """Keep the articles that are lexically similar to `claim`.

    The claim and all articles are vectorised together with TF-IDF (English
    stop words removed); an article is kept when its cosine similarity to the
    claim is at least SIMILARITY_THRESHOLD.

    Args:
        claim: The headline/claim being checked.
        articles: Candidate evidence texts.

    Returns:
        The kept articles, in their original order. An empty `articles` list
        yields an empty list.
    """
    # cosine_similarity cannot compare against a matrix with zero rows, so an
    # empty candidate list is answered directly instead of raising.
    if not articles:
        return []

    vec = TfidfVectorizer(stop_words="english")
    # Row 0 is the claim; rows 1.. line up with `articles`.
    tfidf = vec.fit_transform([claim, *articles])
    sims = cosine_similarity(tfidf[0:1], tfidf[1:]).flatten()

    return [
        article
        for article, sim in zip(articles, sims)
        if sim >= SIMILARITY_THRESHOLD
    ]


# ============================
# NLI
# ============================
@dataclass
class NLIResult:
    """NLI outcome for one search result compared against the user's headline."""
    article: str  # the search-result text that was scored
    # Each probability below is the larger of the two directions
    # (article -> claim, claim -> article), so the three need not sum to 1.
    entailment: float
    contradiction: float
    neutral: float
    verdict: str  # "SUPPORTS", "CONTRADICTS" or "UNCLEAR"


class NLIChecker:
    """Scores premise/hypothesis pairs with the NLI checkpoint named by MODEL_NAME."""

    # Class order assumed when the checkpoint config has no usable id2label mapping.
    DEFAULT_LABELS = ("contradiction", "neutral", "entailment")

    def __init__(self):
        """Load tokenizer and model onto DEVICE and determine the logit label order."""
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        self.model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to(DEVICE)
        self.model.eval()
        self.labels = self._resolve_labels()

    def _resolve_labels(self) -> List[str]:
        """Return the class name for each logit index.

        The order is taken from the model config's `id2label` when it names
        exactly the three NLI classes; otherwise DEFAULT_LABELS is used. Reading
        it from the checkpoint keeps entailment/contradiction from being
        silently swapped if MODEL_NAME is changed to a model with another order.
        """
        id2label = getattr(self.model.config, "id2label", None)
        if isinstance(id2label, dict) and len(id2label) == len(self.DEFAULT_LABELS):
            mapped = [
                str(id2label.get(index, "")).lower()
                for index in range(len(self.DEFAULT_LABELS))
            ]
            if set(mapped) == set(self.DEFAULT_LABELS):
                return mapped
        return list(self.DEFAULT_LABELS)

    @torch.no_grad()
    def score(self, premise: str, hypothesis: str) -> Dict[str, float]:
        """Return {label: probability} for one (premise, hypothesis) pair.

        Both texts are passed through normalize(); the encoded pair is truncated
        to 256 tokens.
        """
        enc = self.tokenizer(
            normalize(premise),
            normalize(hypothesis),
            return_tensors="pt",
            truncation=True,
            max_length=256,
        ).to(DEVICE)

        # Single pair in the batch, so row 0 holds its class probabilities.
        probs = F.softmax(self.model(**enc).logits, dim=-1)[0].cpu().tolist()
        return dict(zip(self.labels, probs))

    def bidirectional(self, article: str, claim: str) -> "NLIResult":
        """Score article->claim and claim->article and merge into one NLIResult.

        Each reported probability is the maximum over the two directions.
        Contradiction is checked before entailment, so an article that clears
        both thresholds is reported as CONTRADICTS.
        """
        a1 = self.score(article, claim)
        a2 = self.score(claim, article)

        entail = max(a1["entailment"], a2["entailment"])
        contra = max(a1["contradiction"], a2["contradiction"])
        neutral = max(a1["neutral"], a2["neutral"])

        if contra >= TAU_CONTRA:
            verdict = "CONTRADICTS"
        elif entail >= TAU_ENTAIL:
            verdict = "SUPPORTS"
        else:
            verdict = "UNCLEAR"

        return NLIResult(article, entail, contra, neutral, verdict)


# ============================
# PIPELINE
# ============================
# Keep only the search hits that are lexically close to the headline, then run
# the two-way NLI comparison on each one that remains.
relevant_articles = relevance_filter(user_text, search_results)

checker = NLIChecker()
results = []
for article in relevant_articles:
    results.append(checker.bidirectional(article, user_text))

# One list per article-level verdict.
supports, contradicts, unclear = [], [], []
by_verdict = {"SUPPORTS": supports, "CONTRADICTS": contradicts, "UNCLEAR": unclear}
for r in results:
    by_verdict[r.verdict].append(r)

# ============================
# FINAL DECISION
# ============================
# Evidence per side = summed probability of the articles that landed on that side.
support_score = sum([hit.entailment for hit in supports])
contra_score = sum([hit.contradiction for hit in contradicts])

# The epsilon keeps the division defined when neither side has any evidence.
total_evidence = support_score + contra_score + 1e-6
confidence = abs(support_score - contra_score) / total_evidence

# A side wins only with more than 0.7 total evidence AND more than 1.2x the other side.
refuted = contra_score > 0.7 and contra_score > support_score * 1.2
confirmed = support_score > 0.7 and support_score > contra_score * 1.2
if refuted:
    final_verdict = "FALSE"
elif confirmed:
    final_verdict = "TRUE"
else:
    final_verdict = "NOT ENOUGH INFO"


# ============================
# OUTPUT
# ============================
print("\n==============================", "USER HEADLINE:", user_text, sep="\n")

print("\n--- ARTICLE-LEVEL NLI RESULTS ---")
for r in results:
    print("\nARTICLE:", r.article, sep="\n")
    print(f"SUPPORT={r.entailment:.3f}  CONTRA={r.contradiction:.3f}  NEUTRAL={r.neutral:.3f}")
    print("VERDICT:", r.verdict)

print(
    "\n==============================",
    f"FINAL VERDICT: {final_verdict}",
    f"CONFIDENCE SCORE: {confidence:.2f}",
    "==============================",
    sep="\n",
)


Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).



USER HEADLINE:
UN Climate Report Warns of Rising Sea Levels

--- ARTICLE-LEVEL NLI RESULTS ---

ARTICLE:
Climate change: Landmark UN report warns sea levels will rise ... Sep 25, 2019 ... Landmark UN report warns sea levels will rise faster than projected by 2100.
SUPPORT=0.986  CONTRA=0.003  NEUTRAL=0.979
VERDICT: SUPPORTS

ARTICLE:
Oceans are taking a beating under climate change, U.N. report warns Sep 25, 2019 ... Coastal communities, which will be home to a billion people by 2050, are also feeling the impacts, starting with rising seas. Over the last ...
SUPPORT=0.981  CONTRA=0.005  NEUTRAL=0.947
VERDICT: SUPPORTS

ARTICLE:
U.N. Climate Report Offers Grim New Forecast : NPR Oct 8, 2018 ... A U.N. report warns that even a 1.5-degree C increase ... higher sea levels, damage to agriculture and displacement of millions of people.
SUPPORT=0.983  CONTRA=0.002  NEUTRAL=0.992
VERDICT: SUPPORTS

ARTICLE:
World climate report warns of growing risks to lives, economy and ... Mar 18, 2025 ...

In [6]:
import requests
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

user_text = input("Enter headline: ")

# Let requests percent-encode the query through `params`; the hand-built URL
# with replace(" ", "+") broke on headlines containing "&", "#" or "+".
url = "https://www.googleapis.com/customsearch/v1"
query = user_text
http_response = requests.get(
    url,
    params={"q": query, "cx": CX, "key": API_KEY},
    timeout=30,  # do not hang the kernel on a stalled connection
)
if not http_response.ok:
    # A failed API call must not be reported as "FAKE".
    # raise_for_status() is avoided on purpose: its message contains the full
    # request URL, which would print the API key into the cell output.
    raise RuntimeError(
        f"Custom Search request failed with HTTP {http_response.status_code}"
    )
response = http_response.json()
items = response.get("items", [])

# One evidence string per hit: "<title> <snippet>", skipping hits with neither.
search_results = []

for item in items:
    title = item.get("title", "")
    snippet = item.get("snippet", "")
    combined = title + " " + snippet
    if combined.strip():
        search_results.append(combined)

if not search_results:
    print("FAKE")
    # SystemExit stops this cell only; IPython's exit() helper can shut the
    # whole kernel down and is not defined in every Python environment.
    raise SystemExit


# vectorizer = TfidfVectorizer(stop_words="english")
# tfidf_corpus = vectorizer.fit_transform(search_results)
# tfidf_user = vectorizer.transform([user_text])
# similarities = cosine_similarity(tfidf_user, tfidf_corpus).flatten()
# # best_score = max(similarities)

# if best_score > 0.35:
#     print("REAL")
# else:
#     print("FAKE")

# print(similarities)
# print(sum(similarities)/len(similarities))
# print(search_results)

In [None]:
from __future__ import annotations
# Testing With NLI

from dataclasses import dataclass
from typing import List, Dict, Any, Tuple
import re

import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification


# ----------------------------
# Configuration
# ----------------------------
MODEL_NAME = "roberta-large-mnli"  # default checkpoint for NLIChecker
# Fallback logit order, used when the loaded model's config does not name the three classes.
LABELS = ["contradiction", "neutral", "entailment"]

# Default probability thresholds for calling a pair an entailment / a contradiction.
TAU_ENTAIL = 0.65
TAU_CONTRA = 0.65

# Default device for NLIChecker: GPU when torch can see one, otherwise CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


# ----------------------------
# Utilities
# ----------------------------
def normalise_text(s: str) -> str:
    """Trim `s` and collapse every run of whitespace into a single space."""
    # split() without arguments discards surrounding whitespace and treats any
    # whitespace run as one separator.
    return " ".join(s.split())


@dataclass
class NLIResult:
    """Bidirectional NLI comparison between one headline and the user's input."""
    headline: str  # the headline that was compared against the user input
    verdict: str  # MATCH_ENTAILS / MISMATCH_CONTRADICTS / UNCLEAR_NEUTRAL
    # Each probability is the larger of the two directions
    # (headline -> input, input -> headline), so the three need not sum to 1.
    entailment: float
    contradiction: float
    neutral: float
    # Optional per-direction data: score dicts under "headline->input" and
    # "input->headline", plus the two one-way verdicts under "verdicts".
    details: Dict[str, Any] | None = None


class NLIChecker:
    """Wraps a sequence-classification NLI model and turns its probabilities into verdicts."""

    def __init__(self, model_name: str = MODEL_NAME, device: str = DEVICE):
        """Load tokenizer and model for `model_name` onto `device` and fix the label order."""
        self.device = device
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
        self.model.eval()

        # Start from the module default and override it only when the
        # checkpoint's own id2label names exactly the three NLI classes.
        self.labels = LABELS
        id2label = getattr(self.model.config, "id2label", None)
        if id2label and isinstance(id2label, dict) and len(id2label) == 3:
            from_config = [id2label[idx].lower() for idx in range(3)]
            if set(from_config) == {"contradiction", "neutral", "entailment"}:
                self.labels = from_config

    @torch.no_grad()
    def scores(self, premise: str, hypothesis: str) -> Dict[str, float]:
        """Return {label: probability} for one whitespace-normalised (premise, hypothesis) pair."""
        encoded = self.tokenizer(
            normalise_text(premise),
            normalise_text(hypothesis),
            return_tensors="pt",
            truncation=True,
            max_length=256,
        ).to(self.device)

        output = self.model(**encoded)
        distribution = F.softmax(output.logits, dim=-1).squeeze(0)
        values = distribution.detach().cpu().tolist()
        return {label: value for label, value in zip(self.labels, values)}

    def decision_one_way(
        self,
        premise: str,
        hypothesis: str,
        tau_entail: float = TAU_ENTAIL,
        tau_contra: float = TAU_CONTRA,
    ) -> Tuple[str, Dict[str, float]]:
        """Classify a single direction; returns (verdict, score dict).

        Entailment is tested before contradiction in this one-way check.
        """
        s = self.scores(premise, hypothesis)
        if s.get("entailment", 0.0) >= tau_entail:
            label = "MATCH_ENTAILS"
        elif s.get("contradiction", 0.0) >= tau_contra:
            label = "MISMATCH_CONTRADICTS"
        else:
            label = "UNCLEAR_NEUTRAL"
        return label, s

    def decision_bidir(
        self,
        headline: str,
        user_input: str,
        tau_entail: float = TAU_ENTAIL,
        tau_contra: float = TAU_CONTRA,
    ) -> NLIResult:
        """Compare headline and input in both directions and merge the outcome.

        Merge rule: a contradiction above threshold in either direction wins;
        otherwise an entailment above threshold in either direction; otherwise
        the pair is unclear. Reported probabilities are the per-class maximum
        over the two directions.
        """
        forward_verdict, forward = self.decision_one_way(headline, user_input, tau_entail, tau_contra)
        backward_verdict, backward = self.decision_one_way(user_input, headline, tau_entail, tau_contra)
        directions = (forward, backward)

        def strongest(label: str) -> float:
            # Largest probability of `label` over the two directions.
            return max(d.get(label, 0.0) for d in directions)

        if any(d.get("contradiction", 0.0) >= tau_contra for d in directions):
            verdict = "MISMATCH_CONTRADICTS"
        elif any(d.get("entailment", 0.0) >= tau_entail for d in directions):
            verdict = "MATCH_ENTAILS"
        else:
            verdict = "UNCLEAR_NEUTRAL"

        per_direction = {
            "headline->input": forward,
            "input->headline": backward,
            "verdicts": (forward_verdict, backward_verdict),
        }
        return NLIResult(
            headline=headline,
            verdict=verdict,
            entailment=float(strongest("entailment")),
            contradiction=float(strongest("contradiction")),
            neutral=float(strongest("neutral")),
            details=per_direction,
        )


def check_input_against_headlines(
    user_input: str,
    headlines: List[str],
    top_k: int = 10,
    checker: "NLIChecker | None" = None,
) -> "Dict[str, List[NLIResult]]":
    """Compare `user_input` with every headline and bucket the results.

    Args:
        user_input: The claim being checked.
        headlines: Candidate headlines; entries that are not non-blank strings
            are skipped.
        top_k: Maximum number of results returned per bucket.
        checker: Optional object providing `decision_bidir(headline, user_input)`.
            Pass an existing NLIChecker to avoid reloading the model on every
            call; when omitted a new NLIChecker is created, as before.

    Returns:
        A dict with keys "supports", "contradicts" and "unclear". Each value is
        sorted in descending order of the matching probability (entailment,
        contradiction, neutral respectively) and cut to `top_k` items.
    """
    if checker is None:
        checker = NLIChecker()

    results = [
        checker.decision_bidir(h, user_input)
        for h in headlines
        if isinstance(h, str) and h.strip()
    ]

    # (bucket name, verdict that selects it, score used to rank it)
    ranking = (
        ("supports", "MATCH_ENTAILS", lambda r: r.entailment),
        ("contradicts", "MISMATCH_CONTRADICTS", lambda r: r.contradiction),
        ("unclear", "UNCLEAR_NEUTRAL", lambda r: r.neutral),
    )
    buckets = {}
    for bucket_name, verdict, score in ranking:
        members = [r for r in results if r.verdict == verdict]
        members.sort(key=score, reverse=True)
        buckets[bucket_name] = members[:top_k]
    return buckets


# ----------------------------
# Example run (replace with your API output)
# ----------------------------
# if __name__ == "__main__":
# user_input = "India beat South Africa in the first ODI"

# # Example headlines list you got from your news API
# headlines = [
#     "South Africa beat India in the first ODI",
#     "India defeat South Africa in ODI opener",
#     "India vs South Africa: first ODI preview",
#     "First ODI: India edge past South Africa in a tense finish",
#     "South Africa secure series lead with win over India in opening ODI",
#     "Cricket: India and South Africa begin ODI series today",
# ]
# Reuse the headline and search hits collected by the earlier search cell.
user_input = user_text
headlines = search_results
buckets = check_input_against_headlines(user_input, headlines, top_k=10)

print("\nUSER INPUT:", f"  {user_input}", sep="\n")


def print_bucket(title: str, items: List[NLIResult]):
    """Print a titled section with one score line and one headline line per result."""
    lines = [f"\n{title} ({len(items)}):"]
    for entry in items:
        lines.append(
            f"- {entry.verdict:20s}  entail={entry.entailment:.3f}  contra={entry.contradiction:.3f}  neutral={entry.neutral:.3f}"
        )
        lines.append(f"  headline: {entry.headline}")
    print("\n".join(lines))


for section_title, bucket_key in (
    ("SUPPORTS (ENTAILS)", "supports"),
    ("CONTRADICTS", "contradicts"),
    ("UNCLEAR / NEUTRAL", "unclear"),
):
    print_bucket(section_title, buckets[bucket_key])

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).



USER INPUT:
   NASA Confirms Successful Deployment of New Solar Arrays on the ISS

SUPPORTS (ENTAILS) (1):
- MATCH_ENTAILS         entail=0.881  contra=0.009  neutral=0.992
  headline: International Space Station: Astronauts installed a giant solar panel ... Jun 25, 2021 ... ... successfully installed and deployed the second array. Coverage was streamed on NASA's TV channel and website, and began at 6:30 a.m. ET.

CONTRADICTS (2):
- MISMATCH_CONTRADICTS  entail=0.041  contra=0.996  neutral=0.179
  headline: Cargo spacecraft docks with ISS after solar panel fails to deploy ... Nov 9, 2022 ... A cargo spacecraft carrying supplies to the International Space Station successfully docked with the orbital output on Wednesday, ...
- MISMATCH_CONTRADICTS  entail=0.021  contra=0.836  neutral=0.924
  headline: Live updates: Intuitive machines Athena lander reaches the moon ... Mar 7, 2025 ... Commentators on Intuitive Machines' webcast confirmed that Athena is getting power, suggesting its solar

TODO: Research the SBERT and MPNet models and NLI (Natural Language Inference).

In [3]:
import requests

query = "India economic news"

# Pass the query via `params` so requests encodes it, and bound the wait time.
url = "https://www.googleapis.com/customsearch/v1"
http_response = requests.get(
    url,
    params={"q": query, "cx": CX, "key": API_KEY},
    timeout=30,
)
if not http_response.ok:
    # Fail loudly instead of printing an error payload followed by zero hits.
    # raise_for_status() is avoided on purpose: its message contains the full
    # request URL, which would print the API key into the cell output.
    raise RuntimeError(
        f"Custom Search request failed with HTTP {http_response.status_code}"
    )

response = http_response.json()
print(response)
for item in response.get("items", []):
    print("Title:", item.get("title"))
    print("Link:", item.get("link"))
    print("Snippet:", item.get("snippet"))
    print()

{'kind': 'customsearch#search', 'url': {'type': 'application/json', 'template': 'https://www.googleapis.com/customsearch/v1?q={searchTerms}&num={count?}&start={startIndex?}&lr={language?}&safe={safe?}&cx={cx?}&sort={sort?}&filter={filter?}&gl={gl?}&cr={cr?}&googlehost={googleHost?}&c2coff={disableCnTwTranslation?}&hq={hq?}&hl={hl?}&siteSearch={siteSearch?}&siteSearchFilter={siteSearchFilter?}&exactTerms={exactTerms?}&excludeTerms={excludeTerms?}&linkSite={linkSite?}&orTerms={orTerms?}&dateRestrict={dateRestrict?}&lowRange={lowRange?}&highRange={highRange?}&searchType={searchType}&fileType={fileType?}&rights={rights?}&imgSize={imgSize?}&imgType={imgType?}&imgColorType={imgColorType?}&imgDominantColor={imgDominantColor?}&alt=json'}, 'queries': {'request': [{'title': 'Google Custom Search - India economic news', 'totalResults': '17700000', 'searchTerms': 'India economic news', 'count': 10, 'startIndex': 1, 'inputEncoding': 'utf8', 'outputEncoding': 'utf8', 'safe': 'off', 'cx': '952f538231

In [30]:
import pandas as pd
import csv
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error,r2_score,accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from sklearn.pipeline import Pipeline
# The typed article is wrapped in a one-element list: fit_transform receives a collection of documents.
user=[input('Enter an article you want to fact check: ')]
vectorizer = TfidfVectorizer()
# NOTE(review): the vectorizer is fitted on this single input only, so its vocabulary and
# weights come from that one text — confirm this is intended rather than reusing a fitted model.
tfidf_matrix1 = vectorizer.fit_transform(user)




  (np.int32(0), np.int32(2))	0.2
  (np.int32(0), np.int32(17))	0.4
  (np.int32(0), np.int32(4))	0.2
  (np.int32(0), np.int32(16))	0.2
  (np.int32(0), np.int32(13))	0.2
  (np.int32(0), np.int32(9))	0.2
  (np.int32(0), np.int32(15))	0.2
  (np.int32(0), np.int32(5))	0.2
  (np.int32(0), np.int32(1))	0.2
  (np.int32(0), np.int32(6))	0.2
  (np.int32(0), np.int32(0))	0.4
  (np.int32(0), np.int32(18))	0.2
  (np.int32(0), np.int32(8))	0.2
  (np.int32(0), np.int32(12))	0.2
  (np.int32(0), np.int32(7))	0.2
  (np.int32(0), np.int32(3))	0.2
  (np.int32(0), np.int32(11))	0.2
  (np.int32(0), np.int32(14))	0.2
  (np.int32(0), np.int32(10))	0.2


In [7]:
import pandas as pd
import csv
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error,r2_score,accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
def _read_news(csv_path, fixed_label=None):
    """Load one dataset and keep only its title/text/label columns.

    With `fixed_label` set, the file is treated as single-class and every row
    gets that label; otherwise the file's own `label` column is kept.
    """
    frame = pd.read_csv(csv_path)
    if fixed_label is None:
        return frame[['title', 'text', 'label']]
    return frame[['title', 'text']].assign(label=fixed_label)


# True.csv / Fake.csv carry no label column here, so it is assigned: 1 = true, 0 = fake.
df_true = _read_news('True.csv', fixed_label=1)
df_fake = _read_news('Fake.csv', fixed_label=0)

# NOTE(review): the three files below bring their own `label` column. Confirm each
# uses the same polarity (1 = true, 0 = fake) as above and that the files do not
# share articles; either problem would distort training and the reported accuracy.
df_train = _read_news('train.csv')
df_test = _read_news('test.csv')
df_WELFake_dataset = _read_news('WELFake_Dataset.csv')

# Stack all sources, shuffle reproducibly, then drop rows with any missing field.
df_combined = (
    pd.concat(
        [df_true, df_fake, df_train, df_test, df_WELFake_dataset],
        axis=0,
        ignore_index=True,
    )
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
    .dropna()
    .reset_index(drop=True)
)
for text_column in ('title', 'text'):
    df_combined[text_column] = df_combined[text_column].astype(str)
# The model input is the title followed by the body text.
df_combined['combined_content'] = df_combined['title'] + ' ' + df_combined['text']

x = df_combined['combined_content']
y = df_combined['label']
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [8]:
from sklearn.pipeline import Pipeline

# Text features: lower-cased unigrams and bigrams without English stop words,
# ignoring terms present in more than 90% of documents, capped at 100k features.
# (A min_df=2 cut-off was tried and is currently disabled.)
tfidf_step = TfidfVectorizer(
    lowercase=True,
    stop_words="english",
    ngram_range=(1, 2),
    max_df=0.9,
    max_features=100_000,
)
# Class-balanced logistic regression on top of the TF-IDF features.
classifier_step = LogisticRegression(
    class_weight="balanced",
    max_iter=200,
    solver="liblinear",
)
pipeline_1 = Pipeline(steps=[("tfidf", tfidf_step), ("cef", classifier_step)])

# Train on the training split and report plain accuracy on the held-out split.
pipeline_1.fit(X_train, Y_train)
y_pred = pipeline_1.predict(X_test)
acc = accuracy_score(Y_test, y_pred)
print(acc)

0.618822921043276


In [9]:
# Hand-written sample stories ("<headline> — <summary>") used to spot-check pipeline_1.
# NOTE(review): the first ten entries are presumably the fabricated ones (expected label 0),
# judging by the "Real" marker before the last ten — confirm.
demo=[
    "Global Internet Outage Leaves Half the World Offline for 3 Hours — A massive technical fault in underwater cables caused millions to lose access to the internet for hours. Engineers blamed a software update.",
    "Scientists Claim They Detected Signs of Life on Europa — A research team reported unusual chemical signatures under Europa’s ice that may suggest microbes, though NASA says more testing is needed.",
    "City Introduces Sleep Pods on Streets to Reduce Homelessness — A U.S. city allegedly deployed climate-controlled sidewalk pods as temporary shelter, sparking debate about effectiveness.",
    "Major Airline Announces Standing-Room Tickets — An airline revealed a cheap standing-only seat option, but critics question the safety of flying without seats.",
    "New App Claims It Can Predict Weather Years Ahead — A startup says its AI can forecast five years into the future, but meteorologists reject the claim.",
    "Town’s Tap Water Turns Blue After Chemical Mix-Up — Residents panicked when blue water came from faucets, but officials said it came from a harmless testing dye.",
    "Robots Begin Delivering Mail in Small Midwest Community — Delivery robots replaced postal workers in a small town, exciting some residents and worrying others.",
    "Mysterious Green Lights Seen Across Three States — People reported glowing green streaks in the sky, likely caused by small meteor debris.",
    "School District May Replace Textbooks With VR — A district proposed switching all textbooks to VR lessons, dividing teachers on whether it helps learning.",
    "Gardener Claims 25-Foot Sunflower Breaks World Record — A local gardener says his sunflower reached 25 feet, though experts haven’t verified it.",

    # Real events (presumably expected label 1)

    "Massive Explosion Rocks Beirut Port — A huge blast in 2020 caused by stored ammonium nitrate destroyed parts of Beirut, killing and injuring thousands.",
    "Volcano Eruption in Iceland Forces Evacuations — An Icelandic volcano erupted after warning signs, pushing residents to leave threatened areas.",
    "Train Derailment in Ohio Releases Toxic Chemicals — A train carrying hazardous materials derailed in East Palestine, Ohio, causing fires and chemical concerns.",
    "California Wildfires Destroy Thousands of Homes — Severe wildfires driven by drought and winds forced mass evacuations and caused major property loss.",
    "Hurricane Katrina Causes Widespread Damage — In 2005, Katrina flooded New Orleans and devastated the Gulf Coast, leading to massive loss of life.",
    "Explosion at Texas Chemical Plant Sends Shockwaves — A chemical plant blast in Texas forced evacuations in surrounding communities.",
    "SpaceX Rocket Explodes During Test Flight — A Starship prototype exploded after a landing attempt, which SpaceX described as part of the testing process.",
    "Genoa Bridge Collapse in Italy Kills Dozens — The Morandi Bridge collapsed in 2018 during a storm, raising concerns about infrastructure safety.",
    "Oil Spill Off California Coast Harms Wildlife — A pipeline leak spilled thousands of gallons of oil, damaging marine ecosystems and forcing beach closures.",
    "Massive Earthquake in Turkey Destroys Buildings — A strong earthquake struck parts of Turkey, causing widespread destruction and many casualties."
]
# Predicted class for every demo story, printed as one array first.
demo_predict = pipeline_1.predict(demo)
print(demo_predict)

# Per-class probabilities, one row per demo story.
probability = pipeline_1.predict_proba(demo)
for text, label, prob in zip(demo, demo_predict, probability):
    print(text)
    # Print the label as plain text. Wrapping the expression in braces would
    # build a one-element set and show up as {'Real'} in the output.
    print("Predicted =", "Real" if label == 1 else "False")
    print("Confidence =", prob)
    print("----")

[1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1]
Global Internet Outage Leaves Half the World Offline for 3 Hours — A massive technical fault in underwater cables caused millions to lose access to the internet for hours. Engineers blamed a software update.
Predicted = {'Real'}
Confidence = [0.28001114 0.71998886]
----
Scientists Claim They Detected Signs of Life on Europa — A research team reported unusual chemical signatures under Europa’s ice that may suggest microbes, though NASA says more testing is needed.
Predicted = {'Real'}
Confidence = [0.38184617 0.61815383]
----
City Introduces Sleep Pods on Streets to Reduce Homelessness — A U.S. city allegedly deployed climate-controlled sidewalk pods as temporary shelter, sparking debate about effectiveness.
Predicted = {'Real'}
Confidence = [0.39704835 0.60295165]
----
Major Airline Announces Standing-Room Tickets — An airline revealed a cheap standing-only seat option, but critics question the safety of flying without seats.
Predicted = {'Fals

11/22/25

In [10]:
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
# Persist the fitted pipeline, reload it from disk, and predict on the demo
# stories again as a round-trip check.
# joblib files are pickle-based: only load files from a trusted source.
model_path = 'my_sklearn_model.joblib'
joblib.dump(pipeline_1, model_path)
loaded_model = joblib.load(model_path)
loaded_model.predict(demo)


array([1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])

API 11/29/25