In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier

# Sample data: a three-headline toy corpus with one label per headline.
texts = ["This is real news", "Click here to win a million dollars!", "Government confirms new law"]
labels = ["REAL", "FAKE", "REAL"]
# The two lists are matched purely by position, so fail loudly if they drift apart.
assert len(texts) == len(labels), "texts and labels must have the same length"

# Vectorize text: learn the TF-IDF vocabulary from the corpus and encode it.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)

# Train model. The classifier shuffles the training data between epochs by
# default, so pin random_state to get the same fitted model on every run.
model = PassiveAggressiveClassifier(random_state=0)
model.fit(X, labels)

# Test. The new headline must go through the *fitted* vectorizer (transform,
# not fit_transform) so it is encoded with the training vocabulary.
# Of its words, only "win" occurs in the training texts; the others are ignored.
test_news = ["Win a free iPhone now!"]
test_vec = vectorizer.transform(test_news)
print("Prediction:", model.predict(test_vec)[0])

Prediction: FAKE


In [3]:
# Fake News Detection using sklearn with hardcoded data
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Sample dataset (hardcoded). Each headline is stored together with its label
# so the two lists derived below cannot fall out of alignment when rows are
# added, removed or reordered.
samples = [
    ("Breaking: Actor says the earth is flat", "FAKE"),
    ("NASA confirms new planet discovered", "REAL"),
    ("Vaccines are dangerous and should be banned", "FAKE"),
    ("COVID-19 vaccine approved by WHO", "REAL"),
    ("Aliens built the pyramids, not Egyptians", "FAKE"),
    ("Indian economy shows signs of recovery", "REAL"),
    ("Global warming is a hoax", "FAKE"),
    ("Government launches new education policy", "REAL"),
]
texts = [text for text, _ in samples]
labels = [label for _, label in samples]

# Fixed class order for the confusion matrix, and one seed shared by every
# randomized step so the cell gives the same numbers on each run.
CLASS_ORDER = ["FAKE", "REAL"]
SEED = 7

# Split data. With only 8 rows the test set has 2 headlines; stratifying on the
# labels guarantees it holds one FAKE and one REAL instead of, by chance, two
# of the same class.
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.25, random_state=SEED, stratify=labels
)

# Vectorization using TF-IDF. English stop words are removed and terms that
# appear in more than 70% of the training headlines are dropped (max_df=0.7).
# The vocabulary is fitted on the training split only; the test split is
# merely transformed, so nothing from it leaks into the features.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
tfidf_train = vectorizer.fit_transform(X_train)
tfidf_test = vectorizer.transform(X_test)

# Model: Passive Aggressive Classifier. random_state pins the per-epoch
# shuffling of the training data, making the fitted model reproducible.
model = PassiveAggressiveClassifier(max_iter=50, random_state=SEED)
model.fit(tfidf_train, y_train)

# Predict and Evaluate. Passing labels= fixes the matrix to 2x2 with rows
# (true class) and columns (predicted class) both ordered FAKE, REAL, even if
# one class is missing from y_test or y_pred.
y_pred = model.predict(tfidf_test)
score = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred, labels=CLASS_ORDER)

# Output
print("Accuracy: {:.2f}%".format(score * 100))
print("\nConfusion Matrix:\n", conf_matrix)

# Try a new prediction.
# NOTE: apart from English stop words, none of this headline's words occur in
# any corpus headline, so its TF-IDF vector is all zeros and the result below
# says nothing about the headline's content.
new_text = ["The moon landing was faked"]
new_vec = vectorizer.transform(new_text)
print("\nPrediction for test news: ", model.predict(new_vec)[0])

Accuracy: 50.00%

Confusion Matrix:
 [[1 0]
 [1 0]]

Prediction for test news:  FAKE
