In [35]:
# Step 1: Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Step 2: Build a small hand-labelled toy dataset.
# Each entry pairs a headline with its label: 1 = Real, 0 = Fake.
_samples = [
    ('Government launches new AI training scheme for youth.', 1),
    ('Aliens have landed in Chennai, say local villagers.', 0),
    ('WHO approves vaccine to fight latest virus variant.', 1),
    ('Man cures cancer using garlic water, claims YouTube video.', 0),
    ('India wins global coding championship with record score.', 1),
    ('Flat Earth society says gravity is a hoax.', 0),
    ('ISRO to launch human space mission by 2025.', 1),
    ('Tomatoes can reverse aging in 5 days, says post.', 0),
    ('Students build drone to deliver medicines in rural areas.', 1),
    ('Time traveler appears in 2024 elections, claims article.', 0),
    ('NASA releases first image of black hole.', 1),
    ('Celebrity claims drinking shampoo boosts immunity.', 0),
    ('Doctors perform world’s first robotic brain surgery.', 1),
    ('Chocolate can cure all diseases, claims Facebook post.', 0),
    ('Indian Railways introduces high-speed green trains.', 1),
    ('Fake news spreads faster than truth, says study.', 1),
    ('Moon is made of cheese, claims local news channel.', 0),
    ('New AI app helps farmers detect crop disease early.', 1),
    ('Drinking cold water after meals causes heart attacks.', 0),
    ('UN praises India’s digital education reforms.', 1),
]

# Column-oriented view of the samples, in the same order as listed above.
data = {
    'text': [headline for headline, _ in _samples],
    'label': [label for _, label in _samples],
}

df = pd.DataFrame(data)

# Step 3: Hold out 30% of the rows for testing (70% train / 30% test).
X, y = df['text'], df['label']
# The fixed random_state keeps the shuffled split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 4: Convert the raw headlines into TF-IDF feature vectors.
vectorizer = TfidfVectorizer(
    max_df=0.7,            # ignore terms found in more than 70% of training documents
    stop_words='english',  # drop common English stop words
)
# Learn vocabulary and IDF weights from the training text only,
# then apply that fitted mapping unchanged to the test text.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 5: Train Passive Aggressive Classifier
# The estimator shuffles the training data between epochs by default, so an
# unseeded model can give different predictions on each run. Seeding it makes
# the reported metrics reproducible.
model = PassiveAggressiveClassifier(max_iter=1000, random_state=42)
model.fit(X_train_tfidf, y_train)

# Step 6: Predict & Evaluate
# Score the held-out split. f1_score uses its default binary averaging, which
# reports the F1 of the positive class (label 1 = Real).
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Step 7: Show Results
print("Accuracy:", round(accuracy, 2))
print(" F1 Score:", round(f1, 2))
# Pin labels=[0, 1] so target_names always maps 0 -> Fake and 1 -> Real.
# Without it, classification_report raises ValueError when only one class
# shows up in y_test/y_pred, which can happen with such a small test split.
print(
    "\n Classification Report:\n",
    classification_report(y_test, y_pred, labels=[0, 1], target_names=['Fake', 'Real']),
)


Accuracy: 0.83
 F1 Score: 0.86

 Classification Report:
               precision    recall  f1-score   support

        Fake       0.67      1.00      0.80         2
        Real       1.00      0.75      0.86         4

    accuracy                           0.83         6
   macro avg       0.83      0.88      0.83         6
weighted avg       0.89      0.83      0.84         6

