In [11]:
import pandas as pd
import joblib
import re
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

#  Steps 1-3: Read the reduced dataset, verify its schema, and discard
#  incomplete rows.
required_columns = ["text", "label"]

df = pd.read_csv("news_dataset_small.csv")

# Fail early if either column used for training is absent from the CSV.
missing_columns = [name for name in required_columns if name not in df.columns]
if missing_columns:
    raise ValueError("CSV must contain 'text' and 'label' columns")

# Keep only the two columns used below, then drop rows with a NaN in either.
df = df[required_columns]
df = df.dropna()

#  Step 4: Hold out 20% of the rows for testing.
#  The split is stratified on the label column and seeded (random_state=42)
#  so it is reproducible.
features, labels = df["text"], df["label"]
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

#  Step 5: Turn raw text into TF-IDF features.
#  English stop words are removed, and terms appearing in more than 70% of
#  documents are ignored (max_df=0.7).
tfidf_options = {"stop_words": "english", "max_df": 0.7}
vectorizer = TfidfVectorizer(**tfidf_options)

# The vocabulary/IDF weights are learned from the training split only;
# the test split is transformed with the already-fitted vectorizer.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

#  Step 6: Fit a logistic-regression classifier (library defaults) on the
#  TF-IDF training matrix. `fit` returns the estimator itself, so the
#  construction and training can be chained.
model = LogisticRegression().fit(X_train_vec, y_train)

#  Step 7: Score the classifier on the held-out test split.
y_pred = model.predict(X_test_vec)

# Compute both metrics up front, then report them.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f" Model Accuracy: {accuracy:.2%}")
print("\nClassification Report:\n", report)

#  Step 8: Persist the fitted classifier and the fitted vectorizer with
#  joblib, each to its own file (model first, then vectorizer).
for artifact, path in ((model, "lr_model.jb"), (vectorizer, "vectorizer.jb")):
    joblib.dump(artifact, path)
print(" Model and vectorizer saved successfully.")


 Model Accuracy: 95.60%

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.95      0.95       469
           1       0.95      0.96      0.96       531

    accuracy                           0.96      1000
   macro avg       0.96      0.96      0.96      1000
weighted avg       0.96      0.96      0.96      1000

 Model and vectorizer saved successfully.


✅ Train Accuracy: 1.0000
✅ Test Accuracy: 1.0000

📊 Classification Report:

              precision    recall  f1-score   support

        fake       1.00      1.00      1.00        71
        real       1.00      1.00      1.00        72
   uncertain       1.00      1.00      1.00        57

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200

💾 Model saved as multiclass_fake_news_model.pkl





📰 News: Bill Gates implanted microchips in COVID-19 vaccines
📌 Prediction: FAKE
🎯 Confidence: 34.06%
