In [None]:
# Step 1: Install required packages
# !pip install nltk scikit-learn pandas joblib

import pandas as pd
import numpy as np
import string
import nltk
import joblib
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# Download the NLTK stopword corpus that clean_text() relies on
nltk.download('stopwords')

# Step 2: Load Dataset (upload 'Fake.csv' and 'True.csv')
# NOTE(review): the /content/ prefix presumably targets a Colab-style upload
# directory -- adjust the paths when running elsewhere.
fake = pd.read_csv("/content/Fake.csv")
true = pd.read_csv("/content/True.csv")

# Step 3: Label data and combine
fake['label'] = 1  # FAKE
true['label'] = 0  # REAL
# Only the headline and its label are kept; rows missing either are dropped.
df = pd.concat([fake, true])[['title', 'label']].dropna()
# Shuffle with a fixed seed. Without it the row order changes on every run,
# so the seeded train/test split further down would still select different
# rows each time and the reported metrics would not be reproducible.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Step 4: Preprocess text
# Lazily-filled cache for the English stopword set, so the corpus is read
# once instead of once per token.
_STOPWORD_CACHE = {}

# Translation table that deletes every ASCII punctuation character in a
# single pass over the string.
_PUNCT_TABLE = str.maketrans('', '', string.punctuation)


def _english_stopwords():
    """Return the NLTK English stopwords as a frozenset, loading them once."""
    if 'english' not in _STOPWORD_CACHE:
        _STOPWORD_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOPWORD_CACHE['english']


def clean_text(text, stop_words=None):
    """Normalise a piece of text for vectorisation.

    The text is lower-cased, stripped of the characters in
    ``string.punctuation``, split on whitespace, and filtered of stopwords.

    Args:
        text: The string to clean.
        stop_words: Optional container of lower-case words to remove. When
            omitted, the NLTK English stopword list is used (this requires
            the 'stopwords' corpus to have been downloaded).

    Returns:
        The remaining tokens joined by single spaces; an empty string when
        nothing survives the filtering.
    """
    if stop_words is None:
        # Previously the stopword list was rebuilt for every token and
        # searched linearly; a cached set makes each lookup O(1).
        stop_words = _english_stopwords()
    normalized = text.lower().translate(_PUNCT_TABLE)
    return ' '.join(word for word in normalized.split() if word not in stop_words)

# Run every headline through clean_text and keep the result alongside the raw title
df['clean_text'] = df['title'].map(clean_text)

# Step 5: Hold out 20% of the rows for evaluation (seeded split)
X, y = df['clean_text'], df['label']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 6: Learn the TF-IDF vocabulary on the training split only, then
# reuse that fitted vectorizer to encode the held-out split
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Step 7: Fit a multinomial Naive Bayes classifier on the TF-IDF features
model = MultinomialNB().fit(X_train_vec, y_train)

# Step 8: Score the classifier on the held-out split
y_pred = model.predict(X_test_vec)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
test_report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", test_report)

# Step 9: Persist the classifier and its vectorizer; both files are needed
# together to score new text
joblib.dump(model, "model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Accuracy: 0.9400890868596882

Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.92      0.94      4367
           1       0.93      0.96      0.94      4613

    accuracy                           0.94      8980
   macro avg       0.94      0.94      0.94      8980
weighted avg       0.94      0.94      0.94      8980



['vectorizer.pkl']