In [6]:
pip install nltk


Collecting nltk
  Using cached nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting click (from nltk)
  Using cached click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Collecting regex>=2021.8.3 (from nltk)
  Downloading regex-2024.11.6-cp312-cp312-win_amd64.whl.metadata (41 kB)
Collecting tqdm (from nltk)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Using cached nltk-3.9.1-py3-none-any.whl (1.5 MB)
Downloading regex-2024.11.6-cp312-cp312-win_amd64.whl (273 kB)
Using cached click-8.1.8-py3-none-any.whl (98 kB)
Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm, regex, click, nltk
Successfully installed click-8.1.8 nltk-3.9.1 regex-2024.11.6 tqdm-4.67.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
import numpy as np
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 🛠️ Install NLTK stopwords
nltk.download('stopwords')

# 📂 **Dataset Paths (Update Your File Path If Needed)**
train_path = r"D:\fake news detection\fake-news\train.csv"
test_path = r"D:\fake news detection\fake-news\test.csv"
submit_path = r"D:\fake news detection\fake-news\submit.csv"

# 📰 **Loading Dataset**
news_dataset = pd.read_csv(train_path)

# 🔎 **Check for Missing Values**
print("Missing Values:\n", news_dataset.isnull().sum())

# 🚀 **Fill Missing Values**
news_dataset = news_dataset.fillna('')

# 🔗 **Merging Author & Title**
news_dataset['content'] = news_dataset['author'] + ' ' + news_dataset['title']

# 📌 **Separating Features (X) & Labels (Y)**
X = news_dataset['content']
Y = news_dataset['label']

print("X Shape:", X.shape)
print("Y Shape:", Y.shape)

# 🔄 **Text Preprocessing Function (Stemming + Stopword Removal)**
port_stem = PorterStemmer()

def stemming(content):
    content = re.sub('[^a-zA-Z]', ' ', content)  # Remove non-alphabetic characters
    content = content.lower().split()  # Lowercase & split words
    content = [port_stem.stem(word) for word in content if word not in stopwords.words('english')]
    return ' '.join(content)

# ✨ **Apply Preprocessing**
X = X.apply(stemming)

# 🔢 **Convert Text Data to Numerical (TF-IDF)**
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(X)

# 🔀 **Train-Test Split**
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

# 🏋️ **Train Model (Logistic Regression)**
model = LogisticRegression()
model.fit(X_train, Y_train)

# 📊 **Accuracy Scores**
X_train_pred = model.predict(X_train)
train_accuracy = accuracy_score(X_train_pred, Y_train)
print("✅ Accuracy (Training Data):", train_accuracy)

X_test_pred = model.predict(X_test)
test_accuracy = accuracy_score(X_test_pred, Y_test)
print("✅ Accuracy (Test Data):", test_accuracy)

# 🔮 **Prediction on a Sample News**
X_new = X_test[3]
prediction = model.predict(X_new.reshape(1, -1))  # Reshape to avoid errors

if prediction[0] == 0:
    print("📰 The news is REAL")
else:
    print("⚠️ The news is FAKE")

print("Actual Label:", Y_test.iloc[3])


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\abhay\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Missing Values:
 id           0
title      558
author    1957
text        39
label        0
dtype: int64
X Shape: (20800,)
Y Shape: (20800,)
✅ Accuracy (Training Data): 0.9863581730769231
✅ Accuracy (Test Data): 0.9790865384615385
📰 The news is REAL
Actual Label: 0


In [2]:
import joblib

# ✅ Save model & vectorizer in correct path
joblib.dump(model, "D:/fake news detection/model_fake_news.joblib")
joblib.dump(vectorizer, "D:/fake news detection/vectorizer_fake_news.joblib")

print("🎉 Model aur Vectorizer `.joblib` me save ho gaye ✅")


NameError: name 'model' is not defined