In [5]:
## Fake news Detection 

# Fake News Detection using Logistic Regression, NLTK, and Streamlit

# Import Libraries
import pandas as pd
import numpy as np
import re
import nltk
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, classification_report
import joblib

# NLTK Download
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

# Step 1: Load Data
data_fake = pd.read_csv('C:\\Users\\prach\\Downloads\\Fake.csv')
data_true = pd.read_csv('C:\\Users\\prach\\Downloads\\True (1).csv')

data_fake['label'] = 0  # Fake = 0
data_true['label'] = 1  # Real = 1

data = pd.concat([data_fake, data_true], axis=0)
data = data[['text', 'label']].sample(frac=1).reset_index(drop=True)

# Step 2: Text Cleaning Function
def clean_text(text):
    text = re.sub('[^a-zA-Z]', ' ', str(text))
    text = text.lower()
    words = text.split()
    words = [stemmer.stem(w) for w in words if w not in stop_words]
    return ' '.join(words)

# Clean the text
data['text'] = data['text'].apply(clean_text)

# Step 3: TF-IDF Vectorization
tfidf = TfidfVectorizer(max_df=0.7)
X = tfidf.fit_transform(data['text'])
y = data['label']

# Step 4: Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train Logistic Regression
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Step 6: Evaluate
y_pred = model.predict(X_test)
print("\nModel Evaluation")
print("Accuracy:", round(accuracy_score(y_test, y_pred) * 100, 2), "%")
print("F1 Score:", round(f1_score(y_test, y_pred) * 100, 2), "%")
print(classification_report(y_test, y_pred))

# Step 7: Save Model and Vectorizer
joblib.dump(model, 'fake_news_model.pkl')
joblib.dump(tfidf, 'tfidf_vectorizer.pkl')

# Step 8: Streamlit App
st.title("📰 Fake News Detection App")

user_input = st.text_area("Enter a news article or headline:")

if st.button("Predict"):
    if not user_input.strip():
        st.warning("Please enter some news text.")
    else:
        cleaned_input = clean_text(user_input)A
        vector_input = tfidf.transform([cleaned_input])
        result = model.predict(vector_input)[0]
        st.success("🟢 Real News" if result == 1 else "🔴 Fake News")


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\prach\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!



Model Evaluation
Accuracy: 98.46 %
F1 Score: 98.39 %
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4683
           1       0.98      0.98      0.98      4297

    accuracy                           0.98      8980
   macro avg       0.98      0.98      0.98      8980
weighted avg       0.98      0.98      0.98      8980



