In [2]:
import pandas as pd
import string
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Load data
true_df = pd.read_csv("True.csv")
fake_df = pd.read_csv("Fake.csv")

# Add labels
true_df['label'] = 1  # Real
fake_df['label'] = 0  # Fake

# Combine datasets
data = pd.concat([true_df, fake_df], ignore_index=True)

# Shuffle data. The seed is fixed so the row order is the same on every run;
# the later train/test split selects rows by position, so an unseeded shuffle
# would make the split (and the reported metrics) change between runs.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Clean text
def clean_text(text):
    """Normalize raw article text before TF-IDF vectorization.

    Lowercases the text, strips URLs (``http(s)://...`` and ``www....``) and
    removes every character that is not an ASCII lowercase letter or
    whitespace. Whitespace is left as-is (runs of spaces are not collapsed).

    Args:
        text: Article text. Non-string values (e.g. ``None`` or a NaN from a
            missing cell) are treated as empty text instead of raising
            ``AttributeError`` on ``.lower()``.

    Returns:
        str: The cleaned text, or ``""`` for non-string input.
    """
    if not isinstance(text, str):
        return ""
    text = text.lower()
    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # Remove URLs
    text = re.sub(r'[^a-z\s]', '', text)               # Remove punctuation/numbers
    return text

# Run every article body through clean_text, element by element
data['text'] = data['text'].map(clean_text)


In [3]:
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X = vectorizer.fit_transform(data['text'])
y = data['label']

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Train/Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.9869710467706013
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4738
           1       0.99      0.99      0.99      4242

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [6]:
import pickle

# Persist the trained classifier and the fitted vectorizer, one pickle file each
for target_path, artifact in (("model.pkl", model), ("vectorizer.pkl", vectorizer)):
    with open(target_path, "wb") as sink:
        pickle.dump(artifact, sink)


In [5]:
def predict_news(news_text):
    """Classify a single piece of news text as "Real" or "Fake".

    The text is cleaned with ``clean_text``, vectorized with the notebook's
    ``vectorizer`` and passed to ``model``; a predicted label of 1 maps to
    "Real", anything else to "Fake".
    """
    features = vectorizer.transform([clean_text(news_text)])
    label = model.predict(features)[0]
    if label == 1:
        return "Real"
    return "Fake"


In [10]:
# Source of the Streamlit front-end, kept in a triple-quoted string so that it
# is written to app.py rather than executed inside the notebook (with plain ""
# delimiters the code below runs here and an empty app.py is produced).
# Backslashes in the regexes are doubled because this is not a raw string;
# they come out single in the generated file.
app_code = """
import streamlit as st
import pickle
import re

# Load trained model and vectorizer.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('vectorizer.pkl', 'rb') as vec_file:
    vectorizer = pickle.load(vec_file)

# Function to clean user input
def clean_text(text):
    text = text.lower()
    text = re.sub(r'https?://\\S+|www\\.\\S+', '', text)  # remove URLs
    text = re.sub(r'[^a-z\\s]', '', text)                 # remove punctuation/numbers
    return text

# Prediction function
def predict_news(news_text):
    clean = clean_text(news_text)
    vec = vectorizer.transform([clean])
    prediction = model.predict(vec)
    return "🟢 Real News" if prediction[0] == 1 else "🔴 Fake News"

# Streamlit App UI
st.set_page_config(page_title="Fake News Detector", page_icon="📰")
st.title("📰 Fake News Detection App")
st.markdown("Enter a news article or paragraph, and the model will predict whether it's **Real** or **Fake**.")

# Input field
news_input = st.text_area("📝 Paste News Article Here", height=200)

# Predict button
if st.button("🔍 Predict"):
    if news_input.strip() == "":
        st.warning("⚠️ Please enter some text.")
    else:
        result = predict_news(news_input)
        st.success(f"Prediction: {result}")
"""

# Write to app.py. UTF-8 is requested explicitly because the UI strings contain
# emoji, which the platform's default encoding may be unable to encode.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)


