# 📰 Fake News Detection using Machine Learning
### Python + Scikit-learn Project

This notebook builds a Fake News Detection model using TF-IDF and Logistic Regression.

In [None]:
import pandas as pd
import numpy as np
import re
import nltk
import matplotlib.pyplot as plt

from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Fetch the NLTK stop-word corpus; stopwords.words('english') in the
# preprocessing cell needs it to be available locally.
nltk.download('stopwords')

## Load Dataset
Make sure `Fake.csv` and `True.csv` are in the same folder as this notebook (they are loaded by relative path).

In [None]:
# Read the two source files; each one holds articles of a single class.
fake = pd.read_csv('Fake.csv')
true = pd.read_csv('True.csv')

# Binary target: 0 = fake, 1 = real.
fake['label'] = 0
true['label'] = 1

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# both halves keep their own row numbers, so index labels repeat and
# label-based lookups (e.g. data.loc[0]) return more than one row.
data = pd.concat([fake, true], ignore_index=True)

# Only the article body and its label are used downstream.
data = data[['text', 'label']]
data.head()

## Data Preprocessing

In [None]:
# A set gives O(1) membership tests when filtering every word of every article.
stop_words = set(stopwords.words('english'))

# Compiled once at cell level instead of being looked up on every call.
_NON_LETTER_RE = re.compile(r'[^a-zA-Z]')


def clean_text(text):
    """Normalise one article body for vectorization.

    The text is lower-cased, every character that is not an ASCII letter is
    replaced by a space, and English stop words are dropped.

    Args:
        text: The raw article text. Non-string values (for example the NaN
            that pandas uses for an empty CSV cell, or None) are treated as
            an empty document instead of raising AttributeError.

    Returns:
        The remaining words joined by single spaces; "" when nothing is left.
    """
    if not isinstance(text, str):
        return ""

    letters_only = _NON_LETTER_RE.sub(' ', text.lower())
    return " ".join(
        word for word in letters_only.split() if word not in stop_words
    )

# Clean every article body element-wise, overwriting the raw text column.
cleaned_text = data['text'].map(clean_text)
data['text'] = cleaned_text
data.head()

## TF-IDF Vectorization

In [None]:
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(data['text'])
y = data['label']

## Train Test Split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## Train Logistic Regression Model

In [None]:
# Build the classifier with a 1000-iteration cap for the solver, then fit it
# on the training features and labels.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train, y=y_train)

## Model Evaluation

In [None]:
# Score the held-out split and compute both metrics before printing them.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print('Accuracy:', test_accuracy)
print('\nClassification Report:\n')
print(test_report)

## Test Custom News

In [None]:
def predict_news(text):
    """Classify a single piece of news text as 'REAL' or 'FAKE'.

    The text goes through clean_text, is transformed by the fitted
    vectorizer, and is scored by the trained model. A predicted label of 1
    maps to 'REAL'; any other label maps to 'FAKE'.
    """
    features = vectorizer.transform([clean_text(text)])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return 'REAL'
    return 'FAKE'

# Example: classify one hand-written headline and show the verdict.
sample_news = "Breaking news: Scientists discover new planet similar to Earth."
sample_verdict = predict_news(sample_news)
print(sample_verdict)