# Advanced Fake News Detection Using NLP
This notebook demonstrates how to detect fake news with Natural Language Processing: article text is cleaned, converted to TF-IDF features, and classified with a logistic regression model (label 0 = fake, 1 = real).

## Step 1: Import Required Libraries

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import warnings
# NOTE(review): with no category or module filter this hides every warning
# raised by any code that runs afterwards in this notebook, including ones
# that may matter for the model (e.g. convergence or deprecation warnings).
# Consider narrowing the filter once the notebook is stable.
warnings.filterwarnings('ignore')


## Step 2: Load the Dataset

In [None]:

# Load the two source CSVs (fake and real news) and combine them into one
# labelled frame with columns `text` and `label`.
fake = pd.read_csv("https://raw.githubusercontent.com/clmentbisaillon/fake-and-real-news-dataset/master/Fake.csv")
real = pd.read_csv("https://raw.githubusercontent.com/clmentbisaillon/fake-and-real-news-dataset/master/True.csv")

fake['label'] = 0  # Fake
real['label'] = 1  # Real

# Shuffle with a fixed seed: without it the row order changes on every run,
# which makes the later seeded train/test split (and therefore every reported
# metric) non-reproducible.
df = (
    pd.concat([fake[['text', 'label']], real[['text', 'label']]])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
df.head()


## Step 3: Preprocessing Text Data

In [None]:

# Download the NLTK 'stopwords' corpus so the English stopword list used by
# clean_text is available locally.
nltk.download('stopwords')
# `stopwords` is the corpus object clean_text queries via stopwords.words('english').
from nltk.corpus import stopwords

_ENGLISH_STOP_WORDS = None  # built on first use by _english_stop_words()


def _english_stop_words():
    """Return the NLTK English stopword list as a frozenset, loading it once."""
    global _ENGLISH_STOP_WORDS
    if _ENGLISH_STOP_WORDS is None:
        _ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOP_WORDS


def clean_text(text, stop_words=None):
    """Normalise one document for vectorisation.

    Steps, in order: lowercase, delete every character listed in
    ``string.punctuation``, split on whitespace, drop stopwords, and re-join
    the remaining words with single spaces.

    Parameters
    ----------
    text : str
        Raw document text. Any non-string value (for example the float NaN
        pandas uses for a missing cell) is treated as an empty document.
    stop_words : collection of str, optional
        Words to remove. Defaults to the NLTK English stopword list, which is
        loaded once and cached instead of being re-read for every word.

    Returns
    -------
    str
        The cleaned text; ``""`` when nothing is left or the input was not a
        string.
    """
    if not isinstance(text, str):
        return ""
    if stop_words is None:
        stop_words = _english_stop_words()

    # str.translate with a deletion table removes the same characters as the
    # per-character filter, in a single pass.
    without_punctuation = text.lower().translate(
        str.maketrans('', '', string.punctuation)
    )
    return ' '.join(
        word for word in without_punctuation.split() if word not in stop_words
    )

# Run every article through clean_text and keep the result next to the raw
# text; the TF-IDF step reads this `clean_text` column.
df['clean_text'] = df['text'].map(clean_text)
df.head()


## Step 4: Vectorization using TF-IDF

In [None]:

# Turn the cleaned text into TF-IDF features, keeping at most 5000 terms.
tfidf = TfidfVectorizer(max_features=5000)

# fit_transform returns a SciPy sparse matrix. It is kept sparse on purpose:
# converting it with .toarray() would allocate a dense
# (n_documents x 5000) float array, almost all zeros, and both
# train_test_split and LogisticRegression accept sparse input directly.
# NOTE(review): the vectorizer is fitted on the whole corpus before the
# train/test split that follows, so its vocabulary and IDF weights also see
# the test documents. Consider fitting on the training portion only.
X = tfidf.fit_transform(df['clean_text'])
y = df['label']


## Step 5: Train-Test Split

In [None]:

# Hold out 20% of the rows for evaluation; the fixed random_state pins which
# rows land in each partition for a given input order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## Step 6: Train Logistic Regression Model

In [None]:

# Fit a logistic regression classifier with default settings on the training
# split (fit returns the estimator itself), then predict the held-out rows.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)


## Step 7: Evaluate the Model

In [None]:

# Headline metrics on the held-out split.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Confusion matrix with explicit class order (0 = Fake, 1 = Real, as assigned
# when the dataset was loaded). scikit-learn puts true labels on the rows and
# predicted labels on the columns, so the heatmap's y-axis is "true" and its
# x-axis is "predicted"; labelling both makes the figure readable on its own.
class_names = ['Fake', 'Real']
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set(title="Confusion Matrix", xlabel="Predicted label", ylabel="True label")
plt.show()


## Step 8: Save the Model and Vectorizer

In [None]:

import pickle

# Persist the trained classifier and the fitted vectorizer, each to its own
# pickle file in the current working directory. Both are needed at inference
# time: the vectorizer turns new text into the features the model was trained on.
for artifact_path, artifact in (
    ("fake_news_model.pkl", model),
    ("tfidf_vectorizer.pkl", tfidf),
):
    with open(artifact_path, "wb") as f:
        pickle.dump(artifact, f)


## Step 9: GitHub Implementation

In [None]:

# Instructions:
# 1. Create a GitHub repository (e.g., "fake-news-detector")
# 2. Upload these files: fake_news_model.pkl, tfidf_vectorizer.pkl, and a Streamlit or Flask app script.
# 3. Add a README.md to explain how to run the app.
# 4. Optionally deploy using Streamlit Cloud or Hugging Face Spaces.
