# In [None]:

# 📰 Fake News Detection Using TF-IDF

# 📦 Import Libraries
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# 📥 Load and Combine Dataset
# Read the two source files and tag every row with its class:
# 0 = fake article, 1 = real article.
df_fake = pd.read_csv("Fake.csv")
df_real = pd.read_csv("True.csv")
df_fake['label'] = 0
df_real['label'] = 1

# Stack both frames, then shuffle so the two classes are interleaved.
# The shuffle is seeded (same seed the train/test split uses) so the row
# order -- and therefore the split and the reported metrics -- is
# reproducible from run to run. reset_index drops the duplicated per-file
# index values left behind by concat.
data = pd.concat([df_fake, df_real])
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Merge headline and body into one text field. Missing cells are replaced
# with '' first: NaN + str yields NaN, which would later reach clean_text
# as a float and fail inside re.sub.
data['text'] = data['title'].fillna('') + " " + data['text'].fillna('')

# 🧹 Data Cleaning & Preprocessing
# Make sure the NLTK resources used below are available locally.
for resource_name in ('stopwords', 'wordnet'):
    nltk.download(resource_name)

# English stop words held in a set for fast membership tests in clean_text.
english_stop_words = stopwords.words('english')
stop_words = set(english_stop_words)
# A single lemmatizer instance, shared by every clean_text call.
lemmatizer = WordNetLemmatizer()

def clean_text(text):
    """Normalize one document into a space-separated string of lemmas.

    Every character that is not an ASCII letter is replaced by a space, the
    result is lower-cased and split on whitespace, tokens found in the
    module-level ``stop_words`` set are dropped, and each remaining token is
    passed through the module-level ``lemmatizer``.

    Args:
        text: Raw document text. Anything that is not a ``str`` (for example
            the ``None``/NaN value pandas uses for a missing cell) is treated
            as an empty document instead of raising ``TypeError`` in
            ``re.sub``.

    Returns:
        The cleaned tokens joined by single spaces; ``''`` when no token
        survives cleaning.
    """
    if not isinstance(text, str):
        return ''
    letters_only = re.sub(r'[^a-zA-Z]', ' ', text)
    tokens = letters_only.lower().split()
    # Stop words are filtered before lemmatizing, so the membership test is
    # made against the token as it appears in the text, not its lemma.
    lemmas = [lemmatizer.lemmatize(token) for token in tokens if token not in stop_words]
    return ' '.join(lemmas)

# Run every entry of the 'text' column through clean_text and keep the
# result in a separate column, leaving 'text' itself unchanged.
source_text = data['text']
data['clean_text'] = source_text.apply(clean_text)

# ☁️ WordCloud Visualization
# One long string per class: all cleaned documents joined by single spaces.
fake_news = data.loc[data['label'] == 0, 'clean_text'].str.cat(sep=' ')
real_news = data.loc[data['label'] == 1, 'clean_text'].str.cat(sep=' ')

# Draw the two clouds side by side: fake on the left, real on the right.
plt.figure(figsize=(12, 6))
cloud_panels = [
    (fake_news, 'Fake News WordCloud'),
    (real_news, 'Real News WordCloud'),
]
for panel_index, (corpus, panel_title) in enumerate(cloud_panels, start=1):
    plt.subplot(1, 2, panel_index)
    cloud = WordCloud(width=800, height=400).generate(corpus)
    plt.imshow(cloud)
    plt.title(panel_title)
plt.show()

# 🔡 TF-IDF Vectorization
# Vectorize the cleaned text, capping the vocabulary at 5000 terms.
tfidf = TfidfVectorizer(max_features=5000)
# Keep the sparse matrix that fit_transform returns instead of calling
# .toarray(): a dense rows x 5000 float array costs far more memory, and
# both train_test_split and LogisticRegression accept sparse input.
X = tfidf.fit_transform(data['clean_text'])
y = data['label']

# 📊 Train/Test Split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable.
# NOTE(review): the vectorizer above is fitted on every row before this
# split, so its vocabulary and IDF weights are influenced by rows that end
# up in the test set. Fit it on the training rows only if a strictly
# held-out estimate is required.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 🤖 Model Training (Logistic Regression)
# fit() returns the estimator itself, so construction and training are
# chained; the classifier uses scikit-learn's default settings.
model = LogisticRegression().fit(X_train, y_train)
# Predicted labels for the held-out rows, used by the evaluation below.
y_pred = model.predict(X_test)

# 📈 Evaluation Metrics
# Overall fraction of held-out rows that were labelled correctly.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Confusion matrix as an annotated heatmap; rows are the true labels and
# columns the predicted labels, matching the axis captions set below.
conf_matrix = confusion_matrix(y_test, y_pred)
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

# Per-class precision, recall, F1 and support.
report = classification_report(y_test, y_pred)
print(report)
