# News Article Classification (Fake/Real)
This notebook classifies news articles as real or fake using a basic NLP pipeline: TF-IDF features fed to a logistic regression classifier.

In [2]:
import pandas as pd

# Read each source file, tag every row with its class, and keep only the
# article body plus that tag.
fake_df = pd.read_csv('Fake.csv').assign(label='FAKE')[['text', 'label']]
real_df = pd.read_csv('True.csv').assign(label='REAL')[['text', 'label']]

# Stack both classes into one frame, then shuffle the rows with a fixed seed
# so the order is reproducible, and renumber the index from 0.
merged_df = (
    pd.concat([fake_df, real_df], ignore_index=True)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Persist the combined dataset (without the index column) for later cells.
merged_df.to_csv('news_kaggle_merged.csv', index=False)

print("✅ Merged dataset saved as 'news_kaggle_merged.csv'")


✅ Merged dataset saved as 'news_kaggle_merged.csv'


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Load the merged dataset and encode the string labels as integers
# (FAKE -> 0, REAL -> 1).
df = pd.read_csv('news_kaggle_merged.csv')
df['label'] = df['label'].map({'FAKE': 0, 'REAL': 1})

# `.map` turns any label outside the mapping into NaN; fail here with a clear
# message instead of letting the model raise an obscure error during `fit`.
if df['label'].isna().any():
    raise ValueError(
        "Unexpected value in 'label' column; expected only 'FAKE' or 'REAL'"
    )

# Prepare data.
# `read_csv` parses empty fields as NaN, so an article with an empty body comes
# back as a float NaN rather than a string. The vectorizer rejects non-string
# documents, so treat missing text as an empty document.
X = df['text'].fillna('')
y = df['label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Vectorize: fit the TF-IDF vocabulary on the training split only, then reuse
# it for the test split so no test information leaks into the features.
vectorizer = TfidfVectorizer(stop_words='english')
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train model
model = LogisticRegression()
model.fit(X_train_vec, y_train)

# Evaluate on the held-out split (class 0 = FAKE, class 1 = REAL).
y_pred = model.predict(X_test_vec)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.98      0.98      4710
           1       0.98      0.99      0.98      4270

    accuracy                           0.98      8980
   macro avg       0.98      0.98      0.98      8980
weighted avg       0.98      0.98      0.98      8980



  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
  norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
