In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
# Load the fake and real news datasets and merge them into one labelled frame.
# NOTE: both paths are machine-specific -- change them to the location of your datasets.
FAKE_CSV_PATH = "C:\\Users\\prath\\Downloads\\Fake.csv"
TRUE_CSV_PATH = "C:\\Users\\prath\\Downloads\\True.csv"


def load_labeled_news(csv_path, label):
    """Read one news CSV and prepare it for text classification.

    Parameters
    ----------
    csv_path : str or path-like
        Location of a CSV file that provides 'title' and 'text' columns.
    label : int
        Class label assigned to every row (1 for fake news, 0 for real news).

    Returns
    -------
    pandas.DataFrame
        The rows that have no missing values, with 'text' replaced by the
        title followed by the article body, and a constant 'label' column.
    """
    # Drop incomplete rows first: a missing title or body would turn the
    # combined text into NaN, which the vectorizer cannot process.
    articles = pd.read_csv(csv_path).dropna()
    return articles.assign(
        text=articles['title'] + " " + articles['text'],
        label=label,
    )


# Both files go through the same cleaning and labelling steps.
df = load_labeled_news(FAKE_CSV_PATH, label=1)
df_real = load_labeled_news(TRUE_CSV_PATH, label=0)

# Combine the real and fake news data; ignore_index avoids duplicate row
# labels, since each source frame carries its own index.
data = pd.concat([df, df_real], ignore_index=True)

In [3]:
# Hold out 20% of the combined articles for evaluation; the fixed seed
# makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data['text'],
    data['label'],
    test_size=0.2,
    random_state=42,
)


In [4]:
# Turn the raw article text into TF-IDF feature matrices.
MAX_TFIDF_FEATURES = 5000  # vocabulary size cap; adjust as needed

tfidf_vectorizer = TfidfVectorizer(max_features=MAX_TFIDF_FEATURES)

# Fit on the training text only, then apply the fitted vectorizer to the
# test text.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [5]:
# Fit a Multinomial Naive Bayes model on the TF-IDF training matrix.
# fit() returns the estimator itself, so construction and training chain.
clf = MultinomialNB().fit(X_train_tfidf, y_train)

# Predict a label for every held-out article
y_pred = clf.predict(X_test_tfidf)

In [6]:
# Score the predictions against the held-out labels, reporting each metric
# right after it is computed.

# Overall fraction of correctly classified articles
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Counts of true vs. predicted labels
confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", confusion)

# Per-class precision, recall, F1 and support
classification_rep = classification_report(y_test, y_pred)
print("Classification Report:\n", classification_rep)

Accuracy: 0.932293986636971
Confusion Matrix:
 [[3912  335]
 [ 273 4460]]
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.92      0.93      4247
           1       0.93      0.94      0.94      4733

    accuracy                           0.93      8980
   macro avg       0.93      0.93      0.93      8980
weighted avg       0.93      0.93      0.93      8980

