In [1]:
import os
from pathlib import Path

import nltk  # You might need to install NLTK and download datasets
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB


In [2]:
# Directory that holds Fake.csv and True.csv. Set the FAKE_NEWS_DATA_DIR
# environment variable to point the notebook at another location; when it is
# unset, the original download folder is used so existing setups keep working.
DATA_DIR = Path(os.environ.get("FAKE_NEWS_DATA_DIR", r"C:\Users\prath\Downloads"))

# Load the two source files; labels are attached in a later step.
fake_news_data = pd.read_csv(DATA_DIR / "Fake.csv")
real_news_data = pd.read_csv(DATA_DIR / "True.csv")

In [3]:
# Label the data
fake_news_data['label'] = 1  # Fake news is labeled as 1
real_news_data['label'] = 0  # Real news is labeled as 0

# Combine the datasets
combined_data = pd.concat([fake_news_data, real_news_data])

# Shuffle the data. The shuffle is seeded (same value the train/test split
# uses) so that a fresh "Restart & Run All" produces the same row order, and
# therefore the same split and the same metrics, on every run.
combined_data = combined_data.sample(frac=1, random_state=42).reset_index(drop=True)

# Text preprocessing
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    # NLTK raises LookupError when the stopwords corpus has not been
    # downloaded yet; fetch it once and retry so a fresh environment works.
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))


def preprocess_text(text):
    """Lowercase ``text`` and remove English stopwords.

    Parameters
    ----------
    text : str
        Raw document text. Non-string values (e.g. NaN/None from missing
        CSV cells) are treated as an empty document instead of raising.

    Returns
    -------
    str
        The remaining whitespace-separated tokens, lowercased and joined
        with single spaces. Punctuation is left attached to tokens.
    """
    if not isinstance(text, str):
        return ''
    # Lowercase before the lookup: the membership test below is done against
    # the lowercased tokens.
    return ' '.join(word for word in text.lower().split() if word not in stop_words)


combined_data['text'] = combined_data['text'].apply(preprocess_text)

In [4]:
# Features are the article text column; the target is the fake/real label.
X, y = combined_data['text'], combined_data['label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable for a given row order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# TF-IDF features: the vectorizer is fitted on the training split only and
# then applied, unchanged, to the test split.
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Raw term-count features, produced the same way (fit on train, reuse on test).
count_vectorizer = CountVectorizer()
X_train_counts = count_vectorizer.fit_transform(X_train)
X_test_counts = count_vectorizer.transform(X_test)

In [6]:
# Train a multinomial Naive Bayes classifier on the TF-IDF training features.
# `fit` returns the estimator itself, so construction and training are chained.
clf = MultinomialNB().fit(X_train_tfidf, y_train)

# Predict labels for the held-out TF-IDF features.
y_pred = clf.predict(X_test_tfidf)

In [7]:
# Score the predictions against the held-out labels: overall accuracy plus
# the per-class precision / recall / F1 text report.
accuracy, report = (
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
)

print("Accuracy:", accuracy)
print(report)

Accuracy: 0.9409799554565702
              precision    recall  f1-score   support

           0       0.95      0.93      0.94      4368
           1       0.94      0.95      0.94      4612

    accuracy                           0.94      8980
   macro avg       0.94      0.94      0.94      8980
weighted avg       0.94      0.94      0.94      8980

