In [30]:
import kagglehub

# Download latest version of dataset
path = kagglehub.dataset_download("emineyetm/fake-news-detection-datasets")

print("Path to dataset files:", path)


Path to dataset files: C:\Users\ayush_gyu7\.cache\kagglehub\datasets\emineyetm\fake-news-detection-datasets\versions\1


In [31]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import os
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report

In [32]:
import os

subfolder = os.path.join(path, "News _dataset")
print("Files inside subfolder:\n", os.listdir(subfolder))


Files inside subfolder:
 ['Fake.csv', 'True.csv']


In [33]:
# Correct paths (use CSV not XLSX)
fake_path = os.path.join(subfolder, "Fake.csv")
true_path = os.path.join(subfolder, "True.csv")

# Read CSV files
df_fake = pd.read_csv(fake_path)
df_true = pd.read_csv(true_path)

# Add labels
df_fake["label"] = 0   # Fake = 0
df_true["label"] = 1   # True = 1

# Merge both
data = pd.concat([df_fake, df_true], axis=0).reset_index(drop=True)

data.head()


Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


In [34]:
X = data["text"]  
y = data["label"]

In [35]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [36]:
vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [38]:
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, y_train)

# Predictions
y_pred = model.predict(X_test_tfidf)

In [39]:
print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9846325167037862

Confusion Matrix:
 [[4658   75]
 [  63 4184]]

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.99      4733
           1       0.98      0.99      0.98      4247

    accuracy                           0.98      8980
   macro avg       0.98      0.98      0.98      8980
weighted avg       0.98      0.98      0.98      8980



In [44]:
sample_news = [
    "Breaking news: Scientists discover water on Mars!",
    "The Indian Space Research Organisation (ISRO) successfully launched its Aditya-L1 mission, India’s first observatory to study the Sun, from the Sriharikota launch site"
]

In [45]:

predictions = model.predict(vectorizer.transform(sample_news))

In [46]:
predictions

array([0, 0])

In [47]:
for text, label in zip(sample_news, predictions):
    print(f"\nNews: {text}\nPrediction: {'True' if label==1 else 'Fake'}")


News: Breaking news: Scientists discover water on Mars!
Prediction: Fake

News: The Indian Space Research Organisation (ISRO) successfully launched its Aditya-L1 mission, India’s first observatory to study the Sun, from the Sriharikota launch site
Prediction: Fake
