In [1]:
import pandas as pd

# Load the data: one CSV per class
fake = pd.read_csv('Fake.csv')
real = pd.read_csv('True.csv')

# Add labels (0 = fake, 1 = real)
fake['label'] = 0
real['label'] = 1

# Combine datasets. ignore_index=True gives the stacked frame a fresh
# 0..n-1 index; without it the row labels of the two files overlap, which
# makes any label-based lookup on `data` ambiguous.
data = pd.concat([fake, real], ignore_index=True)

# Keep only the model input and the target. The explicit copy makes `data`
# an independent frame, so later column assignments act on it directly.
data = data[['text', 'label']].copy()

data.head()


Unnamed: 0,text,label
0,Donald Trump just couldn t wish all Americans ...,0
1,House Intelligence Committee Chairman Devin Nu...,0
2,"On Friday, it was revealed that former Milwauk...",0
3,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis used his annual Christmas Day mes...,0


In [2]:
import string
import re

# Translation table that deletes every ASCII punctuation character.
# Built once here rather than on every call to clean_text.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def clean_text(text):
    """Normalise a piece of news text before vectorisation.

    Steps, in order: lowercase, remove HTML tags, remove links, remove
    ASCII punctuation. Whitespace is left untouched, so removed spans may
    leave consecutive spaces behind.

    HTML tags are stripped *before* links: for markup such as
    ``<a href="http://x.com">label</a>`` the link pattern would otherwise
    consume the closing ``">``, the visible label and ``</a>``, leaving the
    broken tag fragment ``a href`` in the output.

    Args:
        text: Raw text. Any non-string value (e.g. ``None`` or a NaN from a
            missing CSV cell) is treated as empty text.

    Returns:
        The cleaned, lower-cased string; ``""`` for non-string input.
    """
    if not isinstance(text, str):
        return ""

    text = text.lower()  # lowercase
    text = re.sub(r"<.*?>", "", text)  # remove HTML tags (must precede link removal)
    text = re.sub(r"http\S+", "", text)  # remove links
    return text.translate(_PUNCTUATION_TABLE)  # remove punctuation

# Run every article body through clean_text, replacing the raw text column
data['text'] = data['text'].map(clean_text)

# Preview the first rows of the cleaned frame
data.head()


Unnamed: 0,text,label
0,donald trump just couldn t wish all americans ...,0
1,house intelligence committee chairman devin nu...,0
2,on friday it was revealed that former milwauke...,0
3,on christmas day donald trump announced that h...,0
4,pope francis used his annual christmas day mes...,0


In [5]:
from sklearn.model_selection import train_test_split

# Model input is the article text; the target is the 0/1 label (fake/real)
X, y = data['text'], data['label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Turn the text into TF-IDF features: English stop words are dropped and so
# is any term that occurs in more than 70% of the documents
vectorizer = TfidfVectorizer(
    stop_words='english',
    max_df=0.7,
)

# Learn the vocabulary and weights from the training split only, then apply
# that same fitted transform to the held-out split
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [8]:
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Create and train the model. The classifier shuffles the training data
# while fitting, so it is seeded (same seed as the train/test split) to make
# the reported accuracy reproducible on a fresh kernel.
model = PassiveAggressiveClassifier(random_state=42)
model.fit(X_train_tfidf, y_train)

# Make predictions on the held-out split
y_pred = model.predict(X_test_tfidf)

# Check accuracy: fraction of held-out articles labelled correctly
score = accuracy_score(y_test, y_pred)
print(f"Accuracy: {round(score*100, 2)}%")


Accuracy: 99.48%


In [9]:
def check_news(news_text):
    """Classify one piece of news text and print the verdict.

    The text is cleaned with ``clean_text``, converted to features with the
    already-fitted ``vectorizer`` and scored by the trained ``model``.
    Nothing is returned; the result is printed.

    Args:
        news_text: The raw text to classify.
    """
    features = vectorizer.transform([clean_text(news_text)])
    predicted_label = model.predict(features)[0]

    # Label 1 means real; any other predicted label is reported as fake
    verdict = "🟢 Real News" if predicted_label == 1 else "🔴 Fake News"
    print(verdict)

# Smoke test: run the full clean -> vectorise -> predict path on one headline
sample_headline = "Breaking: Scientists discover water on the moon!"
check_news(sample_headline)


🔴 Fake News
