In [None]:

import pandas as pd
import numpy as np
import re

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, f1_score, classification_report

In [None]:
# Load the two source files; each one holds articles of a single class.
fake = pd.read_csv("/content/Fake.csv")
real = pd.read_csv("/content/True.csv")

# Binary target used by every model below.
fake['label'] = 0   # Fake
real['label'] = 1   # Real

# Keep only the article body and the target, stacked into one frame.
# ignore_index=True discards the per-file row numbers, which would otherwise
# repeat once the two frames are concatenated.
data = pd.concat([fake[['text','label']], real[['text','label']]], ignore_index=True)

# Shuffle with a fixed seed: without random_state the row order changes on
# every run, so the later train/test split (and every reported metric) would
# not be reproducible even though the split itself is seeded.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

data.head()

Unnamed: 0,text,label
0,CARACAS (Reuters) - Members of Venezuela s gov...,1
1,Chris Christie s campaign has signaled that th...,0
2,DUBLIN (Reuters) - Progress is being made in t...,1
3,MOSCOW (Reuters) - The presence of armed men i...,1
4,The Obama trifecta: A Muslim an Illegal Alien ...,0



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.



In [None]:
def clean_text(text):
    """Normalize raw article text before vectorization.

    The text is lower-cased, every run of non-word characters (punctuation,
    symbols, whitespace) is replaced by a single space, and leading/trailing
    spaces are removed. Underscores and digits count as word characters and
    are kept.

    Parameters
    ----------
    text : str, None, or float NaN
        Raw text. ``None`` and NaN (how pandas represents a missing cell in a
        text column) are treated as empty text instead of raising.

    Returns
    -------
    str
        The normalized text; ``""`` for missing or empty input.
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    text = text.lower()
    # Whitespace is itself matched by \W, so a single `\W+` pass is equivalent
    # to replacing each \W with a space and then collapsing whitespace runs.
    text = re.sub(r'\W+', ' ', text)
    # Drop the stray space left where the input began or ended with punctuation.
    return text.strip()

# Normalize every article body element-wise, overwriting the raw text column.
data['text'] = data['text'].map(clean_text)

In [None]:
# Inputs are the article texts; targets are the 0/1 labels.
X, y = data['text'], data['label']

# Hold out 20% of the rows for evaluation, with a fixed seed for the split.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# TF-IDF settings: drop English stop words and ignore terms that occur in
# more than 70% of the documents.
tfidf_options = dict(stop_words='english', max_df=0.7)
vectorizer = TfidfVectorizer(**tfidf_options)

# Fit the vocabulary and IDF weights on the training split only, then reuse
# that fitted vectorizer for the held-out split.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [None]:
# Passive-Aggressive linear classifier trained on the TF-IDF features.
# random_state pins the estimator's shuffling of the training data; without it
# the scores printed below change from run to run.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=42)
pac.fit(X_train_tfidf, y_train)

# Evaluate on the held-out split.
y_pred_pac = pac.predict(X_test_tfidf)

print("Accuracy:", accuracy_score(y_test, y_pred_pac))
print("F1 Score:", f1_score(y_test, y_pred_pac))
print(classification_report(y_test, y_pred_pac))

Accuracy: 0.9929844097995546
F1 Score: 0.9925133689839573
              precision    recall  f1-score   support

           0       1.00      0.99      0.99      4788
           1       0.99      1.00      0.99      4192

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [None]:
# Linear SVM trained on the same TF-IDF features, for comparison with the
# Passive-Aggressive model.
# random_state pins the solver's data shuffling (it matters when the dual
# formulation is used), so the scores below are repeatable.
svm = LinearSVC(random_state=42)
svm.fit(X_train_tfidf, y_train)

# Evaluate on the held-out split.
y_pred_svm = svm.predict(X_test_tfidf)

print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print("F1 Score:", f1_score(y_test, y_pred_svm))
# Same per-class breakdown as the Passive-Aggressive cell, so the two models
# can be compared on identical terms.
print(classification_report(y_test, y_pred_svm))

Accuracy: 0.993652561247216
F1 Score: 0.9932199357678125


In [None]:
def predict_news(text):
    """Classify a single piece of news text as real or fake.

    The text goes through the same ``clean_text`` normalization and fitted
    TF-IDF ``vectorizer`` used for training, and is then scored by the
    Passive-Aggressive model ``pac``.

    Parameters
    ----------
    text : str
        Raw headline or article text.

    Returns
    -------
    str
        ``"REAL NEWS"`` when the predicted label is 1, otherwise ``"FAKE NEWS"``.
    """
    cleaned = clean_text(text)
    # transform expects an iterable of documents, hence the one-element list.
    features = vectorizer.transform([cleaned])
    label = pac.predict(features)[0]
    if label == 1:
        return "REAL NEWS"
    return "FAKE NEWS"

# Smoke-test the helper on one hand-written headline; the bare call leaves the
# returned label as the cell output.
# NOTE(review): the recorded output for this neutral headline is 'FAKE NEWS',
# and the real rows in the data.head() preview all open with a "(Reuters)"
# dateline. Possibly the model keys on that marker; verify before trusting
# predictions on text from other sources.
sample_headline = "Breaking: Government announces new education policy"
predict_news(sample_headline)

'FAKE NEWS'