In [8]:
import pandas as pd

fake = pd.read_csv("Fake.csv")
real = pd.read_csv("True.csv")

print("Fake News Shape:", fake.shape)
print("Real News Shape:", real.shape)


Fake News Shape: (23481, 4)
Real News Shape: (21417, 4)


In [9]:

# Add labels
fake["label"] = 0
real["label"] = 1

# Combine
data = pd.concat([fake, real])

# Shuffle
data = data.sample(frac=1).reset_index(drop=True)

# Check shape
print("Combined Data Shape:", data.shape)
print(data.head())

Combined Data Shape: (44898, 5)
                                               title  \
0  “LITTLE” MARCO RUBIO GRILLS Trump’s Secretary ...   
1  BEST EVER CROOKED HILLARY Collection Of Lies A...   
2  MEGYN KELLY Not Exactly Getting Warm Welcome A...   
3  White House supports renewal of spy law withou...   
4  Factbox: Trump on Twitter (July 11) - Mississi...   

                                                text       subject  \
0                                                         politics   
1  Hillary Clinton has had a loooong career at ly...      politics   
2  NBC is canceling the third hour of  Today  to ...      politics   
3  WASHINGTON (Reuters) - The Trump administratio...  politicsNews   
4  The following statements were posted to the ve...  politicsNews   

             date  label  
0    Jan 11, 2017      0  
1    May 28, 2016      0  
2    Jan 27, 2017      0  
3  March 1, 2017       1  
4  July 12, 2017       1  


In [10]:
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

nltk.download('stopwords')
nltk.download('wordnet')

stop_words = stopwords.words('english')
lemmatizer = WordNetLemmatizer()

def clean_text(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    words = text.split()
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    return ' '.join(words)

data["text"] = data["text"].apply(clean_text)
print(data["text"].head())


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


0                                                     
1    hillary clinton loooong career lying twisting ...
2    nbc canceling third hour today make way new st...
3    washington reuters trump administration want r...
4    following statement posted verified twitter ac...
Name: text, dtype: object


In [11]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

X = data["text"]
y = data["label"]

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = MultinomialNB()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9485523385300668
Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.93      0.95      4703
           1       0.93      0.97      0.95      4277

    accuracy                           0.95      8980
   macro avg       0.95      0.95      0.95      8980
weighted avg       0.95      0.95      0.95      8980



In [13]:
def predict_news(news_text):
    # Step 1: Text ko vector mein badlo (same vectorizer use karo)
    transformed_text = vectorizer.transform([news_text])
    
    # Step 2: Predict karo
    prediction = model.predict(transformed_text)

    # Step 3: Output dikhayo
    if prediction[0] == 0:
        print("❌ This news is FAKE")
    else:
        print("✅ This news is REAL")


In [14]:
predict_news("NASA has confirmed the discovery of a new habitable planet.")



✅ This news is REAL


In [15]:
predict_news("Pakistan wins the FIFA World Cup 2026.")
predict_news("Apple announces iPhone 20 with teleportation feature.")
predict_news("Government launches AI-powered education system in public schools.")


✅ This news is REAL
✅ This news is REAL
✅ This news is REAL
