In [2]:
# ========================
# üìò Importing Libraries
# ========================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import re
import string
import joblib


In [3]:
# ========================
# üìÇ Loading the Datasets
# ========================
fake = pd.read_csv('Fake.csv')
true = pd.read_csv('True.csv')

print("Fake news shape:", fake.shape)
print("True news shape:", true.shape)

fake.head()


Fake news shape: (23481, 4)
True news shape: (21417, 4)


Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year‚Äô...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama‚Äôs Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [4]:
# ========================
# üßæ Adding Labels
# ========================
fake['label'] = 0  # 0 ‚Üí Fake
true['label'] = 1  # 1 ‚Üí True

# Combine datasets
data = pd.concat([fake, true], axis=0)
data = data.sample(frac=1).reset_index(drop=True)  # Shuffle data

data.head()


Unnamed: 0,title,text,subject,date,label
0,U.S. Supreme Court turns away companies in Cal...,WASHINGTON (Reuters) - The U.S. Supreme Court ...,politicsNews,"October 11, 2016",1
1,Investors buoyed by Trump's readiness to move ...,(Reuters) - Wall Street‚Äôs predilection for a g...,politicsNews,"March 24, 2017",1
2,LOL! NYC COLLEGE Tells NYPD To Use Bathroom At...,Brooklyn College is advising New York City pol...,left-news,"Nov 21, 2017",0
3,Trump Ally Curt Schilling Blasts ‚ÄòHorrifying‚Äô...,"Former Red Sox pitcher Curt Schilling, the hos...",News,"October 1, 2017",0
4,Russian opposition leader's fraud conviction a...,"STRASBOURG, France (Reuters) - Europe s top hu...",worldnews,"October 17, 2017",1


In [5]:
# ========================
# üßπ Cleaning the Text
# ========================
def clean_text(text):
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)  # remove brackets
    text = re.sub("\\W", " ", text)     # remove non-word chars
    text = re.sub('https?://\S+|www\.\S+', '', text)  # remove links
    text = re.sub('<.*?>+', '', text)   # remove html tags
    text = re.sub("[%s]" % re.escape(string.punctuation), "", text)  # remove punctuations
    text = re.sub("\n", "", text)       # remove newline
    text = re.sub("\w*\d\w*", "", text) # remove words with digits
    return text

data['text'] = data['text'].apply(clean_text)
data.head()


  text = re.sub('\[.*?\]', '', text)  # remove brackets
  text = re.sub('https?://\S+|www\.\S+', '', text)  # remove links
  text = re.sub("\w*\d\w*", "", text) # remove words with digits


Unnamed: 0,title,text,subject,date,label
0,U.S. Supreme Court turns away companies in Cal...,washington reuters the u s supreme court ...,politicsNews,"October 11, 2016",1
1,Investors buoyed by Trump's readiness to move ...,reuters wall street s predilection for a g...,politicsNews,"March 24, 2017",1
2,LOL! NYC COLLEGE Tells NYPD To Use Bathroom At...,brooklyn college is advising new york city pol...,left-news,"Nov 21, 2017",0
3,Trump Ally Curt Schilling Blasts ‚ÄòHorrifying‚Äô...,former red sox pitcher curt schilling the hos...,News,"October 1, 2017",0
4,Russian opposition leader's fraud conviction a...,strasbourg france reuters europe s top hu...,worldnews,"October 17, 2017",1


In [6]:
# ========================
# ‚úÇÔ∏è Splitting Data
# ========================
X = data['text']
y = data['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

print("Training samples:", len(X_train))
print("Testing samples:", len(X_test))


Training samples: 33673
Testing samples: 11225


In [7]:
# ========================
# üî§ TF-IDF Vectorization
# ========================
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

print("TF-IDF matrix shape:", X_train_tfidf.shape)


TF-IDF matrix shape: (33673, 94674)


In [8]:
# ========================
# ü§ñ Logistic Regression Model
# ========================
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

y_pred = model.predict(X_test_tfidf)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.99      0.98      0.98      5835
           1       0.98      0.98      0.98      5390

    accuracy                           0.98     11225
   macro avg       0.98      0.98      0.98     11225
weighted avg       0.98      0.98      0.98     11225



In [9]:
# ========================
# üíæ Save Model and Vectorizer
# ========================
joblib.dump(model, 'fake_news_model.pkl')
joblib.dump(vectorizer, 'vectorizer.pkl')

print("‚úÖ Model and Vectorizer saved successfully!")


‚úÖ Model and Vectorizer saved successfully!


In [10]:
# ========================
# üîç Test Prediction
# ========================
def predict_news(news_text):
    cleaned = clean_text(news_text)
    transformed = vectorizer.transform([cleaned])
    prediction = model.predict(transformed)
    return "üì∞ Real News" if prediction[0] == 1 else "‚ö†Ô∏è Fake News"

# Example test
sample = "The president announced a new education policy today."
print(predict_news(sample))


‚ö†Ô∏è Fake News
