In [2]:
import pandas as pd 
from sklearn.model_selection import train_test_split 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report , accuracy_score
import re
import joblib 
import string



In [None]:





fake = pd.read_csv('Fake.csv')
true = pd.read_csv('True.csv')

In [4]:
fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [5]:
true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [6]:
fake['class']=0
true['class']=1

In [7]:
data = pd.concat([fake,true],axis=0)

In [8]:
data.sample(5)

Unnamed: 0,title,text,subject,date,class
4351,"U.S. lawmakers back Syria strikes, demand a pl...",WASHINGTON (Reuters) - Members of the U.S. Con...,politicsNews,"April 7, 2017",1
2409,Ruth Bader Ginsburg Rebukes Trump In Defense ...,One of the most revered Supreme Court Justices...,News,"February 23, 2017",0
7842,Judge gives Florida voters more time to regist...,(Reuters) - Florida residents were given six e...,politicsNews,"October 12, 2016",1
12965,Former Catalan leader says to stay in Belgium ...,BRUSSELS (Reuters) - Former Catalan leader Car...,worldnews,"December 6, 2017",1
3854,Trump says he fired FBI chief because he 'wasn...,WASHINGTON (Reuters) - U.S. President Donald T...,politicsNews,"May 10, 2017",1


In [9]:
data = data.drop(['title','subject','date'],axis = 1)

In [10]:
data.reset_index(inplace=True)

In [11]:
data.drop(['index'],axis=1,inplace=True)

In [12]:
data.sample(15)

Unnamed: 0,text,class
24271,WASHINGTON (Reuters) - U.S. House Republicans ...,1
9700,This is great! Gen. John Kelly surprised the W...,0
39811,LONDON (Reuters) - Britain said on Friday it ...,1
24381,WASHINGTON (Reuters) - Republicans in the U.S....,1
28777,(Reuters) - Here are some of the highlights of...,1
34771,WASHINGTON (Reuters) - The United States and R...,1
11655,"Last November, the UNC campus was full of angr...",0
16636,The replacement of workers by big corporations...,0
7329,The Ultra Music Festival in Miami is one of th...,0
33290,"MEXICO CITY (Reuters) - At first, Mexico’s gov...",1


In [13]:
def clean_text(text):
    text = text.lower()
    # regular expressions to refine text
    
    text = re.sub('\[.*?\]',"",text)
    text = re.sub("\\W"," ",text)
    text = re.sub("https?:://\S+|www\.\S","",text)
    text = re.sub("<.*?>+","",text)
    text = re.sub("[%s]"% re.escape(string.punctuation), "" , text)
    text = re.sub("\n","",text)
    text = re.sub("\w*\d\w*","",text)
    return text


In [14]:
data['text'] = data['text'].apply(clean_text)

In [15]:
data.sample(5)

Unnamed: 0,text,class
31827,washington reuters u s president barack o...,1
13249,afshin rattans goes underground inside the ecu...,0
25691,seoul reuters north korea said on saturday...,1
935,on independence day national public radio twe...,0
4374,speaking to a bunch of his merry morons on mon...,0


In [16]:
x=data['text']
y= data['class']

# using 25% of the dataset 
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size=0.25,random_state=42)


In [None]:
vectorizer = TfidfVectorizer(max_features=5000)  # Limiting max-features to 5000
xv_train = vectorizer.fit_transform(xtrain)
xv_test = vectorizer.transform(xtest)

In [26]:
# Initialize models
models = {
    "Logistic Regression": LogisticRegression(max_iter=200),
    "SVM": SVC(probability=True, kernel='linear'),  
    "Naïve Bayes": MultinomialNB(),
    "Random Forest": RandomForestClassifier(n_estimators=50, max_depth=10),  
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=50)  
}



In [28]:
accuracies = {}
model_files = {}
for name, model in models.items():
    model.fit(xv_train, ytrain)
    predictions = model.predict(xv_test)
    accuracy = accuracy_score(ytest, predictions)
    accuracies[name] = accuracy * 100
    model_file = f"{name.lower().replace(' ', '_')}.jb"
    model_files[name] = model_file
    print(f"\nModel: {name} (Accuracy: {accuracy * 100:.2f}%)\n")
    print(classification_report(ytest, predictions))
    joblib.dump(model, model_file)



Model: Logistic Regression (Accuracy: 98.69%)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5895
           1       0.98      0.99      0.99      5330

    accuracy                           0.99     11225
   macro avg       0.99      0.99      0.99     11225
weighted avg       0.99      0.99      0.99     11225


Model: SVM (Accuracy: 99.34%)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5895
           1       0.99      0.99      0.99      5330

    accuracy                           0.99     11225
   macro avg       0.99      0.99      0.99     11225
weighted avg       0.99      0.99      0.99     11225


Model: Naïve Bayes (Accuracy: 93.03%)

              precision    recall  f1-score   support

           0       0.93      0.94      0.93      5895
           1       0.93      0.92      0.93      5330

    accuracy                           0.93     11225
   macro

In [29]:
print(classification_report(ytest,predictions))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      5895
           1       0.99      1.00      1.00      5330

    accuracy                           1.00     11225
   macro avg       1.00      1.00      1.00     11225
weighted avg       1.00      1.00      1.00     11225



In [30]:
joblib.dump(vectorizer, "vectorizer.jb")
# Save accuracies after training
joblib.dump(accuracies, "model_accuracies.jb")


['model_accuracies.jb']

In [None]:
accuracies = joblib.load("model_accuracies.jb")


model_files = {
    "Logistic Regression": "logistic_regression.jb",
    "SVM": "svm.jb",
    "Naïve Bayes": "naïve_bayes.jb",
    "Random Forest": "random_forest.jb",
    "Gradient Boosting": "gradient_boosting.jb"
}


In [6]:
import requests
import pandas as pd


In [8]:
def fetch_news(api_key, query='fake news', language='en', page_size=5):
    url = f'https://newsapi.org/v2/everything?q={query}&language={language}&pageSize={page_size}&apiKey={"6f71237ad68849519af767614c450566"}'
    response = requests.get(url)
    data = response.json()
    articles = data.get('articles', [])
    news_list = [{
        'title': article['title'],
        'description': article['description'],
        'content': article['content'],
        'source': article['source']['name'],
        'url': article['url'],
        'publishedAt': article['publishedAt']
    } for article in articles]
    
    return pd.DataFrame(news_list)


In [53]:
api_key = '6f71237ad68849519af767614c450566'  
news_df = fetch_news(api_key)
news_df.head()

Unnamed: 0,title,description,content,source,url,publishedAt
0,The fake promise of better Siri,Apple Intelligence is technically lots of thin...,"On The Vergecast: AI gadget failures, how Tesl...",The Verge,https://www.theverge.com/the-vergecast/629652/...,2025-03-14T12:50:21Z
1,Stocks Plunge After Trump Declares Web Rumor o...,The stock market had a brief moment of hope on...,The stock market went on a rollercoaster ride ...,Gizmodo.com,https://gizmodo.com/stocks-plunge-after-trump-...,2025-04-07T17:32:00Z
2,People Making AI Studio Ghibli Images Are Now ...,"Meanwhile, actual Ghibli creation Princess Mon...",The trend of using Open AI’s ChatGPT to create...,Gizmodo.com,https://gizmodo.com/ghibli-ai-chatgpt-fake-cea...,2025-03-28T20:35:34Z
3,Prosper murders 'expose lack of control' over ...,Nicholas Prosper bought a gun with a fake lice...,Phil Shepka &amp; Laura Foster\r\nWatch: Kille...,BBC News,https://www.bbc.com/news/articles/ce8vpz7dev5o,2025-03-20T01:26:41Z
4,The Depressing Reason Those Terrible Fake Movi...,"As AI only gets better at fooling audiences, m...",The latest viral AI craze has seen quickly gen...,Gizmodo.com,https://gizmodo.com/the-depressing-reason-thos...,2025-03-28T17:30:45Z


In [57]:
def combine_fields(df):
    df['text'] = df[['title', 'description', 'content']].fillna('').agg(' '.join, axis=1)
    return df

news_df = combine_fields(news_df)
news_df.head()

Unnamed: 0,title,description,content,source,url,publishedAt,text
0,The fake promise of better Siri,Apple Intelligence is technically lots of thin...,"On The Vergecast: AI gadget failures, how Tesl...",The Verge,https://www.theverge.com/the-vergecast/629652/...,2025-03-14T12:50:21Z,The fake promise of better Siri Apple Intellig...
1,Stocks Plunge After Trump Declares Web Rumor o...,The stock market had a brief moment of hope on...,The stock market went on a rollercoaster ride ...,Gizmodo.com,https://gizmodo.com/stocks-plunge-after-trump-...,2025-04-07T17:32:00Z,Stocks Plunge After Trump Declares Web Rumor o...
2,People Making AI Studio Ghibli Images Are Now ...,"Meanwhile, actual Ghibli creation Princess Mon...",The trend of using Open AI’s ChatGPT to create...,Gizmodo.com,https://gizmodo.com/ghibli-ai-chatgpt-fake-cea...,2025-03-28T20:35:34Z,People Making AI Studio Ghibli Images Are Now ...
3,Prosper murders 'expose lack of control' over ...,Nicholas Prosper bought a gun with a fake lice...,Phil Shepka &amp; Laura Foster\r\nWatch: Kille...,BBC News,https://www.bbc.com/news/articles/ce8vpz7dev5o,2025-03-20T01:26:41Z,Prosper murders 'expose lack of control' over ...
4,The Depressing Reason Those Terrible Fake Movi...,"As AI only gets better at fooling audiences, m...",The latest viral AI craze has seen quickly gen...,Gizmodo.com,https://gizmodo.com/the-depressing-reason-thos...,2025-03-28T17:30:45Z,The Depressing Reason Those Terrible Fake Movi...


In [58]:
import re
import string



In [59]:

def clean_text(text):
    text = text.lower()
    text = re.sub('\[.*?\]', "", text)
    text = re.sub("\\W", " ", text)
    text = re.sub("https?://\S+|www\.\S+", "", text)
    text = re.sub("<.*?>+", "", text)
    text = re.sub("[%s]" % re.escape(string.punctuation), "", text)
    text = re.sub("\n", "", text)
    text = re.sub("\w*\d\w*", "", text)
    return text


In [60]:
news_df['clean_text'] = news_df['text'].apply(clean_text)
news_df[['clean_text']].head()

Unnamed: 0,clean_text
0,the fake promise of better siri apple intellig...
1,stocks plunge after trump declares web rumor o...
2,people making ai studio ghibli images are now ...
3,prosper murders expose lack of control over ...
4,the depressing reason those terrible fake movi...


In [62]:
import joblib

In [63]:
vectorizer = joblib.load("vectorizer.jb")

In [64]:
X_real_time = vectorizer.transform(news_df['clean_text'])

In [65]:
joblib.dump(vectorizer, 'new_vectorizer.jb')


['new_vectorizer.jb']

In [66]:
import joblib

In [67]:
logistic_model = joblib.load('logistic_regression.jb')
random_forest_model = joblib.load('random_forest.jb')
naive_bayes_model = joblib.load('naive_bayes.jb')
svm_model = joblib.load('svm.jb')
gradient_boosting_model = joblib.load('gradient_boosting.jb')

In [68]:
X_real_time = vectorizer.transform(news_df['clean_text'])

In [71]:
news_df['logistic_pred'] = logistic_model.predict(X_real_time)
news_df['random_forest_pred'] = random_forest_model.predict(X_real_time)
news_df['naive_bayes_pred'] = naive_bayes_model.predict(X_real_time)
news_df['svm_pred'] = svm_model.predict(X_real_time)
news_df['gb_pred'] = gradient_boosting_model.predict(X_real_time)

In [None]:
label_map = {0: 'Fake', 1: 'Real'}

for model in ['logistic', 'random_forest', 'naive_bayes', 'svm', 'gb']:
    pred_col = f"{model}_pred"
    label_col = f"{model}_label"
    news_df[label_col] = news_df[pred_col].map(label_map)


In [73]:
news_df[['title', 'logistic_label', 'random_forest_label', 
         'naive_bayes_label', 'svm_label', 'gb_label']].head(10)


Unnamed: 0,title,logistic_label,random_forest_label,naive_bayes_label,svm_label,gb_label
0,The fake promise of better Siri,Fake,Fake,Fake,Fake,Fake
1,Stocks Plunge After Trump Declares Web Rumor o...,Fake,Fake,Fake,Fake,Fake
2,People Making AI Studio Ghibli Images Are Now ...,Fake,Fake,Fake,Fake,Fake
3,Prosper murders 'expose lack of control' over ...,Fake,Fake,Fake,Fake,Fake
4,The Depressing Reason Those Terrible Fake Movi...,Fake,Fake,Fake,Fake,Fake


In [74]:
# List of individual model label columns
label_columns = [
    'logistic_label',
    'random_forest_label',
    'naive_bayes_label',
    'svm_label',
    'gb_label'
]

# Compute the majority vote for each row
news_df['true_label'] = news_df[label_columns].mode(axis=1)[0]


In [75]:
label_map = {0: 'Fake', 1: 'Real'}
news_df['final_verdict'] = news_df['true_label'].map(label_map)


In [76]:
# Print the model predictions along with the final true label
print(news_df[['title'] + label_columns + ['true_label', 'final_verdict']].head())

                                               title logistic_label  \
0                    The fake promise of better Siri           Fake   
1  Stocks Plunge After Trump Declares Web Rumor o...           Fake   
2  People Making AI Studio Ghibli Images Are Now ...           Fake   
3  Prosper murders 'expose lack of control' over ...           Fake   
4  The Depressing Reason Those Terrible Fake Movi...           Fake   

  random_forest_label naive_bayes_label svm_label gb_label true_label  \
0                Fake              Fake      Fake     Fake       Fake   
1                Fake              Fake      Fake     Fake       Fake   
2                Fake              Fake      Fake     Fake       Fake   
3                Fake              Fake      Fake     Fake       Fake   
4                Fake              Fake      Fake     Fake       Fake   

  final_verdict  
0           NaN  
1           NaN  
2           NaN  
3           NaN  
4           NaN  


In [None]:
news_df[['text', 'true_label']].to_csv("cleaned_news_labels.csv", index=False)



In [79]:
news_df.drop(columns=['final_verdict'], inplace=True)

In [80]:
label_map = {'Fake': 0, 'Real': 1}
news_df['true_label'] = news_df['true_label'].map(label_map)

In [81]:
print(news_df[['text', 'true_label']].head())


                                                text  true_label
0  The fake promise of better Siri Apple Intellig...           0
1  Stocks Plunge After Trump Declares Web Rumor o...           0
2  People Making AI Studio Ghibli Images Are Now ...           0
3  Prosper murders 'expose lack of control' over ...           0
4  The Depressing Reason Those Terrible Fake Movi...           0


In [82]:
news_df[['text', 'true_label']].to_csv("cleaned_news_labels.csv", index=False)

In [34]:
import os

In [36]:
news_df.to_csv("real_time_news_predictions.csv", mode='a', header=not os.path.exists("real_time_news_predictions.csv"), index=False)


In [38]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [None]:
def retrain_from_csv(csv_path="real_time_news_predictions.csv"):
    # Load CSV# Must contain 'text' and 'true_label'
    if 'text' not in df.columns or 'true_label' not in df.columns:
        raise ValueError("CSV must contain 'text' and 'true_label' columns")

    x = df['text']
    y = df['true_label']
    df = pd.read_csv(csv_path)