In [1]:
import pandas as pd                   # For data manipulation (DataFrames)
import numpy as np                    # For numerical operations
import seaborn as sns                 # For data visualization (statistical graphics)
import matplotlib.pyplot as plt       # For plotting graphs

from sklearn.model_selection import train_test_split   # To split dataset into train/test sets
from sklearn.metrics import accuracy_score              # To calculate the accuracy of a model
from sklearn.metrics import classification_report       # To get precision, recall, f1-score, etc.

import re                            # For regular expressions (useful in text processing)
import string                        # For string operations (like punctuation removal)


In [6]:
data_fake = pd.read_csv("C:/Users/Dell/Downloads/Fake.csv")
data_true = pd.read_csv("C:/Users/Dell/Downloads/True.csv")


In [8]:
data_fake.head()


Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [10]:
data_true.head()


Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [11]:
data_fake["class"] = 0
data_true['class'] = 1

In [12]:
data_fake.shape,data_true.shape

((23481, 5), (21417, 5))

In [13]:
data_fake_manual_testing = data_fake.tail(10)
for i in range(23480, 23470, -1):
    data_fake.drop([i], axis=0, inplace=True)

data_true_manual_testing = data_true.tail(10)
for i in range(21416, 21406, -1):
    data_true.drop([i], axis=0, inplace=True)


In [14]:
data_fake.shape, data_true.shape


((23471, 5), (21407, 5))

In [15]:
data_fake_manual_testing.loc[:, 'class'] = 0
data_true_manual_testing.loc[:, 'class'] = 1


In [16]:
data_fake_manual_testing.head(), data_true_manual_testing.head()


(                                                   title  \
 23471  Seven Iranians freed in the prisoner swap have...   
 23472                      #Hashtag Hell & The Fake Left   
 23473  Astroturfing: Journalist Reveals Brainwashing ...   
 23474          The New American Century: An Era of Fraud   
 23475  Hillary Clinton: ‘Israel First’ (and no peace ...   
 
                                                     text      subject  \
 23471  21st Century Wire says This week, the historic...  Middle-east   
 23472   By Dady Chery and Gilbert MercierAll writers ...  Middle-east   
 23473  Vic Bishop Waking TimesOur reality is carefull...  Middle-east   
 23474  Paul Craig RobertsIn the last years of the 20t...  Middle-east   
 23475  Robert Fantina CounterpunchAlthough the United...  Middle-east   
 
                    date  class  
 23471  January 20, 2016      0  
 23472  January 19, 2016      0  
 23473  January 19, 2016      0  
 23474  January 19, 2016      0  
 23475  January 

In [18]:
# Show first 5 rows of fake manual test data
data_fake_manual_testing.head()




Unnamed: 0,title,text,subject,date,class
23471,Seven Iranians freed in the prisoner swap have...,"21st Century Wire says This week, the historic...",Middle-east,"January 20, 2016",0
23472,#Hashtag Hell & The Fake Left,By Dady Chery and Gilbert MercierAll writers ...,Middle-east,"January 19, 2016",0
23473,Astroturfing: Journalist Reveals Brainwashing ...,Vic Bishop Waking TimesOur reality is carefull...,Middle-east,"January 19, 2016",0
23474,The New American Century: An Era of Fraud,Paul Craig RobertsIn the last years of the 20t...,Middle-east,"January 19, 2016",0
23475,Hillary Clinton: ‘Israel First’ (and no peace ...,Robert Fantina CounterpunchAlthough the United...,Middle-east,"January 18, 2016",0


In [19]:
# Show first 5 rows of true manual test data
data_true_manual_testing.head()

Unnamed: 0,title,text,subject,date,class
21407,"Mata Pires, owner of embattled Brazil builder ...","SAO PAULO (Reuters) - Cesar Mata Pires, the ow...",worldnews,"August 22, 2017",1
21408,"U.S., North Korea clash at U.N. forum over nuc...",GENEVA (Reuters) - North Korea and the United ...,worldnews,"August 22, 2017",1
21409,"U.S., North Korea clash at U.N. arms forum on ...",GENEVA (Reuters) - North Korea and the United ...,worldnews,"August 22, 2017",1
21410,Headless torso could belong to submarine journ...,COPENHAGEN (Reuters) - Danish police said on T...,worldnews,"August 22, 2017",1
21411,North Korea shipments to Syria chemical arms a...,UNITED NATIONS (Reuters) - Two North Korean sh...,worldnews,"August 21, 2017",1


In [20]:
data_merge = pd.concat([data_fake, data_true], axis=0)
data_merge.head(10)


Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0
5,Racist Alabama Cops Brutalize Black Boy While...,The number of cases of cops brutalizing and ki...,News,"December 25, 2017",0
6,"Fresh Off The Golf Course, Trump Lashes Out A...",Donald Trump spent a good portion of his day a...,News,"December 23, 2017",0
7,Trump Said Some INSANELY Racist Stuff Inside ...,In the wake of yet another court decision that...,News,"December 23, 2017",0
8,Former CIA Director Slams Trump Over UN Bully...,Many people have raised the alarm regarding th...,News,"December 22, 2017",0
9,WATCH: Brand-New Pro-Trump Ad Features So Muc...,Just when you might have thought we d get a br...,News,"December 21, 2017",0


In [21]:
data_merge.columns

Index(['title', 'text', 'subject', 'date', 'class'], dtype='object')

In [22]:
data = data_merge.drop(['title','subject', 'date'],axis = 1)

In [23]:
data.isnull().sum()

text     0
class    0
dtype: int64

In [24]:
data = data.sample(frac = 1)

In [25]:
data.head()

Unnamed: 0,text,class
15288,BEIRUT (Reuters) - Lebanese President Michel A...,1
23299,Jay Dyer 21st Century WirePicking up where we ...,0
20080,LUXEMBOURG/BRUSSELS (Reuters) - Ryanair lost a...,1
8325,"David Daleidan, the ringleader for the decepti...",0
1958,Mexican President Enrique Pe a Nieto may not b...,0


In [26]:
data.reset_index(inplace = True)
data.drop(['index'],axis = 1,inplace = True)


In [27]:
data.columns

Index(['text', 'class'], dtype='object')

In [28]:
data.head()

Unnamed: 0,text,class
0,BEIRUT (Reuters) - Lebanese President Michel A...,1
1,Jay Dyer 21st Century WirePicking up where we ...,0
2,LUXEMBOURG/BRUSSELS (Reuters) - Ryanair lost a...,1
3,"David Daleidan, the ringleader for the decepti...",0
4,Mexican President Enrique Pe a Nieto may not b...,0


In [29]:
def wordopt(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\[.*?\]', '', text)  # Remove content inside square brackets
    text = re.sub(r'\W', ' ', text)  # Remove non-word characters
    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # Remove URLs
    text = re.sub(r'<.*?>+', '', text)  # Remove HTML tags
    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)  # Remove punctuation
    text = re.sub(r'\n', '', text)  # Remove newlines
    text = re.sub(r'\w*\d\w*', '', text)  # Remove words containing numbers
    return text


In [30]:
data['text'] = data['text'].apply(wordopt)


In [31]:
x = data['text']
y = data['class']


In [32]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)


In [33]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Create the TF-IDF vectorizer
vectorization = TfidfVectorizer()

# Fit on training data and transform it
xv_train = vectorization.fit_transform(x_train)

# Transform test data (using same fitted vocab)
xv_test = vectorization.transform(x_test)


In [34]:
# 1. Import LogisticRegression
from sklearn.linear_model import LogisticRegression

# 2. Create and train the model
LR = LogisticRegression()
LR.fit(xv_train, y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [35]:
# Make predictions on the test set
pred_lr = LR.predict(xv_test)

# Evaluate model accuracy
score = LR.score(xv_test, y_test)
print("Accuracy Score:", score)


Accuracy Score: 0.9877896613190731


In [36]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5814
           1       0.98      0.99      0.99      5406

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



In [37]:

from sklearn.tree import DecisionTreeClassifier

# 2. Train the model
DT = DecisionTreeClassifier()
DT.fit(xv_train, y_train)

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [38]:
pred_dt = DT.predict(xv_test)

In [39]:
print("Accuracy:", DT.score(xv_test, y_test))

Accuracy: 0.9945632798573975


In [41]:
print(classification_report(y_test, pred_dt))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99      5814
           1       1.00      0.99      0.99      5406

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



In [42]:
from sklearn.ensemble import GradientBoostingClassifier


GB = GradientBoostingClassifier(random_state=0)
GB.fit(xv_train, y_train)

0,1,2
,loss,'log_loss'
,learning_rate,0.1
,n_estimators,100
,subsample,1.0
,criterion,'friedman_mse'
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_depth,3
,min_impurity_decrease,0.0


In [43]:
predict_gb = GB.predict(xv_test)

In [44]:
print("Accuracy:", GB.score(xv_test, y_test))

Accuracy: 0.9955436720142602


In [46]:
print(classification_report(y_test, predict_gb))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      5814
           1       0.99      1.00      1.00      5406

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



In [47]:
from sklearn.ensemble import RandomForestClassifier

# 2. Initialize and train the model
RF = RandomForestClassifier(random_state=0)
RF.fit(xv_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [48]:
pred_rf = RF.predict(xv_test)

In [49]:
print("Accuracy:", RF.score(xv_test, y_test))

Accuracy: 0.9885918003565063


In [50]:
print("Classification Report:\n", classification_report(y_test, pred_rf))

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      5814
           1       0.99      0.99      0.99      5406

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



In [51]:
def output_label(n):
    if n == 0:
        return "Fake News"
    elif n == 1:
        return "Not A Fake News"

def manual_testing(news):
    testing_news = {"text": [news]}
    new_df_test = pd.DataFrame(testing_news)
    
    # Clean the input
    new_df_test["text"] = new_df_test["text"].apply(wordopt)
    
    # Extract text and vectorize
    new_x_test = new_df_test["text"]
    new_xv_test = vectorization.transform(new_x_test)

    # Predict with all models
    pred_LR = LR.predict(new_xv_test)
    pred_DT = DT.predict(new_xv_test)
    pred_GBC = GB.predict(new_xv_test)
    pred_RFC = RF.predict(new_xv_test)

    # Display results
    return print(f"""
LR Prediction   : {output_label(pred_LR[0])}
DT Prediction   : {output_label(pred_DT[0])}
GBC Prediction  : {output_label(pred_GBC[0])}
RFC Prediction  : {output_label(pred_RFC[0])}
""")


In [52]:
news = str(input())
manual_testing(news)

 "NASA has confirmed the presence of water molecules on the sunlit surface of the Moon. The discovery, made using the SOFIA telescope, could have significant implications for future lunar exploration missions. Scientists believe this water may be trapped in tiny glass beads or between grains of lunar soil, and it could potentially be extracted for use by astronauts in the future."



LR Prediction   : Fake News
DT Prediction   : Fake News
GBC Prediction  : Fake News
RFC Prediction  : Fake News

