# Import the Lib

In [156]:
import numpy as np
import pandas as pd
import matplotlib.pyplot  as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import spacy
import re
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the data 

In [122]:
df_fake=pd.read_csv("/kaggle/input/fake-news-detection/fake.csv")
df_fake.head(2)

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"


In [124]:
df_true=pd.read_csv("/kaggle/input/fake-news-detection/true.csv")
df_true.head(2)

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"


# Data Preprocessing

In [125]:
print(df_fake.shape)
print(df_true.shape)

(23481, 4)
(21417, 4)


In [126]:
data=pd.concat([df_fake, df_true],axis=0)

In [127]:
data['class']='Nan'

In [128]:
data

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",Nan
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",Nan
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",Nan
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",Nan
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",Nan
...,...,...,...,...,...
21412,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017",Nan
21413,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017",Nan
21414,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017",Nan
21415,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017",Nan


In [130]:
data['class'][21418:]='True'
data['class'][0:21418]='Fake'

In [131]:
data.shape

(44898, 5)

In [132]:
data.describe()

Unnamed: 0,title,text,subject,date,class
count,44898,44898.0,44898,44898,44898
unique,38729,38646.0,8,2397,2
top,Factbox: Trump fills top jobs for his administ...,,politicsNews,"December 20, 2017",True
freq,14,627.0,11272,182,23480


In [133]:
data.subject.unique()

array(['News', 'politics', 'Government News', 'left-news', 'US_News',
       'Middle-east', 'politicsNews', 'worldnews'], dtype=object)

In [134]:
data.dtypes

title      object
text       object
subject    object
date       object
class      object
dtype: object

In [135]:
data.dtypes

title      object
text       object
subject    object
date       object
class      object
dtype: object

In [136]:
data.duplicated().sum()

209

In [137]:
data.drop_duplicates(inplace=True)

In [138]:
data.isnull().sum()

title      0
text       0
subject    0
date       0
class      0
dtype: int64

# Text Cleaning and Preprocessing

In [139]:
def clean_text(text):
    if isinstance(text, str):  
        clean_text = re.sub(r'[^\w\s]', '', text)
        clean_text = re.sub(r'\d+', '', clean_text)
        return clean_text.lower()  
    else:
        return '' 
data['title'] = data['title'].apply(clean_text)
data['text'] = data['text'].apply(clean_text)

In [140]:
data

Unnamed: 0,title,text,subject,date,class
0,donald trump sends out embarrassing new years...,donald trump just couldn t wish all americans ...,News,"December 31, 2017",Fake
1,drunk bragging trump staffer started russian ...,house intelligence committee chairman devin nu...,News,"December 31, 2017",Fake
2,sheriff david clarke becomes an internet joke...,on friday it was revealed that former milwauke...,News,"December 30, 2017",Fake
3,trump is so obsessed he even has obamas name ...,on christmas day donald trump announced that h...,News,"December 29, 2017",Fake
4,pope francis just called out donald trump dur...,pope francis used his annual christmas day mes...,News,"December 25, 2017",Fake
...,...,...,...,...,...
21412,fully committed nato backs new us approach on ...,brussels reuters nato allies on tuesday welco...,worldnews,"August 22, 2017",True
21413,lexisnexis withdrew two products from chinese ...,london reuters lexisnexis a provider of legal...,worldnews,"August 22, 2017",True
21414,minsk cultural hub becomes haven from authorities,minsk reuters in the shadow of disused soviet...,worldnews,"August 22, 2017",True
21415,vatican upbeat on possibility of pope francis ...,moscow reuters vatican secretary of state car...,worldnews,"August 22, 2017",True


In [141]:
data['Text_News']=data['text']
data['Title_News']=data['title']

In [142]:
data

Unnamed: 0,title,text,subject,date,class,Text_News,Title_News
0,donald trump sends out embarrassing new years...,donald trump just couldn t wish all americans ...,News,"December 31, 2017",Fake,donald trump just couldn t wish all americans ...,donald trump sends out embarrassing new years...
1,drunk bragging trump staffer started russian ...,house intelligence committee chairman devin nu...,News,"December 31, 2017",Fake,house intelligence committee chairman devin nu...,drunk bragging trump staffer started russian ...
2,sheriff david clarke becomes an internet joke...,on friday it was revealed that former milwauke...,News,"December 30, 2017",Fake,on friday it was revealed that former milwauke...,sheriff david clarke becomes an internet joke...
3,trump is so obsessed he even has obamas name ...,on christmas day donald trump announced that h...,News,"December 29, 2017",Fake,on christmas day donald trump announced that h...,trump is so obsessed he even has obamas name ...
4,pope francis just called out donald trump dur...,pope francis used his annual christmas day mes...,News,"December 25, 2017",Fake,pope francis used his annual christmas day mes...,pope francis just called out donald trump dur...
...,...,...,...,...,...,...,...
21412,fully committed nato backs new us approach on ...,brussels reuters nato allies on tuesday welco...,worldnews,"August 22, 2017",True,brussels reuters nato allies on tuesday welco...,fully committed nato backs new us approach on ...
21413,lexisnexis withdrew two products from chinese ...,london reuters lexisnexis a provider of legal...,worldnews,"August 22, 2017",True,london reuters lexisnexis a provider of legal...,lexisnexis withdrew two products from chinese ...
21414,minsk cultural hub becomes haven from authorities,minsk reuters in the shadow of disused soviet...,worldnews,"August 22, 2017",True,minsk reuters in the shadow of disused soviet...,minsk cultural hub becomes haven from authorities
21415,vatican upbeat on possibility of pope francis ...,moscow reuters vatican secretary of state car...,worldnews,"August 22, 2017",True,moscow reuters vatican secretary of state car...,vatican upbeat on possibility of pope francis ...


In [143]:
from nltk.tokenize import word_tokenize
data['title']=data['title'].apply(lambda x:word_tokenize(str(x)))
data['text']=data['text'].apply(lambda x:word_tokenize(str(x)))

In [144]:
data

Unnamed: 0,title,text,subject,date,class,Text_News,Title_News
0,"[donald, trump, sends, out, embarrassing, new,...","[donald, trump, just, couldn, t, wish, all, am...",News,"December 31, 2017",Fake,donald trump just couldn t wish all americans ...,donald trump sends out embarrassing new years...
1,"[drunk, bragging, trump, staffer, started, rus...","[house, intelligence, committee, chairman, dev...",News,"December 31, 2017",Fake,house intelligence committee chairman devin nu...,drunk bragging trump staffer started russian ...
2,"[sheriff, david, clarke, becomes, an, internet...","[on, friday, it, was, revealed, that, former, ...",News,"December 30, 2017",Fake,on friday it was revealed that former milwauke...,sheriff david clarke becomes an internet joke...
3,"[trump, is, so, obsessed, he, even, has, obama...","[on, christmas, day, donald, trump, announced,...",News,"December 29, 2017",Fake,on christmas day donald trump announced that h...,trump is so obsessed he even has obamas name ...
4,"[pope, francis, just, called, out, donald, tru...","[pope, francis, used, his, annual, christmas, ...",News,"December 25, 2017",Fake,pope francis used his annual christmas day mes...,pope francis just called out donald trump dur...
...,...,...,...,...,...,...,...
21412,"[fully, committed, nato, backs, new, us, appro...","[brussels, reuters, nato, allies, on, tuesday,...",worldnews,"August 22, 2017",True,brussels reuters nato allies on tuesday welco...,fully committed nato backs new us approach on ...
21413,"[lexisnexis, withdrew, two, products, from, ch...","[london, reuters, lexisnexis, a, provider, of,...",worldnews,"August 22, 2017",True,london reuters lexisnexis a provider of legal...,lexisnexis withdrew two products from chinese ...
21414,"[minsk, cultural, hub, becomes, haven, from, a...","[minsk, reuters, in, the, shadow, of, disused,...",worldnews,"August 22, 2017",True,minsk reuters in the shadow of disused soviet...,minsk cultural hub becomes haven from authorities
21415,"[vatican, upbeat, on, possibility, of, pope, f...","[moscow, reuters, vatican, secretary, of, stat...",worldnews,"August 22, 2017",True,moscow reuters vatican secretary of state car...,vatican upbeat on possibility of pope francis ...


In [145]:
def remove_stopwords(text):
    stop_words=set(stopwords.words('english'))
    tokens=word_tokenize(str(text))
    filtered_tokens=[word for word in tokens if word.lower() not in stop_words]
    return ' '.join(filtered_tokens)
columns_to_process=['text','title']
for column in columns_to_process:
    data[column+'_no_stopwords']=data[column].apply(remove_stopwords)

In [146]:
data

Unnamed: 0,title,text,subject,date,class,Text_News,Title_News,text_no_stopwords,title_no_stopwords
0,"[donald, trump, sends, out, embarrassing, new,...","[donald, trump, just, couldn, t, wish, all, am...",News,"December 31, 2017",Fake,donald trump just couldn t wish all americans ...,donald trump sends out embarrassing new years...,"[ 'donald ' , 'trump ' , 'just ' , 'couldn ' ,...","[ 'donald ' , 'trump ' , 'sends ' , 'out ' , '..."
1,"[drunk, bragging, trump, staffer, started, rus...","[house, intelligence, committee, chairman, dev...",News,"December 31, 2017",Fake,house intelligence committee chairman devin nu...,drunk bragging trump staffer started russian ...,"[ 'house ' , 'intelligence ' , 'committee ' , ...","[ 'drunk ' , 'bragging ' , 'trump ' , 'staffer..."
2,"[sheriff, david, clarke, becomes, an, internet...","[on, friday, it, was, revealed, that, former, ...",News,"December 30, 2017",Fake,on friday it was revealed that former milwauke...,sheriff david clarke becomes an internet joke...,"[ 'on ' , 'friday ' , 'it ' , 'was ' , 'reveal...","[ 'sheriff ' , 'david ' , 'clarke ' , 'becomes..."
3,"[trump, is, so, obsessed, he, even, has, obama...","[on, christmas, day, donald, trump, announced,...",News,"December 29, 2017",Fake,on christmas day donald trump announced that h...,trump is so obsessed he even has obamas name ...,"[ 'on ' , 'christmas ' , 'day ' , 'donald ' , ...","[ 'trump ' , 'is ' , 'so ' , 'obsessed ' , 'he..."
4,"[pope, francis, just, called, out, donald, tru...","[pope, francis, used, his, annual, christmas, ...",News,"December 25, 2017",Fake,pope francis used his annual christmas day mes...,pope francis just called out donald trump dur...,"[ 'pope ' , 'francis ' , 'used ' , 'his ' , 'a...","[ 'pope ' , 'francis ' , 'just ' , 'called ' ,..."
...,...,...,...,...,...,...,...,...,...
21412,"[fully, committed, nato, backs, new, us, appro...","[brussels, reuters, nato, allies, on, tuesday,...",worldnews,"August 22, 2017",True,brussels reuters nato allies on tuesday welco...,fully committed nato backs new us approach on ...,"[ 'brussels ' , 'reuters ' , 'nato ' , 'allies...","[ 'fully ' , 'committed ' , 'nato ' , 'backs '..."
21413,"[lexisnexis, withdrew, two, products, from, ch...","[london, reuters, lexisnexis, a, provider, of,...",worldnews,"August 22, 2017",True,london reuters lexisnexis a provider of legal...,lexisnexis withdrew two products from chinese ...,"[ 'london ' , 'reuters ' , 'lexisnexis ' , 'a ...","[ 'lexisnexis ' , 'withdrew ' , 'two ' , 'prod..."
21414,"[minsk, cultural, hub, becomes, haven, from, a...","[minsk, reuters, in, the, shadow, of, disused,...",worldnews,"August 22, 2017",True,minsk reuters in the shadow of disused soviet...,minsk cultural hub becomes haven from authorities,"[ 'minsk ' , 'reuters ' , 'in ' , 'the ' , 'sh...","[ 'minsk ' , 'cultural ' , 'hub ' , 'becomes '..."
21415,"[vatican, upbeat, on, possibility, of, pope, f...","[moscow, reuters, vatican, secretary, of, stat...",worldnews,"August 22, 2017",True,moscow reuters vatican secretary of state car...,vatican upbeat on possibility of pope francis ...,"[ 'moscow ' , 'reuters ' , 'vatican ' , 'secre...","[ 'vatican ' , 'upbeat ' , 'on ' , 'possibilit..."


In [147]:
sid = SentimentIntensityAnalyzer()
def determine_sentiment(row):
    title_sentiment = sid.polarity_scores(str(row['title_no_stopwords']))['compound']
    text_sentiment = sid.polarity_scores(str(row['text_no_stopwords']))['compound']
    combined_sentiment = (title_sentiment + text_sentiment) / 2
    if combined_sentiment >= 0.05:
        return 'Positive'
    elif combined_sentiment <= -0.05:
        return 'Negative'
    else:
        return 'Neutral'
data['sentiment'] = data.apply(determine_sentiment, axis=1)

In [148]:
data

Unnamed: 0,title,text,subject,date,class,Text_News,Title_News,text_no_stopwords,title_no_stopwords,sentiment
0,"[donald, trump, sends, out, embarrassing, new,...","[donald, trump, just, couldn, t, wish, all, am...",News,"December 31, 2017",Fake,donald trump just couldn t wish all americans ...,donald trump sends out embarrassing new years...,"[ 'donald ' , 'trump ' , 'just ' , 'couldn ' ,...","[ 'donald ' , 'trump ' , 'sends ' , 'out ' , '...",Negative
1,"[drunk, bragging, trump, staffer, started, rus...","[house, intelligence, committee, chairman, dev...",News,"December 31, 2017",Fake,house intelligence committee chairman devin nu...,drunk bragging trump staffer started russian ...,"[ 'house ' , 'intelligence ' , 'committee ' , ...","[ 'drunk ' , 'bragging ' , 'trump ' , 'staffer...",Negative
2,"[sheriff, david, clarke, becomes, an, internet...","[on, friday, it, was, revealed, that, former, ...",News,"December 30, 2017",Fake,on friday it was revealed that former milwauke...,sheriff david clarke becomes an internet joke...,"[ 'on ' , 'friday ' , 'it ' , 'was ' , 'reveal...","[ 'sheriff ' , 'david ' , 'clarke ' , 'becomes...",Negative
3,"[trump, is, so, obsessed, he, even, has, obama...","[on, christmas, day, donald, trump, announced,...",News,"December 29, 2017",Fake,on christmas day donald trump announced that h...,trump is so obsessed he even has obamas name ...,"[ 'on ' , 'christmas ' , 'day ' , 'donald ' , ...","[ 'trump ' , 'is ' , 'so ' , 'obsessed ' , 'he...",Negative
4,"[pope, francis, just, called, out, donald, tru...","[pope, francis, used, his, annual, christmas, ...",News,"December 25, 2017",Fake,pope francis used his annual christmas day mes...,pope francis just called out donald trump dur...,"[ 'pope ' , 'francis ' , 'used ' , 'his ' , 'a...","[ 'pope ' , 'francis ' , 'just ' , 'called ' ,...",Positive
...,...,...,...,...,...,...,...,...,...,...
21412,"[fully, committed, nato, backs, new, us, appro...","[brussels, reuters, nato, allies, on, tuesday,...",worldnews,"August 22, 2017",True,brussels reuters nato allies on tuesday welco...,fully committed nato backs new us approach on ...,"[ 'brussels ' , 'reuters ' , 'nato ' , 'allies...","[ 'fully ' , 'committed ' , 'nato ' , 'backs '...",Positive
21413,"[lexisnexis, withdrew, two, products, from, ch...","[london, reuters, lexisnexis, a, provider, of,...",worldnews,"August 22, 2017",True,london reuters lexisnexis a provider of legal...,lexisnexis withdrew two products from chinese ...,"[ 'london ' , 'reuters ' , 'lexisnexis ' , 'a ...","[ 'lexisnexis ' , 'withdrew ' , 'two ' , 'prod...",Positive
21414,"[minsk, cultural, hub, becomes, haven, from, a...","[minsk, reuters, in, the, shadow, of, disused,...",worldnews,"August 22, 2017",True,minsk reuters in the shadow of disused soviet...,minsk cultural hub becomes haven from authorities,"[ 'minsk ' , 'reuters ' , 'in ' , 'the ' , 'sh...","[ 'minsk ' , 'cultural ' , 'hub ' , 'becomes '...",Positive
21415,"[vatican, upbeat, on, possibility, of, pope, f...","[moscow, reuters, vatican, secretary, of, stat...",worldnews,"August 22, 2017",True,moscow reuters vatican secretary of state car...,vatican upbeat on possibility of pope francis ...,"[ 'moscow ' , 'reuters ' , 'vatican ' , 'secre...","[ 'vatican ' , 'upbeat ' , 'on ' , 'possibilit...",Positive


# Feature Selection

In [149]:
xx=data[['Title_News','Text_News','subject','sentiment']]
yy=data['class']

In [150]:
xx.columns

Index(['Title_News', 'Text_News', 'subject', 'sentiment'], dtype='object')

In [151]:
xx[['Title_News', 'Text_News', 'subject', 'sentiment']]=xx[['Title_News', 'Text_News', 'subject', 'sentiment']].apply(LabelEncoder().fit_transform)
print(xx)

       Title_News  Text_News  subject  sentiment
0            1710       8397        2          0
1            1777      11677        2          0
2            5378      19719        2          0
3            6799      19681        2          0
4            4490      21080        2          2
...           ...        ...      ...        ...
21412       17309       5758        7          2
21413       21601      15709        7          2
21414       22890      17201        7          2
21415       36353      17971        7          2
21416       19602      13732        7          2

[44689 rows x 4 columns]


In [152]:
yy = yy.replace({'Fake': 0, 'True': 1})
print(yy)

0        0
1        0
2        0
3        0
4        0
        ..
21412    1
21413    1
21414    1
21415    1
21416    1
Name: class, Length: 44689, dtype: int64


In [153]:
scaler=MinMaxScaler()
xx=scaler.fit_transform(xx)

# Model Prediction

In [154]:
X_train, X_test, y_train, y_test=train_test_split(xx,yy,test_size=0.2, random_state=42)
classifiers=[
    LogisticRegression(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    SVC()
]
for classifier in classifiers:
    classifier_name=classifier.__class__.__name__
    print(f"Training {classifier_name}...")
    classifier.fit(X_train, y_train)
    y_pred=classifier.predict(X_test)
    accuracy=accuracy_score(y_test, y_pred)
    print(f"\n {classifier_name} \n Accuracy: {accuracy:.4f}\n")

Training LogisticRegression...

 LogisticRegression 
 Accuracy: 0.8810

Training DecisionTreeClassifier...

 DecisionTreeClassifier 
 Accuracy: 0.9806

Training RandomForestClassifier...

 RandomForestClassifier 
 Accuracy: 0.9891

Training SVC...

 SVC 
 Accuracy: 0.9790



# Prediction Scores

    * Best Score is Random Forest Classifier 98.91% 