## Fake News Classifier Using LSTM

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv(
    '/content/trained.csv',
    engine='python',
    on_bad_lines='skip'
)


In [3]:
df.head()

Unnamed: 0,index,title,text,subject,date,class,Unnamed: 6
0,0,PRESIDENT TRUMP Explains New “America First” R...,That s what we re talking about! Another campa...,,"Aug 2, 2017",Fake,
1,1,TERMINALLY ILL FORMER MISS WI: “Until my last ...,How is it that Sean Hannity is the only media ...,politics,"Oct 4, 2016",Fake,
2,2,Cruz Humiliated By Moderator After Lie About ...,Almost immediately after learning that longtim...,,"February 13, 2016",Fake,
3,3,"Russia revels in Trump victory, looks to sanct...",MOSCOW (Reuters) - For all their mutual praise...,politicsNews,"November 9, 2016",,
4,4,Trump's bid to open U.S. monuments to developm...,WASHINGTON (Reuters) - The Trump administratio...,politicsNews,"May 26, 2017",Real,


In [4]:
df.drop(columns=['Unnamed: 6'], inplace=True)
df.rename(columns={'class': 'label'}, inplace=True)
df['label'] = (
    df['label']
    .astype(str)
    .str.strip()
    .str.upper()
    .map({'FAKE': 1, 'REAL': 0})
)

# Drop rows where label could not be mapped
df = df.dropna(subset=['label'])

# Convert safely to int
df['label'] = df['label'].astype(int)

df.head()

Unnamed: 0,index,title,text,subject,date,label
0,0,PRESIDENT TRUMP Explains New “America First” R...,That s what we re talking about! Another campa...,,"Aug 2, 2017",1
1,1,TERMINALLY ILL FORMER MISS WI: “Until my last ...,How is it that Sean Hannity is the only media ...,politics,"Oct 4, 2016",1
2,2,Cruz Humiliated By Moderator After Lie About ...,Almost immediately after learning that longtim...,,"February 13, 2016",1
4,4,Trump's bid to open U.S. monuments to developm...,WASHINGTON (Reuters) - The Trump administratio...,politicsNews,"May 26, 2017",0
5,5,UNREAL! HERE’S WHY ICE RELEASED BUT DIDN’T DEP...,THANK GOODNESS FOR THE CENTER FOR IMMIGRATION ...,Government News,"Apr 28, 2016",1


In [5]:
df.shape

(13233, 6)

In [6]:
df.isnull().sum()

Unnamed: 0,0
index,0
title,0
text,0
subject,2
date,0
label,0


In [7]:
###Drop Nan Values
df=df.dropna()


In [8]:
df.head()

Unnamed: 0,index,title,text,subject,date,label
1,1,TERMINALLY ILL FORMER MISS WI: “Until my last ...,How is it that Sean Hannity is the only media ...,politics,"Oct 4, 2016",1
4,4,Trump's bid to open U.S. monuments to developm...,WASHINGTON (Reuters) - The Trump administratio...,politicsNews,"May 26, 2017",0
5,5,UNREAL! HERE’S WHY ICE RELEASED BUT DIDN’T DEP...,THANK GOODNESS FOR THE CENTER FOR IMMIGRATION ...,Government News,"Apr 28, 2016",1
6,6,CONSEQUENCES OF LIBERAL TOLERANCE: He Had An I...,Terrified concert goers fled an Ariana Grande ...,politics,"May 25, 2017",1
7,7,U.S. NEWS and WORLD REPORT Publishes List Of T...,"More than 21,000 people from all regions of th...",politics,"Jul 13, 2017",1


In [9]:
## Get the Independent Features

X=df.drop('label',axis=1)

In [10]:
## Get the Dependent features
y=df['label']

In [11]:
X.shape

(13231, 5)

In [12]:
y.shape

(13231,)

In [13]:
import tensorflow as tf

In [14]:
tf.__version__

'2.19.0'

In [15]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense

In [16]:
### Vocabulary size
voc_size=5000

### Onehot Representation

In [17]:
messages=X.copy()

In [18]:
messages['title'][1]

'TERMINALLY ILL FORMER MISS WI: “Until my last breath, I will use this voice to tell who Mr. Trump really is” [VIDEO]'

In [19]:
messages

Unnamed: 0,index,title,text,subject,date
1,1,TERMINALLY ILL FORMER MISS WI: “Until my last ...,How is it that Sean Hannity is the only media ...,politics,"Oct 4, 2016"
4,4,Trump's bid to open U.S. monuments to developm...,WASHINGTON (Reuters) - The Trump administratio...,politicsNews,"May 26, 2017"
5,5,UNREAL! HERE’S WHY ICE RELEASED BUT DIDN’T DEP...,THANK GOODNESS FOR THE CENTER FOR IMMIGRATION ...,Government News,"Apr 28, 2016"
6,6,CONSEQUENCES OF LIBERAL TOLERANCE: He Had An I...,Terrified concert goers fled an Ariana Grande ...,politics,"May 25, 2017"
7,7,U.S. NEWS and WORLD REPORT Publishes List Of T...,"More than 21,000 people from all regions of th...",politics,"Jul 13, 2017"
...,...,...,...,...,...
13230,13230,WOW! BLACK TRUMP SUPPORTER’S Epic Takedown Of ...,This wonderful lady is so right and so dead on...,politics,"Oct 2, 2016"
13231,13231,Trump has 'warm rapport' with Philippines' Dut...,WASHINGTON (Reuters) - U.S. President Donald T...,worldnews,"October 31, 2017"
13232,13232,RELIGION OF PROGRESSIVISM: Meet Obama’s NEW Tr...,The religion of Progressivism is working overt...,left-news,"May 21, 2016"
13233,13233,"Rubio seeks re-election to Senate, says Trump ...",WASHINGTON (Reuters) - Former Republican presi...,politicsNews,"June 22, 2016"


In [20]:
messages.reset_index(inplace=True)

In [21]:
messages

Unnamed: 0,level_0,index,title,text,subject,date
0,1,1,TERMINALLY ILL FORMER MISS WI: “Until my last ...,How is it that Sean Hannity is the only media ...,politics,"Oct 4, 2016"
1,4,4,Trump's bid to open U.S. monuments to developm...,WASHINGTON (Reuters) - The Trump administratio...,politicsNews,"May 26, 2017"
2,5,5,UNREAL! HERE’S WHY ICE RELEASED BUT DIDN’T DEP...,THANK GOODNESS FOR THE CENTER FOR IMMIGRATION ...,Government News,"Apr 28, 2016"
3,6,6,CONSEQUENCES OF LIBERAL TOLERANCE: He Had An I...,Terrified concert goers fled an Ariana Grande ...,politics,"May 25, 2017"
4,7,7,U.S. NEWS and WORLD REPORT Publishes List Of T...,"More than 21,000 people from all regions of th...",politics,"Jul 13, 2017"
...,...,...,...,...,...,...
13226,13230,13230,WOW! BLACK TRUMP SUPPORTER’S Epic Takedown Of ...,This wonderful lady is so right and so dead on...,politics,"Oct 2, 2016"
13227,13231,13231,Trump has 'warm rapport' with Philippines' Dut...,WASHINGTON (Reuters) - U.S. President Donald T...,worldnews,"October 31, 2017"
13228,13232,13232,RELIGION OF PROGRESSIVISM: Meet Obama’s NEW Tr...,The religion of Progressivism is working overt...,left-news,"May 21, 2016"
13229,13233,13233,"Rubio seeks re-election to Senate, says Trump ...",WASHINGTON (Reuters) - Former Republican presi...,politicsNews,"June 22, 2016"


In [22]:
import nltk
import re
from nltk.corpus import stopwords

In [23]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [24]:
### Dataset Preprocessing
from nltk.stem.porter import PorterStemmer ##stemming purpose
ps = PorterStemmer()
corpus = []
for i in range(0, len(messages)):
    review = re.sub('[^a-zA-Z]', ' ', messages['title'].iloc[i])
    review = review.lower()
    review = review.split()

    review = [ps.stem(word) for word in review if not word in stopwords.words('english')]
    review = ' '.join(review)
    corpus.append(review)

In [25]:
corpus

['termin ill former miss wi last breath use voic tell mr trump realli video',
 'trump bid open u monument develop draw call protect',
 'unreal ice releas deport crimin illeg video',
 'consequ liber toler isi flag hang roof yet one report video',
 'u news world report publish list top popular nation refuge want live',
 'trump administr nafta demand make sens union pacif ceo',
 'turk flock social media gold trader sanction case',
 'trump longtim advis offici ban appear cnn racist',
 'eu state push reform labor rule sought franc macron',
 'sit gop senat enough donat alabama democrat senat imag',
 'boom harri faulkner blow russia collus theori one smart question video',
 'angri leftist caught video steal student trump hat demand school make stop wear hat f f ing freedom speech boy',
 'mcmaster gave susan rice continu access classifi info still clearanc',
 'pastor shot kill middl church servic ohio',
 'china take action thousand websit harm obscen content',
 'van load illeg move releas away

In [26]:
corpus[1]

'trump bid open u monument develop draw call protect'

In [27]:
onehot_repr=[one_hot(words,voc_size)for words in corpus]
onehot_repr

[[1705,
  4589,
  1349,
  413,
  3311,
  4846,
  2584,
  956,
  3380,
  4516,
  2879,
  4582,
  533,
  2790],
 [4582, 745, 2425, 2032, 4648, 3340, 3327, 3816, 2846],
 [4891, 1429, 713, 568, 2616, 4537, 2790],
 [1709, 2262, 484, 1507, 4258, 1864, 953, 1291, 4052, 437, 2790],
 [2032, 3980, 881, 437, 4553, 2415, 249, 3109, 4226, 2902, 859, 501],
 [4582, 1464, 3582, 1792, 3719, 4095, 2426, 1354, 2213],
 [2694, 4726, 4284, 60, 219, 836, 2956, 1474],
 [4582, 3681, 3172, 3651, 525, 1038, 2712, 636],
 [1321, 1405, 4832, 2590, 2966, 63, 4979, 1465, 453],
 [1701, 3631, 4385, 1704, 3318, 3600, 699, 4385, 1192],
 [2894, 1587, 567, 2618, 4084, 2614, 1040, 4052, 1818, 4477, 2790],
 [6,
  4737,
  1168,
  2790,
  4823,
  341,
  4582,
  2974,
  1792,
  1147,
  3719,
  3204,
  1641,
  2974,
  1575,
  1575,
  117,
  4524,
  3726,
  2676],
 [2339, 2064, 1310, 2270, 2738, 333, 1957, 3718, 205, 3513],
 [4250, 318, 1957, 1768, 1197, 2056, 288],
 [2780, 2770, 4195, 2094, 793, 377, 4103, 2930],
 [4143, 4219, 4

In [28]:
corpus[1]

'trump bid open u monument develop draw call protect'

In [29]:
onehot_repr[1]

[4582, 745, 2425, 2032, 4648, 3340, 3327, 3816, 2846]

### Embedding Representation

In [30]:
sent_length=20
embedded_docs=pad_sequences(onehot_repr,padding='post',maxlen=sent_length)
print(embedded_docs)

[[1705 4589 1349 ...    0    0    0]
 [4582  745 2425 ...    0    0    0]
 [4891 1429  713 ...    0    0    0]
 ...
 [4512 2513 3424 ...    0    0    0]
 [ 791 2338 2162 ...    0    0    0]
 [4571 4737 2267 ...    0    0    0]]


In [31]:
embedded_docs[1]

array([4582,  745, 2425, 2032, 4648, 3340, 3327, 3816, 2846,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0], dtype=int32)

In [32]:
embedded_docs[0]

array([1705, 4589, 1349,  413, 3311, 4846, 2584,  956, 3380, 4516, 2879,
       4582,  533, 2790,    0,    0,    0,    0,    0,    0], dtype=int32)

In [33]:
## Creating model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Input

embedding_vector_features = 40  # feature representation

model = Sequential([
    Input(shape=(sent_length,)),
    Embedding(voc_size, embedding_vector_features),
    LSTM(100),
    Dense(1, activation='sigmoid')
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()


In [34]:
len(embedded_docs),y.shape

(13231, (13231,))

In [35]:
import numpy as np
X_final=np.array(embedded_docs)
y_final=np.array(y)

In [36]:
X_final.shape,y_final.shape

((13231, 20), (13231,))

In [37]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.33, random_state=42)

### Model Training

In [38]:
### Finally Training
model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=10,batch_size=64)

Epoch 1/10
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 24ms/step - accuracy: 0.7937 - loss: 0.4437 - val_accuracy: 0.9166 - val_loss: 0.2049
Epoch 2/10
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.9584 - loss: 0.1225 - val_accuracy: 0.9267 - val_loss: 0.1896
Epoch 3/10
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.9741 - loss: 0.0789 - val_accuracy: 0.9274 - val_loss: 0.2049
Epoch 4/10
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 42ms/step - accuracy: 0.9825 - loss: 0.0611 - val_accuracy: 0.9146 - val_loss: 0.2407
Epoch 5/10
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.9874 - loss: 0.0420 - val_accuracy: 0.9137 - val_loss: 0.3181
Epoch 6/10
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.9887 - loss: 0.0391 - val_accuracy: 0.9125 - val_loss: 0.2903
Epoch 7/10
[1m139/139

<keras.src.callbacks.history.History at 0x7f298b2b5fd0>

### Performance Metrics And Accuracy

In [39]:
y_pred=model.predict(X_test)

[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


In [40]:
y_pred=np.where(y_pred > 0.5, 1,0) ##AUC ROC Curve

In [41]:
from sklearn.metrics import confusion_matrix

In [42]:
confusion_matrix(y_test,y_pred)

array([[1930,  169],
       [ 233, 2035]])

In [43]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_pred)

0.9079459583237921

In [44]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.89      0.92      0.91      2099
           1       0.92      0.90      0.91      2268

    accuracy                           0.91      4367
   macro avg       0.91      0.91      0.91      4367
weighted avg       0.91      0.91      0.91      4367

