<a href="https://colab.research.google.com/github/Achyutha022/Fake-News-Classifier-using-LSTM/blob/main/FakeNewsClassifierUsingLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Fake News Classifier Using LSTM

Dataset: https://www.kaggle.com/c/fake-news/data#

In [20]:
import pandas as pd

In [21]:
# Load the datasets
true_df = pd.read_csv('True.csv')
fake_df = pd.read_csv('Fake.csv')

In [22]:
# Add a 'label' column to distinguish between true and fake news
true_df['label'] = 1
fake_df['label'] = 0

# Combine the dataframes
df = pd.concat([true_df, fake_df], ignore_index=True)

In [23]:
df.head()

Unnamed: 0,title,text,subject,date,label
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [24]:
df.shape

(44898, 5)

In [25]:
df.head()

Unnamed: 0,title,text,subject,date,label
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [26]:
df.isnull().sum()

Unnamed: 0,0
title,0
text,0
subject,0
date,0
label,0


In [27]:
###Drop Nan Values
df=df.dropna()


In [28]:
df.head()

Unnamed: 0,title,text,subject,date,label
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",1
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",1
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",1
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",1
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",1


In [29]:
## Get the Independent Features

X=df.drop('label',axis=1)

In [30]:
## Get the Dependent features
y=df['label']

In [31]:
X.shape

(44898, 4)

In [32]:
y.shape

(44898,)

In [33]:
import tensorflow as tf

In [34]:
tf.__version__

'2.19.0'

In [35]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense

In [36]:
### Vocabulary size
voc_size=7000

### Onehot Representation

In [37]:
messages=X.copy()

In [38]:
messages['title'][1]

'U.S. military to accept transgender recruits on Monday: Pentagon'

In [39]:
messages

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"
...,...,...,...,...
44893,McPain: John McCain Furious That Iran Treated ...,21st Century Wire says As 21WIRE reported earl...,Middle-east,"January 16, 2016"
44894,JUSTICE? Yahoo Settles E-mail Privacy Class-ac...,21st Century Wire says It s a familiar theme. ...,Middle-east,"January 16, 2016"
44895,Sunnistan: US and Allied ‘Safe Zone’ Plan to T...,Patrick Henningsen 21st Century WireRemember ...,Middle-east,"January 15, 2016"
44896,How to Blow $700 Million: Al Jazeera America F...,21st Century Wire says Al Jazeera America will...,Middle-east,"January 14, 2016"


In [40]:
messages.reset_index(inplace=True)

In [41]:
messages

Unnamed: 0,index,title,text,subject,date
0,0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"
...,...,...,...,...,...
44893,44893,McPain: John McCain Furious That Iran Treated ...,21st Century Wire says As 21WIRE reported earl...,Middle-east,"January 16, 2016"
44894,44894,JUSTICE? Yahoo Settles E-mail Privacy Class-ac...,21st Century Wire says It s a familiar theme. ...,Middle-east,"January 16, 2016"
44895,44895,Sunnistan: US and Allied ‘Safe Zone’ Plan to T...,Patrick Henningsen 21st Century WireRemember ...,Middle-east,"January 15, 2016"
44896,44896,How to Blow $700 Million: Al Jazeera America F...,21st Century Wire says Al Jazeera America will...,Middle-east,"January 14, 2016"


In [42]:
import nltk
import re
from nltk.corpus import stopwords
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [43]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [44]:
### Dataset Preprocessing
from nltk.stem import WordNetLemmatizer ##Lemmatization purpose
ps = WordNetLemmatizer()
corpus = []
for i in range(0, len(messages)):
    review = re.sub('[^a-zA-Z]', ' ', messages['title'][i])
    review = review.lower()
    review = review.split()

    review = [ps.lemmatize(word) for word in review if not word in stopwords.words('english')]
    review = ' '.join(review)
    corpus.append(review)

In [45]:
corpus

['u budget fight loom republican flip fiscal script',
 'u military accept transgender recruit monday pentagon',
 'senior u republican senator let mr mueller job',
 'fbi russia probe helped australian diplomat tip nyt',
 'trump want postal service charge much amazon shipment',
 'white house congress prepare talk spending immigration',
 'trump say russia probe fair timeline unclear nyt',
 'factbox trump twitter dec approval rating amazon',
 'trump twitter dec global warming',
 'alabama official certify senator elect jones today despite challenge cnn',
 'jones certified u senate winner despite moore challenge',
 'new york governor question constitutionality federal tax overhaul',
 'factbox trump twitter dec vanity fair hillary clinton',
 'trump twitter dec trump iraq syria',
 'man say delivered manure mnuchin protest new u tax law',
 'virginia official postpone lottery drawing decide tied statehouse election',
 'u lawmaker question businessman trump tower meeting source',
 'trump twitter 

In [46]:
corpus[220]

'justice department reviewing option ruling transgender recruit white house'

In [47]:
onehot_repr=[one_hot(words,voc_size)for words in corpus]
onehot_repr

[[3661, 6935, 1919, 6678, 2246, 6756, 2969, 4049],
 [3661, 617, 5844, 6872, 2812, 1414, 3586],
 [2439, 3661, 2246, 3379, 3082, 5618, 5126, 6575],
 [6660, 1407, 4468, 4642, 2071, 2759, 1220, 3371],
 [4684, 3092, 2614, 6869, 4978, 2051, 2264, 3220],
 [3141, 3554, 5130, 2869, 5934, 207, 3477],
 [4684, 1809, 1407, 4468, 6919, 125, 3198, 3371],
 [205, 4684, 1003, 3888, 4933, 966, 2264],
 [4684, 1003, 3888, 6770, 2800],
 [6051, 921, 247, 3379, 1729, 3235, 783, 3052, 3509, 612],
 [3235, 46, 3661, 6562, 4248, 3052, 4386, 3509],
 [2401, 2511, 3077, 1009, 4405, 1966, 4622, 2378],
 [205, 4684, 1003, 3888, 6310, 6919, 4530, 4087],
 [4684, 1003, 3888, 4684, 5944, 6088],
 [6778, 1809, 691, 6192, 4876, 6412, 2401, 3661, 4622, 2525],
 [6110, 921, 1802, 4458, 5745, 1760, 4756, 3870, 3110],
 [3661, 4012, 1009, 2000, 4684, 1158, 1599, 5116],
 [4684, 1003, 3888, 4530, 4087, 4622, 4961, 6886],
 [3661, 2351, 1719, 4351, 3509, 4684, 3760, 5731, 3758],
 [6231, 5498, 4876, 672, 1592, 3486, 6182, 3918, 6192, 65

In [48]:
corpus[1]

'u military accept transgender recruit monday pentagon'

In [49]:
onehot_repr[1]

[3661, 617, 5844, 6872, 2812, 1414, 3586]

### Embedding Representation

In [50]:
sent_length=20
embedded_docs=pad_sequences(onehot_repr,padding='post',maxlen=sent_length)
print(embedded_docs)

[[3661 6935 1919 ...    0    0    0]
 [3661  617 5844 ...    0    0    0]
 [2439 3661 2246 ...    0    0    0]
 ...
 [4024 3661 5979 ...    0    0    0]
 [5197 2927 2766 ...    0    0    0]
 [3661  470  982 ...    0    0    0]]


In [51]:
embedded_docs[1]

array([3661,  617, 5844, 6872, 2812, 1414, 3586,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0], dtype=int32)

In [52]:
embedded_docs[0]

array([3661, 6935, 1919, 6678, 2246, 6756, 2969, 4049,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0], dtype=int32)

In [55]:
len(embedded_docs),y.shape

(44898, (44898,))

In [56]:
import numpy as np
X_final=np.array(embedded_docs)
y_final=np.array(y)

In [57]:
X_final.shape,y_final.shape

((44898, 20), (44898,))

In [58]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.33, random_state=42)

### Model Training

In [81]:
## Creating model
embedding_vector_features=40 ##features representation
model=Sequential()
model.add(Embedding(voc_size,embedding_vector_features,input_length=sent_length))
model.add(LSTM(100))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model.build()
model.predict(X_test)
print(model.summary())



[1m464/464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


None


In [82]:
### Finally Training
model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=10,batch_size=64)

Epoch 1/10
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.8537 - loss: 0.3211 - val_accuracy: 0.9315 - val_loss: 0.1645
Epoch 2/10
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.9546 - loss: 0.1225 - val_accuracy: 0.9310 - val_loss: 0.1817
Epoch 3/10
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9614 - loss: 0.1020 - val_accuracy: 0.9345 - val_loss: 0.1659
Epoch 4/10
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9694 - loss: 0.0830 - val_accuracy: 0.9368 - val_loss: 0.1716
Epoch 5/10
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.9774 - loss: 0.0642 - val_accuracy: 0.9353 - val_loss: 0.1802
Epoch 6/10
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.9834 - loss: 0.0487 - val_accuracy: 0.9376 - val_loss: 0.1836
Epoch 7/10
[1m471/471[0m 

<keras.src.callbacks.history.History at 0x7b7cf0789280>

### Adding Dropout

In [85]:
from tensorflow.keras.layers import Dropout, Bidirectional

## Creating model
embedding_vector_features=40
model=Sequential()
model.add(Embedding(voc_size,embedding_vector_features,input_length=sent_length))
model.add(Dropout(0.3))
model.add(Bidirectional(LSTM(100)))
model.add(Dropout(0.3))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])



### Performance Metrics And Accuracy

In [86]:
y_pred=model.predict(X_test)

[1m464/464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step


In [87]:
y_pred=np.where(y_pred > 0.5, 1,0) ##AUC ROC Curve

In [88]:
from sklearn.metrics import confusion_matrix

In [89]:
confusion_matrix(y_test,y_pred)

array([[  46, 7671],
       [   0, 7100]])

In [90]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_pred)

0.4822838631301883

In [91]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      0.01      0.01      7717
           1       0.48      1.00      0.65      7100

    accuracy                           0.48     14817
   macro avg       0.74      0.50      0.33     14817
weighted avg       0.75      0.48      0.32     14817



In [92]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test,y_pred)
print(f"Accuracy Score: {accuracy}")

Accuracy Score: 0.4822838631301883


In [74]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.52      0.99      0.68      7717
           1       0.00      0.00      0.00      7100

    accuracy                           0.52     14817
   macro avg       0.26      0.49      0.34     14817
weighted avg       0.27      0.52      0.35     14817



In [98]:
### Finally Training
model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=10,batch_size=64)

Epoch 1/15
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 13ms/step - accuracy: 0.8369 - loss: 0.3357 - val_accuracy: 0.9349 - val_loss: 0.1649
Epoch 2/15
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9ms/step - accuracy: 0.9560 - loss: 0.1203 - val_accuracy: 0.9404 - val_loss: 0.1471
Epoch 3/15
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.9689 - loss: 0.0865 - val_accuracy: 0.9405 - val_loss: 0.1515
Epoch 4/15
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.9757 - loss: 0.0673 - val_accuracy: 0.9426 - val_loss: 0.1533
Epoch 5/15
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.9794 - loss: 0.0596 - val_accuracy: 0.9355 - val_loss: 0.1995
Epoch 6/15
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.9842 - loss: 0.0461 - val_accuracy: 0.9363 - val_loss: 0.1816
Epoch 7/15
[1m471/471

<keras.src.callbacks.history.History at 0x7b7c881fcdd0>

In [99]:
y_pred=model.predict(X_test)
y_pred=np.where(y_pred > 0.5, 1,0)

[1m464/464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [100]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test,y_pred)
print(f"Accuracy Score: {accuracy}")

Accuracy Score: 0.9379766484443545


In [101]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.94      0.94      0.94      7717
           1       0.94      0.93      0.94      7100

    accuracy                           0.94     14817
   macro avg       0.94      0.94      0.94     14817
weighted avg       0.94      0.94      0.94     14817



In [102]:
y_pred=model.predict(X_test)
y_pred=np.where(y_pred > 0.5, 1,0)

[1m464/464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [103]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test,y_pred)
print(f"Accuracy Score: {accuracy}")

Accuracy Score: 0.9379766484443545


In [104]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.94      0.94      0.94      7717
           1       0.94      0.93      0.94      7100

    accuracy                           0.94     14817
   macro avg       0.94      0.94      0.94     14817
weighted avg       0.94      0.94      0.94     14817

