In [1]:
# Import required libraries
import sys
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout # type: ignore
import tensorflow as tf
from tensorflow.keras.models import load_model # type: ignore
from sklearn.metrics import accuracy_score,confusion_matrix

In [2]:
# Add the project root to sys.path
sys.path.insert(0,os.path.abspath('..'))

In [3]:
# Import custom modules
from preprocessing import preprocess_text, text_embedding # type: ignore

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Ciya\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Ciya\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [4]:
# Load dataset
data = pd.read_csv('../data/FakeNewsData.csv')

data.head()

Unnamed: 0,id,title,author,text,label
0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1
3,3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1
4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1


In [5]:
# Drop NaN values
data = data.dropna().reset_index()

In [6]:
# Get shape of the dataset
data.shape

(18285, 6)

In [7]:
# Get independent features
X = data.drop('label',axis = 1)

In [8]:
# Get dependent feature
y = data['label']

In [9]:
# Preprocess the train data
preprocessed_data = text_embedding(X['title'])

preprocessed_data

array([[   0,    0,    0, ..., 3477, 4506, 1184],
       [   0,    0,    0, ..., 4637,  274, 2688],
       [   0,    0,    0, ...,  433, 3884, 1455],
       ...,
       [   0,    0,    0, ...,  285,  711, 3898],
       [   0,    0,    0, ...,  861,  606, 2659],
       [   0,    0,    0, ..., 1303, 1526, 3790]], dtype=int32)

In [10]:
# Split data into train and test
X_train,X_test,y_train,y_test = train_test_split(np.array(preprocessed_data),np.array(y),test_size=0.33,random_state=42)

In [11]:
# Get shapes of the train and test data
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((12250, 47), (6035, 47), (12250,), (6035,))

In [12]:
# Create the model
model = Sequential()
model.add(Embedding(input_dim = 5000,
                    output_dim = 40,
                    input_length = 47))
model.add(Dropout(0.3))
model.add(LSTM(100))
model.add(Dropout(0.3))
model.add(Dense(1,activation='sigmoid'))



In [13]:
# Compile the model
model.compile(loss = 'binary_crossentropy',
              optimizer = 'adam',
              metrics = ['accuracy'])

In [14]:
# Train the model
with tf.device('/GPU:0'):  # Explicitly run on GPU
  model.fit(X_train,
            y_train,
            epochs = 10,
            batch_size = 64)

Epoch 1/10
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.7600 - loss: 0.4577
Epoch 2/10
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 0.9377 - loss: 0.1598
Epoch 3/10
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.9625 - loss: 0.1047
Epoch 4/10
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.9728 - loss: 0.0743
Epoch 5/10
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.9836 - loss: 0.0495
Epoch 6/10
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - accuracy: 0.9901 - loss: 0.0351
Epoch 7/10
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.9943 - loss: 0.0190
Epoch 8/10
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.9962 - loss: 0.0172
Epoch 9/10
[1m192/192[0m [32m

In [15]:
# Save the model
model.save('../model/fake_news_model.h5')



In [16]:
# Load the trained model
model = load_model('../model/fake_news_model.h5')



In [17]:
# Get model summary
model.summary()

In [18]:
# Predict the based on the data using saved model
y_pred = model.predict(X_test)
y_pred = np.array((y_pred > 0.5).astype('int')).flatten()

y_pred

[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


array([1, 0, 0, ..., 0, 1, 1])

In [19]:
# Get accuracy score
score = accuracy_score(y_test,y_pred)

score

0.9093620546810274

In [20]:
# Get confusion matrix
matrix = confusion_matrix(y_test,y_pred)

matrix

array([[3106,  313],
       [ 234, 2382]])