In [28]:
# Fake News Detection using RNN - Jupyter Notebook

# First, let's import the necessary libraries
import numpy as np
import pandas as pd
import nltk
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import os

In [29]:
# Load the data
# load csv files to dataframes
df_fake=pd.read_csv('../datasets/dataset1/archive/Fake.csv')
df_real=pd.read_csv('../datasets/dataset1/archive/True.csv')
df_fake.head()


Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [30]:
# choose the necessary columns 
df_fake=df_fake.iloc[:,0:2]
df_real=df_real.iloc[:,0:2]

In [31]:
# label the class values
df_fake['class']=0
df_real['class']=1

In [32]:
# concatenate 2 dataframes
df=pd.concat([df_fake, df_real], ignore_index=True, sort=False )

print(df.shape)
print(len(df))
#merge "title" and "text" values in same column
df.insert(0,column="title_text", value=df['text'] + " " + df['text']) 
#remove previous columns that are merged
df.drop (['text', 'text'], inplace=True, axis=1)

(44898, 3)
44898


In [33]:
df.head()

Unnamed: 0,title_text,title,class
0,Donald Trump just couldn t wish all Americans ...,Donald Trump Sends Out Embarrassing New Year’...,0
1,House Intelligence Committee Chairman Devin Nu...,Drunk Bragging Trump Staffer Started Russian ...,0
2,"On Friday, it was revealed that former Milwauk...",Sheriff David Clarke Becomes An Internet Joke...,0
3,"On Christmas day, Donald Trump announced that ...",Trump Is So Obsessed He Even Has Obama’s Name...,0
4,Pope Francis used his annual Christmas Day mes...,Pope Francis Just Called Out Donald Trump Dur...,0


In [34]:

# Preprocess the data
X = df.title_text.values
y = df['class'].values

In [35]:
# Encode labels
le = LabelEncoder()
y = le.fit_transform(y)

In [36]:
# Tokenize text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)

In [37]:
# Pad sequences
max_length = 100
X = pad_sequences(X, maxlen=max_length)

In [38]:
# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


In [39]:
# Build the model
vocab_size = len(tokenizer.word_index) + 1

model = Sequential([
    Embedding(vocab_size, 100, input_length=max_length),
    LSTM(128),
    Dense(1, activation='sigmoid')
])
model.build(input_shape=(None, max_length))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])




In [40]:
# Display model summary
model.summary()

In [None]:
# Train the model
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=5, batch_size=64)


Epoch 1/5


In [None]:
# Plot training history
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Function to make predictions
def predict_fake_news(text):
    sequence = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequence, maxlen=max_length)
    prediction = model.predict(padded)[0][0]
    return "Fake" if prediction > 0.5 else "Real"

In [None]:
# Test the model with some example texts
example_texts = [
    "Breaking: Scientists discover new planet capable of supporting life!",
    "Local community comes together to clean up neighborhood park",
    "Shocking: Celebrity secretly a robot, inside sources reveal",
    "New study shows benefits of regular exercise on mental health"
]

for text in example_texts:
    prediction = predict_fake_news(text)
    print(f"Text: {text}")
    print(f"Prediction: {prediction}\n")

In [None]:
# Interactive prediction
while True:
    user_input = input("Enter a news headline (or 'quit' to exit): ")
    if user_input.lower() == 'quit':
        break
    prediction = predict_fake_news(user_input)
    print(f"Prediction: {prediction}\n")

print("Thank you for using the Fake News Detector!")