In [1]:
import pandas as pd

    # Load the fake and real news datasets
df_fake = pd.read_csv("Fake.csv")  # Fake news
df_real = pd.read_csv("True.csv")  # Real news

    # Combine both datasets and add labels
df_fake["label"] = "fake"
df_real["label"] = "real"

    # Merge into a single DataFrame
df = pd.concat([df_fake, df_real], ignore_index=True)

    # Display dataset info
print(df.head())
print(df["label"].value_counts())  # Check label distribution

                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date label  
0  December 31, 2017  fake  
1  December 31, 2017  fake  
2  December 30, 2017  fake  
3  December 29, 2017  fake  
4  December 25, 2017  fake  
label
fake    23481
real    21417
Name: count, dtype: int64


In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

    # Data Preprocessing
X = df['text'].values  # News text
y = df['label'].values  # Labels (Fake/Real)

    # Encode Labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)  # Fake = 1, Real = 0

    # Tokenization
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)
X = pad_sequences(X, maxlen=300)  # Pad sequences to the same length

    # Split Dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Build LSTM Model
model = Sequential([
    Embedding(input_dim=5000, output_dim=64),
    LSTM(64, return_sequences=True),
    LSTM(64),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary Classification (Fake/Real)
])

    # Compile Model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train Model
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m334s[0m 286ms/step - accuracy: 0.9176 - loss: 0.1937 - val_accuracy: 0.8758 - val_loss: 0.2361
Epoch 2/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 277ms/step - accuracy: 0.9777 - loss: 0.0687 - val_accuracy: 0.9910 - val_loss: 0.0329
Epoch 3/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m326s[0m 290ms/step - accuracy: 0.9907 - loss: 0.0306 - val_accuracy: 0.9796 - val_loss: 0.0587
Epoch 4/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m313s[0m 279ms/step - accuracy: 0.9918 - loss: 0.0290 - val_accuracy: 0.9903 - val_loss: 0.0354
Epoch 5/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m324s[0m 288ms/step - accuracy: 0.9936 - loss: 0.0232 - val_accuracy: 0.9927 - val_loss: 0.0325


<keras.src.callbacks.history.History at 0x1cbdb84ea50>

In [6]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")
print(f"Test Loss: {loss * 10:.2f}%")

[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 95ms/step - accuracy: 0.9928 - loss: 0.0381
Test Accuracy: 99.27%
Test Loss: 0.33%
