In [15]:
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [17]:
# Load the news dataset. The 'text' column holds the article body and the
# 'labels' column holds the class (0 = real, 1 = fake, per the original note).
# NOTE(review): absolute, machine-specific path -- point this at a configurable
# data directory before sharing the notebook.
DATA_PATH = 'c:\\Users\\Adn\\Desktop\\news2.csv'
df = pd.read_csv(DATA_PATH, usecols=['title', 'text', 'subject', 'date', 'labels'])
# Guard against stray index columns; with the explicit `usecols` list above
# none of the selected columns match, so this is a no-op safety net.
df = df.loc[:, ~df.columns.str.contains("^Unnamed")]
# Fill missing articles BEFORE casting to str: casting first turns NaN into the
# literal string "nan", which fillna("") can no longer detect.
df["text"] = df["text"].fillna("").astype(str)
texts = df["text"].values
labels = df["labels"].values


In [28]:
# Vocabulary cap and fixed sequence length used to vectorise the articles.
MAX_VOCAB_SIZE = 20000       # keep only the 20,000 most frequent words
MAX_SEQUENCE_LENGTH = 500    # every article is padded/truncated to this many tokens

# NOTE(review): the tokenizer is fitted on the full corpus, i.e. before the
# train/test split performed in a later cell, so the vocabulary also sees the
# held-out articles. Consider fitting on the training texts only.
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)

# Persist the fitted tokenizer so the same word index can be reused at
# inference time.
with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

print("Tokenizer saved successfully!")

Tokenizer saved successfully!


In [21]:
# Hold out 20% of the padded articles as a test set; the fixed random_state
# keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    labels,
    test_size=0.2,
    random_state=42,
)

# Sanity check: report how many NaN entries the training features and the
# training labels contain.
nan_in_features = np.isnan(X_train).sum()
print("NaN in X_train:", nan_in_features)
nan_in_labels = np.isnan(y_train).sum()
print("NaN in y_train:", nan_in_labels)

NaN in X_train: 0
NaN in y_train: 0


In [22]:
# Build the CNN + LSTM classifier.
# GlobalMaxPooling1D is deliberately left out between Conv1D and the LSTMs
# (presumably so the LSTM still receives a sequence -- confirm).

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# and batch shuffling are repeatable; 42 matches the split's random_state.
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Embedding size is taken from the tokenizer so the two cannot drift apart
    # (the tokenizer is created with num_words=20000).
    Embedding(input_dim=tokenizer.num_words, output_dim=300),  # Word embeddings
    Conv1D(filters=256, kernel_size=5, activation='relu'),  # CNN for feature extraction
    LSTM(128, return_sequences=True),  # First LSTM emits a sequence for the next LSTM
    Dropout(0.3),
    LSTM(64),  # Second LSTM emits only its final state
    Dropout(0.3),
    Dense(32, activation="relu"),
    Dense(1, activation="sigmoid")  # Binary classification (fake/real)
])

# Low learning rate plus gradient-norm clipping to help prevent instability.
optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

In [24]:
# Train the model for two epochs and keep the returned History object so the
# per-epoch loss/accuracy can be inspected or plotted afterwards (binding it
# also stops the bare History repr from being echoed as the cell output).
# NOTE(review): X_test/y_test double as the validation data here and are
# evaluated again in the next cell, so the "test" score is not measured on data
# unseen during monitoring. Consider validation_split on the training set.
history = model.fit(
    X_train,
    y_train,
    epochs=2,
    batch_size=16,
    validation_data=(X_test, y_test),
)

Epoch 1/2
2245/2245 ━━━━━━━━━━━━━━━━━━━━ 2392s 1s/step - accuracy: 0.9154 - loss: 0.2093 - val_accuracy: 0.9948 - val_loss: 0.0249
Epoch 2/2
2245/2245 ━━━━━━━━━━━━━━━━━━━━ 2373s 1s/step - accuracy: 0.9945 - loss: 0.0260 - val_accuracy: 0.9958 - val_loss: 0.0185


<keras.src.callbacks.history.History at 0x1b90e62b860>

In [25]:
# Score the trained model on the held-out split. evaluate() returns the loss
# followed by the metric registered at compile time (accuracy).
loss, accuracy = model.evaluate(
    x=X_test,
    y=y_test,
)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

281/281 ━━━━━━━━━━━━━━━━━━━━ 120s 429ms/step - accuracy: 0.9951 - loss: 0.0217
Test Accuracy: 99.58%


In [27]:
# Write the trained model to a .keras file in the working directory.
MODEL_PATH = "fake_news_model.keras"
model.save(MODEL_PATH)
print("Model saved successfully!")


Model saved successfully!
