In [64]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Input
from sklearn.model_selection import train_test_split

In [None]:
#read the dataset
df = pd.read_csv('fake_news_dataset.csv')

In [50]:
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [52]:
df.head()

Unnamed: 0,text,label
0,WASHINGTON (Reuters) - Two prominent Republica...,1
1,Michigan born and raised rock phenom Kid Rock ...,0
2,VIENNA (Reuters) - Austria s shift to the righ...,1
3,Dan Rather just released a Facebook post that ...,0
4,WASHINGTON (Reuters) - Republican Senate Major...,1


In [59]:
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)

vocab_size = 10000
max_length = 200
embedding_dim = 100

tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_seq, maxlen=max_length, padding="post", truncating="post")
X_test_pad = pad_sequences(X_test_seq, maxlen=max_length, padding="post", truncating="post")

In [None]:
#define the model 
model = Sequential([
    Input(shape=(max_length,)),
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    LSTM(128, return_sequences=True),
    Dropout(0.3),
    LSTM(64),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)
#train the model
history = model.fit(
    X_train_pad, y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test_pad, y_test),
    callbacks=[early_stopping]
)

Epoch 1/10
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m493s[0m 544ms/step - accuracy: 0.6087 - loss: 0.6262 - val_accuracy: 0.8386 - val_loss: 0.4060
Epoch 2/10
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m488s[0m 545ms/step - accuracy: 0.8784 - loss: 0.2989 - val_accuracy: 0.9731 - val_loss: 0.1065
Epoch 3/10
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m488s[0m 530ms/step - accuracy: 0.9807 - loss: 0.0901 - val_accuracy: 0.9908 - val_loss: 0.0490
Epoch 4/10
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m516s[0m 545ms/step - accuracy: 0.9862 - loss: 0.0673 - val_accuracy: 0.9860 - val_loss: 0.0627
Epoch 5/10
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m508s[0m 552ms/step - accuracy: 0.9946 - loss: 0.0254 - val_accuracy: 0.9961 - val_loss: 0.0222
Epoch 6/10
[1m896/896[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m491s[0m 548ms/step - accuracy: 0.9968 - loss: 0.0193 - val_accuracy: 0.9955 - val_loss: 0.0242
Epoc

In [None]:
#save the model
model.save("fake_news_lstm.h5")
import pickle

with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)



In [68]:
model.save("fake_news_lstm.keras")

In [None]:
from tensorflow.keras.models import load_model

#load the model for testing 
loaded_model = load_model("fake_news_lstm.h5")

with open("tokenizer.pkl", "rb") as f:
    loaded_tokenizer = pickle.load(f)

new_text = "Coca-Cola Cures Cancer, Scientists Confirm Text: Researchers at an undisclosed laboratory claim that drinking Coca-Cola daily can eliminate cancer cells. Pharmaceutical companies are allegedly trying to suppress this breakthrough."
new_seq = loaded_tokenizer.texts_to_sequences(new_text)
new_pad = pad_sequences(new_seq, maxlen=max_length, padding="post", truncating="post")

prediction = loaded_model.predict(new_pad)[0][0]
print("Prediction:", "Fake" if prediction > 0.5 else "Real")



[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 167ms/step
Prediction: Real
