# LSTM Model for Fake News Detection
Steps:
1. Load cleaned dataset
2. Preprocess text (Tokenizer + sequences + padding)
3. Split dataset into train, validation, and test
4. Build LSTM model
5. Train model with validation
6. Evaluate performance
7. Save model and tokenizer

Load Dataset

In [3]:
import pandas as pd

# Read the cleaned corpus from the project-relative Data folder.
data = pd.read_csv("Data/cleaned.csv")

# Target column; the dataset spells the column name 'lable', so it is kept as-is.
Y = data["lable"]
# Model input: the article text, with every entry cast to str.
X = data["content"].astype(str)

print("Data loaded successfully", data.shape)

Data loaded successfully (44898, 3)


Preprocess text

Use a Tokenizer: convert text → integer sequences → pad (or truncate) every sequence to a fixed length of 200 tokens.

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
import pickle

# Vocabulary cap and fixed sequence length. The Embedding layer built later
# uses the same two numbers (20000 and 200), so change them together.
MAX_WORDS = 20000
MAX_LEN = 200
TOKENIZER_PATH = 'artifacts/tokenizer.pickle'

# Bind the fitted instance to a lowercase name so the imported `Tokenizer`
# class is not shadowed by its own instance for the rest of the notebook.
# NOTE(review): the vocabulary is fitted on the full corpus before the
# train/val/test split, so validation/test words influence the word index.
# Consider fitting on the training texts only -- confirm whether this matters here.
tokenizer = Tokenizer(num_words=MAX_WORDS)
tokenizer.fit_on_texts(X)

# Persist the fitted tokenizer so inference code can reproduce the same
# word -> index mapping. Create the folder first so a fresh checkout works.
os.makedirs(os.path.dirname(TOKENIZER_PATH), exist_ok=True)
with open(TOKENIZER_PATH, 'wb') as f:
    pickle.dump(tokenizer, f)

# Encode outside the `with` block: the file handle is not needed for this step.
X_seq = tokenizer.texts_to_sequences(X)
X_pad = pad_sequences(X_seq, maxlen=MAX_LEN)

Train / validation / test split (80% / 10% / 10%, stratified by label)

In [8]:
from sklearn.model_selection import train_test_split

# Stage 1: keep 80% for training and set 20% aside, preserving the label ratio.
X_train, X_temp, Y_train, Y_temp = train_test_split(
    X_pad,
    Y,
    test_size=0.2,
    random_state=42,
    stratify=Y,
)

# Stage 2: cut the held-out 20% in half, giving 10% validation and 10% test
# of the full dataset, again stratified on the labels.
X_val, X_test, Y_val, Y_test = train_test_split(
    X_temp,
    Y_temp,
    test_size=0.5,
    random_state=42,
    stratify=Y_temp,
)

Build LSTM model

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Input, LSTM, Dense
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and TensorFlow before any weights are created so that
# weight initialisation (and shuffling during training) is repeatable.
set_random_seed(42)

# Binary classifier over padded token-id sequences:
#   Input     -> 200 integer token ids per sample (the padded sequence length)
#   Embedding -> 128-dimensional vector per token, vocabulary capped at 20000 ids
#   LSTM      -> 128-unit summary of the whole sequence
#   Dense     -> 64-unit ReLU hidden layer, then one sigmoid output unit
# The input shape is declared with an explicit Input layer instead of
# Embedding(input_length=...), which Keras 3 deprecates and ignores. This also
# means the model is already built when it is created.
model = Sequential([
    Input(shape=(200,), dtype='int32'),
    Embedding(20000, 128),
    LSTM(128),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])



Manually build to see summary

In [11]:

# Fix the input shape to (batch, 200) so the layers' weights are created,
# then print the per-layer table of output shapes and parameter counts.
input_signature = (None, 200)
model.build(input_shape=input_signature)
model.summary()

Compile & train

In [12]:
# Binary cross-entropy matches the single sigmoid output unit; accuracy is
# tracked as the human-readable metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Keep the returned History object so the per-epoch loss/accuracy curves can be
# inspected or plotted later. Assigning it also stops the bare
# `<...History at 0x...>` repr from being echoed as the cell output.
history = model.fit(
    X_train,
    Y_train,
    epochs=5,
    validation_data=(X_val, Y_val),
)

Epoch 1/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m838s[0m 741ms/step - accuracy: 0.9556 - loss: 0.1259 - val_accuracy: 0.9777 - val_loss: 0.0612
Epoch 2/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m762s[0m 652ms/step - accuracy: 0.9866 - loss: 0.0411 - val_accuracy: 0.9833 - val_loss: 0.0551
Epoch 3/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m734s[0m 654ms/step - accuracy: 0.9872 - loss: 0.0386 - val_accuracy: 0.9802 - val_loss: 0.0677
Epoch 4/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m698s[0m 620ms/step - accuracy: 0.9935 - loss: 0.0198 - val_accuracy: 0.9808 - val_loss: 0.0720
Epoch 5/5
[1m1123/1123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m639s[0m 529ms/step - accuracy: 0.9963 - loss: 0.0118 - val_accuracy: 0.9878 - val_loss: 0.0420


<keras.src.callbacks.history.History at 0x23556154050>

In [13]:
from sklearn.metrics import classification_report, confusion_matrix

# Score the held-out test split; a sigmoid output above 0.5 counts as class 1.
test_probabilities = model.predict(X_test)
Y_pred = (test_probabilities > 0.5).astype(int)

# Per-class precision / recall / F1, followed by the raw confusion counts.
report_text = classification_report(Y_test, Y_pred)
print(report_text)

confusion = confusion_matrix(Y_test, Y_pred)
print("Confusion Matrix:\n", confusion)


[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 184ms/step
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      2348
           1       0.99      0.99      0.99      2142

    accuracy                           0.99      4490
   macro avg       0.99      0.99      0.99      4490
weighted avg       0.99      0.99      0.99      4490

Confusion Matrix:
 [[2331   17]
 [  15 2127]]


Save model

In [15]:
from pathlib import Path

# NOTE(review): this path goes through "../notebook/", while the tokenizer is
# pickled to "artifacts/" relative to the working directory. The two only
# coincide when the notebook runs from the `notebook` folder -- confirm.
MODEL_PATH = "../notebook/artifacts/lstm_model.keras"  # preferred Keras format

# Create the target folder up front so saving does not fail on a fresh
# checkout where the artifacts directory is missing.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)

model.save(MODEL_PATH)
print("LSTM model saved")


LSTM model saved
