In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.model_selection import train_test_split
# Simple dataset with fake and real news
data = {
    "text": [
        "Breaking news! COVID-19 vaccine turns people into zombies.",
        "NASA confirms life on Mars after recent rover discovery!",
        "Stock market crash predicted by top analysts, sell now!",
        "Scientists develop new AI to cure cancer completely.",
        "Government announces economic reforms to boost growth.",
        "Local hero saves cat stuck in a tree."
    ],
    "label": [1, 1, 1, 1, 0, 0]  # 1 = Fake, 0 = Real
}

df = pd.DataFrame(data)

# Splitting dataset
X_train, X_test, y_train, y_test = train_test_split(df["text"], df["label"], test_size=0.2, random_state=42)
tokenizer = Tokenizer(num_words=1000, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

# Convert text to sequences
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Padding sequences to the same length
X_train_padded = pad_sequences(X_train_seq, maxlen=10, padding='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=10, padding='post')
model = Sequential([
    Embedding(input_dim=1000, output_dim=16, input_length=10),
    LSTM(8),  # Simple LSTM layer
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(X_train_padded, np.array(y_train), epochs=5, batch_size=2)
sample_text = ["COVID-19 vaccine is causing mutations!", "New law passed to improve healthcare."]
sample_seq = tokenizer.texts_to_sequences(sample_text)
sample_padded = pad_sequences(sample_seq, maxlen=10, padding='post')

prediction = model.predict(sample_padded)
for i, text in enumerate(sample_text):
    print(f"News: {text} -> Prediction: {'Fake' if prediction[i][0] > 0.5 else 'Real'}")



Epoch 1/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 51ms/step - accuracy: 0.5000 - loss: 0.6945
Epoch 2/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.8333 - loss: 0.6916
Epoch 3/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.8333 - loss: 0.6891
Epoch 4/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 1.0000 - loss: 0.6863
Epoch 5/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 1.0000 - loss: 0.6849 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 316ms/step
News: COVID-19 vaccine is causing mutations! -> Prediction: Real
News: New law passed to improve healthcare. -> Prediction: Real


In [12]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from sklearn.model_selection import train_test_split

# Simple dataset with fake and real news
data = {
    "text": [
        "Breaking news! COVID-19 vaccine turns people into zombies.",
        "NASA confirms life on Mars after recent rover discovery!",
        "Stock market crash predicted by top analysts, sell now!",
        "Scientists develop new AI to cure cancer completely.",
        "Government announces economic reforms to boost growth.",
        "Local hero saves cat stuck in a tree."
    ],
    "label": [1, 1, 1, 1, 0, 0]  # 1 = Fake, 0 = Real
}

df = pd.DataFrame(data)

# Splitting dataset
X_train, X_test, y_train, y_test = train_test_split(df["text"], df["label"], test_size=0.2, random_state=42)

# Tokenization and sequence conversion
tokenizer = Tokenizer(num_words=1000, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Padding sequences
X_train_padded = pad_sequences(X_train_seq, maxlen=10, padding='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=10, padding='post')

# Define Bidirectional LSTM model
model = Sequential([
    Embedding(input_dim=1000, output_dim=16, input_length=10),
    Bidirectional(LSTM(8)),  # Bidirectional LSTM
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Train model
model.fit(X_train_padded, np.array(y_train), epochs=5, batch_size=2)

# Test with new samples
sample_text = ["COVID-19 vaccine is causing mutations!", "New law passed to improve healthcare."]
sample_seq = tokenizer.texts_to_sequences(sample_text)
sample_padded = pad_sequences(sample_seq, maxlen=10, padding='post')

# Predictions
prediction = model.predict(sample_padded)
for i, text in enumerate(sample_text):
    print(f"News: {text} -> Prediction: {'Fake' if prediction[i][0] > 0.5 else 'Real'}")

Epoch 1/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 39ms/step - accuracy: 0.6667 - loss: 0.6893
Epoch 2/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.5000 - loss: 0.6897
Epoch 3/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.6667 - loss: 0.6867
Epoch 4/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.6667 - loss: 0.6845
Epoch 5/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.8333 - loss: 0.6810
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375ms/step
News: COVID-19 vaccine is causing mutations! -> Prediction: Real
News: New law passed to improve healthcare. -> Prediction: Real
