In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Load datasets: both CSV paths are resolved relative to the notebook's
# working directory.
fake_csv_path = 'Fake.csv'
true_csv_path = 'True.csv'
df_fake = pd.read_csv(fake_csv_path)
df_true = pd.read_csv(true_csv_path)

In [3]:
# Add labels: every row of the fake set gets label 1, every row of the
# true set gets label 0.
df_fake = df_fake.assign(label=1)
df_true = df_true.assign(label=0)

In [4]:
# Combine title + text
def _title_and_text(frame):
    """Return the frame's 'title' and 'text' columns joined by a single space."""
    return frame['title'] + " " + frame['text']


df_fake['content'] = _title_and_text(df_fake)
df_true['content'] = _title_and_text(df_true)


In [5]:
# Combine datasets: stack the fake rows on top of the true rows, discard any
# row whose combined content is missing, then renumber the index from 0.
df = (
    pd.concat([df_fake, df_true], axis=0)
    .dropna(subset=["content"])
    .reset_index(drop=True)
)

In [6]:
# Tokenize
# Hyper-parameters shared by the tokenizer, the padding step and the model.
vocab_size, max_length, embedding_dim = 20000, 300, 100

# The tokenizer is fitted on the same texts it later encodes.
corpus = df['content']
tokenizer = Tokenizer(oov_token="<OOV>", num_words=vocab_size)
tokenizer.fit_on_texts(corpus)
sequences = tokenizer.texts_to_sequences(corpus)

# Bring every sequence to exactly max_length entries; both padding and
# truncation are applied at the end of the sequence ('post').
padded = pad_sequences(
    sequences,
    maxlen=max_length,
    padding='post',
    truncating='post',
)

# Prepare data
X = padded

In [7]:
# Build model
# The input shape is declared with an explicit Input rather than the Embedding
# layer's `input_length` argument: newer Keras releases deprecate that
# argument, and declaring the input up front lets the model be built
# immediately so that model.summary() can report output shapes and
# parameter counts.
model = tf.keras.Sequential([
    # One integer token id per position, max_length positions per sample.
    tf.keras.Input(shape=(max_length,), dtype="int32"),
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    # return_sequences=True keeps one output per time step so the pooling
    # layer below can take the maximum over the sequence axis.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    # Single sigmoid unit: one score in [0, 1] per sample.
    tf.keras.layers.Dense(1, activation='sigmoid')
])



In [8]:
# Compile with a binary cross-entropy loss and the Adam optimizer, tracking
# accuracy, then print the layer-by-layer overview.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [12]:
def predict_fake_news(text, model, tokenizer, max_length=300):
    """Classify a single news text as fake or real.

    Args:
        text: The news text to classify.
        model: Object whose ``predict`` method is called on the padded
            sequence; element ``[0][0]`` of its result is used as the score.
        tokenizer: Object whose ``texts_to_sequences`` method turns the text
            into a sequence of token ids.
        max_length: Length the sequence is padded or truncated to (both
            applied at the end). Defaults to 300.

    Returns:
        "Fake News 🟥" when the score is greater than 0.5, otherwise
        "Real News ✅".
    """
    # The text is wrapped in a list so it is processed as a batch of one.
    token_ids = tokenizer.texts_to_sequences([text])
    model_input = pad_sequences(
        token_ids,
        maxlen=max_length,
        padding='post',
        truncating='post',
    )
    score = model.predict(model_input)[0][0]
    if score > 0.5:
        return "Fake News 🟥"
    return "Real News ✅"

# Example usage: classify one headline and print the resulting verdict.
test_news = "Breaking: President signs new climate change bill"
verdict = predict_fake_news(test_news, model, tokenizer)
print(verdict)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Real News ✅


In [13]:
# Classify the headline defined in this cell. The variable passed to the
# classifier must be `text_news`; the similarly named `test_news` belongs to
# the previous example and would just repeat that example's result.
text_news = 'Ahmedabad Air India Plane Crash Live: DNA samples of 119 Air India plane crash victims matched; 76 bodies handed over'
print(predict_fake_news(text_news, model, tokenizer))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
Real News ✅
