In [24]:
import pandas as pd
import numpy as np
import tensorflow as tf
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [25]:
# Location of the news dataset CSV; kept in one named constant so it is easy to spot and change.
DATA_PATH = 'C://Users//USER//Documents//fake_or_news.csv'
df = pd.read_csv(DATA_PATH)

In [26]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,Text,label
0,Top Trump Surrogate BRUTALLY Stabs Him In The...,Fake
1,U.S. conservative leader optimistic of common ...,Real
2,"Trump proposes U.S. tax overhaul, stirs concer...",Real
3,Court Forces Ohio To Allow Millions Of Illega...,Fake
4,Democrats say Trump agrees to work on immigrat...,Real


In [27]:
# Function to clean text
def clean_text(Text):
    """Normalize a raw text string before tokenization.

    Every character that is not an ASCII letter or whitespace (digits,
    punctuation, symbols, non-ASCII characters) is removed; the result is
    lower-cased and stripped of leading/trailing whitespace.

    Args:
        Text: The raw string to clean.

    Returns:
        The cleaned, lower-cased string.

    Raises:
        TypeError: If ``Text`` is not a string (raised by ``re.sub``).
    """
    # Flags must be passed by keyword: the fourth positional parameter of
    # re.sub is ``count``, so a positional ``re.I|re.A`` (== 258) would cap the
    # number of substitutions at 258 and never apply the flags at all.
    Text = re.sub(r'[^a-zA-Z\s]', '', Text, flags=re.I | re.A)
    Text = Text.lower()
    Text = Text.strip()
    return Text

In [28]:
# Run clean_text over every entry of the Text column, overwriting the column in place
df['Text'] = df['Text'].map(clean_text)

In [29]:
# Pull the model inputs (Text column) and the targets (label column) out of the frame
X, y = (df[column].values for column in ('Text', 'label'))

In [30]:
# Encode the string labels as integers: Fake -> 0, Real -> 1.
# Any label outside the mapping raises KeyError.
label_mapping = dict(Fake=0, Real=1)
y = np.array(list(map(label_mapping.__getitem__, y)))

In [31]:
# Cast the integer labels to 32-bit floats
y = y.astype(np.float32)

In [32]:
# Tokenize the text data: fit a vocabulary on X, encode each text as a list of
# integer ids, then pad/truncate every sequence at its end to a fixed length.
# NOTE(review): the vocabulary is fitted on all of X, i.e. before the
# train/test split performed in a later cell.
tokenizer = Tokenizer(oov_token='<OOV>', num_words=5000)
tokenizer.fit_on_texts(X)
sequences = tokenizer.texts_to_sequences(X)
padded_sequences = pad_sequences(
    sequences,
    maxlen=200,
    padding='post',
    truncating='post',
)

In [33]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    y,
    random_state=42,
    test_size=0.2,
)

In [34]:
# Build the model layer by layer:
# embedding -> two stacked bidirectional LSTMs (dropout in between) -> dense head
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=64, input_length=200))
# return_sequences=True so the second LSTM receives the full sequence
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(32)))
model.add(Dense(64, activation='relu'))
# Single sigmoid unit for the binary output
model.add(Dense(1, activation='sigmoid'))




In [35]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the tracked metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [36]:
# Train for 10 epochs in batches of 64; the test split doubles as the validation data
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(X_test, y_test),
)


Epoch 1/10
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 468ms/step - accuracy: 0.8705 - loss: 0.3244 - val_accuracy: 0.9985 - val_loss: 0.0061
Epoch 2/10
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 471ms/step - accuracy: 0.9992 - loss: 0.0025 - val_accuracy: 0.9980 - val_loss: 0.0050
Epoch 3/10
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 493ms/step - accuracy: 1.0000 - loss: 3.4508e-04 - val_accuracy: 0.9995 - val_loss: 0.0019
Epoch 4/10
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 491ms/step - accuracy: 1.0000 - loss: 1.1675e-04 - val_accuracy: 0.9995 - val_loss: 0.0017
Epoch 5/10
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 482ms/step - accuracy: 1.0000 - loss: 6.7437e-05 - val_accuracy: 0.9995 - val_loss: 0.0018
Epoch 6/10
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 436ms/step - accuracy: 1.0000 - loss: 4.0750e-05 - val_accuracy: 0.9995 - val_loss: 0

In [37]:
# Evaluate the model: threshold the predicted values at 0.5 to get 0/1 labels,
# then report overall accuracy and the per-class metrics on the test split
y_pred = (model.predict(X_test) > 0.5).astype('int32')
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print(f'Accuracy: {test_accuracy}')
print(test_report)


[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 98ms/step
Accuracy: 0.9994949494949495
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       973
         1.0       1.00      1.00      1.00      1007

    accuracy                           1.00      1980
   macro avg       1.00      1.00      1.00      1980
weighted avg       1.00      1.00      1.00      1980

