In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, SpatialDropout1D
from tensorflow.keras.callbacks import EarlyStopping

VOCAB_SIZE = 10000
MAX_LENGTH = 128
EMBEDDING_DIM = 64
BUFFER_SIZE = 10000
BATCH_SIZE = 32
EPOCHS = 10

try:
    df = pd.read_csv('fake_news_dataset(in).csv')
except FileNotFoundError:
    print("Error: 'fake_news_dataset(in).csv' not found. Please make sure the CSV file is in the correct directory and you have provided the correct path.")
    exit()

if 'text' not in df.columns or 'label' not in df.columns:
    print("Error: The CSV file must contain 'text' and 'label' columns.")
    exit()

X = df['text']
df['label'] = df['label'].apply(lambda x: 1 if x == 'real' else 0)
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Data loaded and split successfully.")
print(f"Training set size: {len(X_train)}")
print(f"Testing set size: {len(X_test)}")

tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token="<OOV>")

tokenizer.fit_on_texts(X_train)

X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_sequences, maxlen=MAX_LENGTH, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_sequences, maxlen=MAX_LENGTH, padding='post', truncating='post')

print("\nText preprocessing complete.")
print(f"Shape of training data after padding: {X_train_padded.shape}")
print(f"Shape of testing data after padding: {X_test_padded.shape}")

model = Sequential([
    Embedding(VOCAB_SIZE, EMBEDDING_DIM, input_length=MAX_LENGTH),
    SpatialDropout1D(0.2),
    GRU(64, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()

early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

print("\nStarting model training...")

history = model.fit(X_train_padded, y_train,
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    validation_data=(X_test_padded, y_test),
                    callbacks=[early_stopping],
                    verbose=1)

print("\nModel training finished.")

print("\nEvaluating the model...")
loss, accuracy = model.evaluate(X_test_padded, y_test, verbose=0)

print(f'Test Loss: {loss:.4f}')
print(f'Test Accuracy: {accuracy*100:.2f}%')

Data loaded and split successfully.
Training set size: 16000
Testing set size: 4000

Text preprocessing complete.
Shape of training data after padding: (16000, 128)
Shape of testing data after padding: (4000, 128)





Starting model training...
Epoch 1/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 142ms/step - accuracy: 0.4973 - loss: 0.6938 - val_accuracy: 0.5095 - val_loss: 0.6926
Epoch 2/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 140ms/step - accuracy: 0.5696 - loss: 0.6837 - val_accuracy: 0.5045 - val_loss: 0.6969
Epoch 3/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 135ms/step - accuracy: 0.6129 - loss: 0.6608 - val_accuracy: 0.5063 - val_loss: 0.7089
Epoch 4/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 137ms/step - accuracy: 0.6302 - loss: 0.6427 - val_accuracy: 0.5077 - val_loss: 0.7161

Model training finished.

Evaluating the model...
Test Loss: 0.6926
Test Accuracy: 50.95%
