In [1]:
pip install pandas numpy nltk tensorflow flask scikit-learn




In [2]:
import pandas as pd
import numpy as np
import nltk
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, SpatialDropout1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from flask import Flask, render_template, request

In [3]:
# Download the NLTK stopword corpus (needed before the word list can be read)
nltk.download('stopwords')
# Corpus reader used below to build the English stopword set
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [4]:
# Read the fake-news and real-news CSV files into separate DataFrames.
# Replace the file names below with your own dataset paths if needed.
Fake_df, True_df = (pd.read_csv(csv_name) for csv_name in ('Fake.csv', 'True.csv'))

In [5]:
# Text preprocessing setup: collect the English stopwords into a set
# so that membership checks in preprocess_text are fast.
english_stopword_list = stopwords.words('english')
stop_words = set(english_stopword_list)

def preprocess_text(text, stopword_set=None):
    """Lowercase ``text`` and remove stopwords.

    Parameters
    ----------
    text : str or missing value
        Raw document text. ``None`` and float NaN (the value pandas uses for
        a missing cell) produce ``''`` instead of raising ``AttributeError``.
        Any other non-string value is converted with ``str()`` first.
    stopword_set : collection of str, optional
        Lowercase words to drop. When omitted, the module-level
        ``stop_words`` set is used.

    Returns
    -------
    str
        The remaining lowercase words joined by single spaces.
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if stopword_set is None:
        stopword_set = stop_words
    words = str(text).lower().split()  # split() also collapses runs of whitespace
    return ' '.join(word for word in words if word not in stopword_set)

# Clean the 'text' column of both frames in place with preprocess_text
for news_frame in (Fake_df, True_df):
    news_frame['text'] = news_frame['text'].apply(preprocess_text)



In [6]:
# Tag each source frame with its class before merging:
# Fake_df rows are fake news (0), True_df rows are real news (1).
True_df['label'] = 1
Fake_df['label'] = 0

# Stack both frames (fake first, then real) into one dataset with a fresh index
news_frames = [Fake_df, True_df]
df = pd.concat(news_frames, ignore_index=True)

In [7]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X, y = df['text'], df['label']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Fit the tokenizer on the training texts only, then convert both splits
# to integer sequences using that same vocabulary.
tokenizer = Tokenizer(num_words=5000, lower=True, split=' ')
tokenizer.fit_on_texts(X_train)

X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

In [9]:
# Pad/truncate every sequence to a fixed length of 300 tokens
X_train_pad, X_test_pad = (
    pad_sequences(sequences, maxlen=300) for sequences in (X_train_seq, X_test_seq)
)

In [10]:
# LSTM Model: embedding -> spatial dropout -> LSTM -> sigmoid output for
# binary classification (0 = fake, 1 = real).
# The Embedding vocabulary size (5000) matches Tokenizer(num_words=5000).
# The deprecated `input_length` argument is intentionally omitted; the input
# shape is inferred from the data passed to fit().
model = Sequential([
    Embedding(5000, 128),
    SpatialDropout1D(0.2),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])



In [11]:
# Train for 5 epochs in batches of 64.
# Note: the held-out test split is passed as the validation data here.
history = model.fit(
    X_train_pad,
    y_train,
    validation_data=(X_test_pad, y_test),
    epochs=5,
    batch_size=64,
    verbose=2,
)


Epoch 1/5
562/562 - 401s - 713ms/step - accuracy: 0.9741 - loss: 0.0828 - val_accuracy: 0.9796 - val_loss: 0.0712
Epoch 2/5
562/562 - 446s - 794ms/step - accuracy: 0.9855 - loss: 0.0408 - val_accuracy: 0.9919 - val_loss: 0.0283
Epoch 3/5
562/562 - 438s - 779ms/step - accuracy: 0.9936 - loss: 0.0220 - val_accuracy: 0.9955 - val_loss: 0.0216
Epoch 4/5
562/562 - 437s - 778ms/step - accuracy: 0.9941 - loss: 0.0195 - val_accuracy: 0.9959 - val_loss: 0.0159
Epoch 5/5
562/562 - 445s - 792ms/step - accuracy: 0.9964 - loss: 0.0121 - val_accuracy: 0.9948 - val_loss: 0.0194


In [12]:
# Persist the trained model to disk in the native Keras format.
# NOTE(review): the fitted tokenizer is not saved in this cell — confirm it is
# persisted elsewhere if the model will be loaded outside this notebook.
model_path = 'my_model.keras'
model.save(model_path)