Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
import re
from wordcloud import WordCloud

In [2]:
from sklearn.utils.class_weight import compute_class_weight

In [3]:
nltk.download('stopwords')
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [5]:
from tensorflow.keras.callbacks import EarlyStopping

In [6]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Embedding,LSTM,MaxPool1D,Conv1D



Importing Datasets

In [7]:
fake_news = pd.read_csv('/content/drive/MyDrive/Colab files/Fake.csv')
true_news = pd.read_csv('/content/drive/MyDrive/Colab files/True.csv')

WordCloud


Dataset preprocessing

In [8]:
true_news['class'] = 1
fake_news['class'] = 0



data = pd.concat([fake_news, true_news], axis=0).sample(frac=1).reset_index(drop=True)



def clean_text(text):
    text = re.sub(r'[^a-zA-Z]', ' ', text)
    text = text.lower().split()
    text = [word for word in text if word not in stop_words]
    return " ".join(text)

data['title'] = data['title'].fillna('').apply(clean_text)

Visualizing data distribution

In [10]:
df = pd.DataFrame(data)
X = df['title'].values
y = df['class'].values

Tokenization

In [11]:
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(X)
sequences = tokenizer.texts_to_sequences(X)
maxlen = 20
X_padded = pad_sequences(sequences, maxlen=maxlen, padding='post')

Using and importing LSTM model

In [17]:
from tensorflow.keras.layers import Bidirectional,Embedding
# Sequential Model
model = Sequential()
total_words = 5000
# embeddidng layer
model.add(Embedding(total_words, output_dim = 128))
# model.add(Embedding(total_words, output_dim = 240))


# Bi-Directional RNN and LSTM
model.add(Bidirectional(LSTM(128))) # no of neurons

# Dense layers
model.add(Dense(128, activation = 'relu'))
model.add(Dense(1,activation= 'sigmoid')) # reason: we do binary classification here
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
model.summary()
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.3, random_state=42)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)
class_weights = compute_class_weight(class_weight='balanced', classes=np.array([0, 1]), y=y_train)
model.fit(X_train, y_train, epochs=5, batch_size=128, validation_split=0.1, callbacks=[early_stop],class_weight={0: class_weights[0], 1: class_weights[1]})


Epoch 1/10
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.8521 - loss: 0.3141 - val_accuracy: 0.9313 - val_loss: 0.1785
Epoch 2/10
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.9616 - loss: 0.1066 - val_accuracy: 0.9561 - val_loss: 0.1195
Epoch 3/10
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.9802 - loss: 0.0553 - val_accuracy: 0.9504 - val_loss: 0.1235
Epoch 4/10
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9851 - loss: 0.0441 - val_accuracy: 0.9548 - val_loss: 0.1444


<keras.src.callbacks.history.History at 0x797fe012e310>

In [19]:
y_pred = (model.predict(X_test) > 0.5).astype("int32")
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", accuracy)

[1m421/421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Accuracy Score: 0.9557535263548627


Predicting

In [20]:
def predict_news(text):

    tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
    tokenizer.fit_on_texts(text)
    sequences = tokenizer.texts_to_sequences(text)
    maxlen = 20
    padded = pad_sequences(sequences, maxlen=maxlen, padding='post')
    pred = model.predict(padded)[0][0]



    label = "Real News" if pred > 0.5 else "Fake News"
    print(f"Prediction: {label} ({pred:.2f})")


In [21]:
predict_news("NASA Confirms Presence of Water on the Moon")
predict_news("World Health Organization Approves Malaria Vaccine")
predict_news("Apple Unveils Next-Generation iPhone with New AI Features")
predict_news("UN Climate Agreement Signed by 150 Nations")
predict_news("Pfizer Announces New Drug to Treat Lung Cancer")

predict_news("Bill Gates Installs Tracking Chips in COVID Vaccines")
predict_news("Aliens Found Working at Go cgjvhbnklhxdtybhonjbfvyuogle Headquarters")
predict_news("5G Towers Responsible for Birhvjhkjcfh kgjbhkjkld Deaths Across the Globe")
predict_news("The Earth is Flat and NASA Faked Allfgxitgxcyktv Space Missions")
predict_news("Drinking Bleach Can Cure Coronavirus, Experts Say")


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Prediction: Fake News (0.29)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Prediction: Fake News (0.38)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Prediction: Real News (0.93)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Prediction: Real News (0.52)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Prediction: Fake News (0.23)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Prediction: Fake News (0.25)
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Prediction: Real News (0.76)
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Prediction: Real News (0.51)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Prediction: Real News (0.76)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Prediction: Fake Ne