In [None]:
#import all libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
df=pd.read_csv('/content/WELFake_Dataset.csv',engine='python',on_bad_lines='skip')

pandas uses the C engine (faster but less flexible).

The Python engine is slower but more tolerant, and it lets you handle bad lines more flexibly.

So:

C engine → fast, strict.

Python engine → slower, but can handle messy CSVs.

In [None]:
df.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,0,LAW ENFORCEMENT ON HIGH ALERT Following Threat...,No comment is expected from Barack Obama Membe...,1
1,1,,Did they post their votes for Hillary already?,1
2,2,UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...,"Now, most of the demonstrators gathered last ...",1
3,3,"Bobby Jindal, raised Hindu, uses story of Chri...",A dozen politically active pastors came here f...,0
4,4,SATAN 2: Russia unvelis an image of its terrif...,"The RS-28 Sarmat missile, dubbed Satan 2, will...",1


In [None]:
# check null value
df.isnull().sum()

Unnamed: 0,0
Unnamed: 0,0
title,565
text,57
label,20


In [None]:
df.shape

(72154, 4)

In [None]:
# as out data is big so we are dropping all the null values
df.dropna(inplace=True)

In [None]:
df.isnull().sum()

Unnamed: 0,0
Unnamed: 0,0
title,0
text,0
label,0


In [None]:
#get the independent and dependent feature
x=df.drop('label',axis=1)
y=df['label']


In [None]:
x.head()

Unnamed: 0.1,Unnamed: 0,title,text
0,0,LAW ENFORCEMENT ON HIGH ALERT Following Threat...,No comment is expected from Barack Obama Membe...
2,2,UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...,"Now, most of the demonstrators gathered last ..."
3,3,"Bobby Jindal, raised Hindu, uses story of Chri...",A dozen politically active pastors came here f...
4,4,SATAN 2: Russia unvelis an image of its terrif...,"The RS-28 Sarmat missile, dubbed Satan 2, will..."
5,5,About Time! Christian Group Sues Amazon and SP...,All we can say on this one is it s about time ...


In [None]:
#drop the first column
x=x.drop('Unnamed: 0',axis=1)
x.head()

Unnamed: 0,title,text
0,LAW ENFORCEMENT ON HIGH ALERT Following Threat...,No comment is expected from Barack Obama Membe...
2,UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...,"Now, most of the demonstrators gathered last ..."
3,"Bobby Jindal, raised Hindu, uses story of Chri...",A dozen politically active pastors came here f...
4,SATAN 2: Russia unvelis an image of its terrif...,"The RS-28 Sarmat missile, dubbed Satan 2, will..."
5,About Time! Christian Group Sues Amazon and SP...,All we can say on this one is it s about time ...


In [None]:
#distribution of target variable
y.value_counts()  #

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
1,36507
0,35028
is just unbelievably rich from North Sea oil.,1
за участие. Благодарю вас.,1


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Bidirectional

In [None]:
#vocabulary size
voc_size=5000

In [None]:
messages=x.copy()

In [None]:
messages

Unnamed: 0,title,text
0,LAW ENFORCEMENT ON HIGH ALERT Following Threat...,No comment is expected from Barack Obama Membe...
2,UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...,"Now, most of the demonstrators gathered last ..."
3,"Bobby Jindal, raised Hindu, uses story of Chri...",A dozen politically active pastors came here f...
4,SATAN 2: Russia unvelis an image of its terrif...,"The RS-28 Sarmat missile, dubbed Satan 2, will..."
5,About Time! Christian Group Sues Amazon and SP...,All we can say on this one is it s about time ...
...,...,...
72149,Russians steal research on Trump in hack of U....,WASHINGTON (Reuters) - Hackers believed to be ...
72150,WATCH: Giuliani Demands That Democrats Apolog...,"You know, because in fantasyland Republicans n..."
72151,Migrants Refuse To Leave Train At Refugee Camp...,Migrants Refuse To Leave Train At Refugee Camp...
72152,Trump tussle gives unpopular Mexican leader mu...,MEXICO CITY (Reuters) - Donald Trump’s combati...


In [None]:
messages.reset_index(inplace=True)

In [None]:
import nltk
import re
from nltk.corpus import stopwords

In [None]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
### Dataset Preprocessing
from nltk.stem.porter import PorterStemmer ##stemming purpose
ps = PorterStemmer()
corpus = []  ## list of all data
for i in range(0, len(messages)):
    review = re.sub('[^a-zA-Z]', ' ', messages['title'][i]) ## remove all the special characters from every review
    review = review.lower() ## lower casing
    review = review.split() ## spliting the data

    review = [ps.stem(word) for word in review if not word in stopwords.words('english')]
    review = ' '.join(review)
    corpus.append(review)

In [None]:
corpus

['law enforc high alert follow threat cop white blacklivesmatt fyf terrorist video',
 'unbeliev obama attorney gener say charlott rioter peac protest home state north carolina video',
 'bobbi jindal rais hindu use stori christian convers woo evangel potenti bid',
 'satan russia unv imag terrifi new supernuk western world take notic',
 'time christian group sue amazon splc design hate group',
 'dr ben carson target ir never audit spoke nation prayer breakfast',
 'hous intel chair trump russia fake stori evid anyth video',
 'sport bar owner ban nfl game show true american sport like speak rural america video',
 'latest pipelin leak underscor danger dakota access pipelin',
 'gop senat smack punchabl alt right nazi internet',
 'may brexit offer would hurt cost eu citizen eu parliament',
 'schumer call trump appoint offici overse puerto rico relief',
 'watch hilari ad call question health age clinton crime famili boss',
 'chang expect espn polit agenda despit huge subscrib declin breitbart'

#### One hot representation


In [None]:
onehot_repr=[one_hot(words,voc_size) for words in corpus]
onehot_repr

[[4211, 3869, 2474, 3527, 640, 4295, 1402, 720, 2292, 1622, 4258, 1365],
 [4763,
  3259,
  2149,
  3736,
  3454,
  688,
  1812,
  4076,
  448,
  616,
  2123,
  2,
  3643,
  1365],
 [2684, 3938, 4703, 4438, 3482, 1688, 2459, 917, 457, 3963, 3947, 4588],
 [937, 2040, 3445, 2403, 4980, 4508, 476, 1451, 859, 2956, 171],
 [2127, 2459, 2045, 4301, 3164, 352, 41, 3075, 2045],
 [2678, 2476, 4954, 2589, 1927, 3734, 2810, 2711, 4099, 594, 4446],
 [58, 195, 1611, 3444, 2040, 4142, 1688, 246, 914, 1365],
 [428,
  1316,
  1116,
  4795,
  2603,
  3937,
  11,
  3213,
  1919,
  428,
  291,
  250,
  1432,
  3029,
  1365],
 [1595, 4542, 3960, 4130, 3217, 1718, 3484, 4542],
 [78, 2152, 4445, 3700, 4002, 1759, 2260, 1701],
 [4972, 1506, 1001, 250, 742, 832, 3014, 1175, 3014, 1544],
 [4972, 4715, 3444, 938, 240, 4952, 3950, 2725, 1399],
 [1420, 3926, 1147, 4715, 1177, 4949, 211, 486, 3009, 2233, 1714],
 [630, 401, 470, 4881, 2351, 4874, 3610, 3048, 596, 1125],
 [2013, 986, 2607, 1027, 3193, 58, 4229],
 [35

In [None]:
# Embedding representation
sent_length=20
embedded_docs=pad_sequences(onehot_repr,padding='pre',maxlen=sent_length)

embedded_docs

array([[   0,    0,    0, ..., 1622, 4258, 1365],
       [   0,    0,    0, ...,    2, 3643, 1365],
       [   0,    0,    0, ..., 3963, 3947, 4588],
       ...,
       [   0,    0,    0, ..., 2776, 2434,  676],
       [   0,    0,    0, ..., 1340, 3958,  944],
       [   0,    0,    0, ..., 1308,  486,  361]], dtype=int32)

In [None]:
#creating model
embedded_vector_size=30 # number of feature
model=Sequential()
model.add(Embedding(voc_size,embedded_vector_size,input_shape=(sent_length,)))
model.add(Bidirectional(LSTM(100)))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
print(model.summary())

None


In [None]:

X_final=np.array(embedded_docs)
y_final=np.array(y)

In [None]:
#train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_final, y_final, test_size=0.33, random_state=42)
#

In [None]:
## Model Training
model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=10,batch_size=32)

Epoch 1/10


UnimplementedError: Graph execution error:

Detected at node compile_loss/binary_crossentropy/Cast defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.12/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/kernelapp.py", line 712, in start

  File "/usr/local/lib/python3.12/dist-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.12/asyncio/base_events.py", line 645, in run_forever

  File "/usr/lib/python3.12/asyncio/base_events.py", line 1999, in _run_once

  File "/usr/lib/python3.12/asyncio/events.py", line 88, in _run

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/kernelbase.py", line 499, in process_one

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/kernelbase.py", line 730, in execute_request

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/ipkernel.py", line 383, in do_execute

  File "/usr/local/lib/python3.12/dist-packages/ipykernel/zmqshell.py", line 528, in run_cell

  File "/usr/local/lib/python3.12/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.12/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.12/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.12/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.12/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.12/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "/tmp/ipython-input-3933341715.py", line 2, in <cell line: 0>

  File "/usr/local/lib/python3.12/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.12/dist-packages/keras/src/backend/tensorflow/trainer.py", line 377, in fit

  File "/usr/local/lib/python3.12/dist-packages/keras/src/backend/tensorflow/trainer.py", line 220, in function

  File "/usr/local/lib/python3.12/dist-packages/keras/src/backend/tensorflow/trainer.py", line 133, in multi_step_on_iterator

  File "/usr/local/lib/python3.12/dist-packages/keras/src/backend/tensorflow/trainer.py", line 114, in one_step_on_data

  File "/usr/local/lib/python3.12/dist-packages/keras/src/backend/tensorflow/trainer.py", line 61, in train_step

  File "/usr/local/lib/python3.12/dist-packages/keras/src/trainers/trainer.py", line 383, in _compute_loss

  File "/usr/local/lib/python3.12/dist-packages/keras/src/trainers/trainer.py", line 351, in compute_loss

  File "/usr/local/lib/python3.12/dist-packages/keras/src/trainers/compile_utils.py", line 690, in __call__

  File "/usr/local/lib/python3.12/dist-packages/keras/src/trainers/compile_utils.py", line 699, in call

  File "/usr/local/lib/python3.12/dist-packages/keras/src/losses/loss.py", line 63, in __call__

  File "/usr/local/lib/python3.12/dist-packages/keras/src/tree/tree_api.py", line 192, in map_structure

  File "/usr/local/lib/python3.12/dist-packages/keras/src/tree/optree_impl.py", line 111, in map_structure

  File "/usr/local/lib/python3.12/dist-packages/optree/ops.py", line 766, in tree_map

  File "/usr/local/lib/python3.12/dist-packages/keras/src/losses/loss.py", line 64, in <lambda>

  File "/usr/local/lib/python3.12/dist-packages/keras/src/ops/core.py", line 958, in convert_to_tensor

  File "/usr/local/lib/python3.12/dist-packages/keras/src/backend/tensorflow/core.py", line 160, in convert_to_tensor

Cast string to float is not supported
	 [[{{node compile_loss/binary_crossentropy/Cast}}]] [Op:__inference_multi_step_on_iterator_4132]