In [1]:
#!pip install numpy==1.22.0

In [2]:
import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

/kaggle/input/fake-and-real-news-dataset/True.csv
/kaggle/input/fake-and-real-news-dataset/Fake.csv


In [3]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm



In [4]:
from transformers import AutoTokenizer,TFAutoModelForSequenceClassification
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM,Bidirectional,\
                                    SimpleRNN,Conv1D,Embedding, \
                                    Dense,Dropout,GlobalAveragePooling1D
from tensorflow.keras.utils import pad_sequences
from nltk.corpus import stopwords
# NOTE: this rebinds the name `stopwords` -- imported from nltk.corpus on the
# line above -- to the value returned by `.words("english")`. After this line
# the original imported object is no longer reachable under this name.
stopwords=stopwords.words("english")

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [5]:
# Directory expected to contain True.csv and Fake.csv (relative path, resolved
# against the notebook's working directory); passed to reading_data().
path="../input/fake-and-real-news-dataset"

In [6]:
def reading_data(data="data"):
    """
    Load the two news CSV files found under a directory.

    Args:
        data: Directory (or path prefix) that contains ``True.csv`` and
            ``Fake.csv``. Defaults to ``"data"``.

    Returns:
        tuple: ``(true, fake)`` -- the DataFrame read from ``True.csv``
        followed by the DataFrame read from ``Fake.csv``.
    """
    # Build both file locations first, then read them in the same order
    # (True.csv, then Fake.csv).
    sources = (f"{data}/True.csv", f"{data}/Fake.csv")
    true_frame, fake_frame = map(pd.read_csv, sources)
    return true_frame, fake_frame

# Read True.csv and Fake.csv from `path` into two separate DataFrames.
true_data,fake_data=reading_data(path)

In [7]:
# Preview the first rows of the frame loaded from True.csv.
true_data.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [8]:
# Preview the first rows of the frame loaded from Fake.csv.
fake_data.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [9]:
# Summarise both frames (entries, columns, non-null counts, dtypes).
# Each call is its own statement: info() and print() are used here only for
# their printed output, and chaining them into a tuple made the notebook echo
# a meaningless `(None, None, None)` as the cell result.
true_data.info()
print("")
fake_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21417 entries, 0 to 21416
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    21417 non-null  object
 1   text     21417 non-null  object
 2   subject  21417 non-null  object
 3   date     21417 non-null  object
dtypes: object(4)
memory usage: 669.4+ KB

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23481 entries, 0 to 23480
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   title    23481 non-null  object
 1   text     23481 non-null  object
 2   subject  23481 non-null  object
 3   date     23481 non-null  object
dtypes: object(4)
memory usage: 733.9+ KB


(None, None, None)

In [10]:
# Distinct values of the "subject" column in each dataset, in the same
# (true, fake) order used throughout the notebook.
tdata_subjects, fdata_subjects = (
    frame["subject"].unique() for frame in (true_data, fake_data)
)
print(tdata_subjects, fdata_subjects)

['politicsNews' 'worldnews'] ['News' 'politics' 'Government News' 'left-news' 'US_News' 'Middle-east']


In [11]:
def _subject_counts(frame, news_type, col):
    """Count, for each label in ``news_type``, the rows of ``frame`` whose ``col`` equals it."""
    column = frame[col]  # looked up once instead of once per label
    # A boolean mask is summed rather than building a filtered DataFrame just
    # to take its length; int() keeps the values plain Python ints.
    return {news: int((column == news).sum()) for news in news_type}


def true_subjects_count(news_type,col="subject"):
    """
    Count rows of the module-level ``true_data`` frame per label.

    Args:
        news_type: Iterable of labels to count (e.g. the unique subjects).
        col: Column of ``true_data`` compared against each label.

    Returns:
        dict: ``{label: number_of_matching_rows}``; labels that never occur
        map to 0.
    """
    return _subject_counts(true_data, news_type, col)


def fake_subjects_count(news_type,col="subject"):
    """
    Count rows of the module-level ``fake_data`` frame per label.

    Args:
        news_type: Iterable of labels to count (e.g. the unique subjects).
        col: Column of ``fake_data`` compared against each label.

    Returns:
        dict: ``{label: number_of_matching_rows}``; labels that never occur
        map to 0.
    """
    return _subject_counts(fake_data, news_type, col)

# Per-subject row counts for each dataset, as {subject: count} dicts.
# NOTE: despite its name, `fake_news_types` holds counts, exactly like
# `true_news_count`.
true_news_count=true_subjects_count(tdata_subjects)
fake_news_types=fake_subjects_count(fdata_subjects)

In [12]:
# Report the per-subject counts for both datasets.
# Plain loops are used because print() returns None: wrapping it in a set
# comprehension only builds a throwaway {None}, which the notebook then echoes
# as the cell's output.
for k, v in true_news_count.items():
    print(f"There is a total in news type of {v} {k} which are true")
print()
for k, v in fake_news_types.items():
    print(f"There is a total of news type {v} {k} which are fake")

There is a total in news type of 11272 politicsNews which are true
There is a total in news type of 10145 worldnews which are true

There is a total of news type 9050 News which are fake
There is a total of news type 6841 politics which are fake
There is a total of news type 1570 Government News which are fake
There is a total of news type 4459 left-news which are fake
There is a total of news type 783 US_News which are fake
There is a total of news type 778 Middle-east which are fake


{None}

In [13]:
# Tag every row of each frame with its class label ("Real" for the frame
# loaded from True.csv, "Fake" for the one from Fake.csv), then show the
# resulting shapes.
true_data["News Type"], fake_data["News Type"] = "Real", "Fake"
print(f"{true_data.shape} {fake_data.shape}")

(21417, 5) (23481, 5)


In [14]:
# Show the column labels of true_data, which now include "News Type".
true_data.columns

Index(['title', 'text', 'subject', 'date', 'News Type'], dtype='object')

In [15]:
# Stack the two labelled frames into one dataset. No row can match across the
# frames because the "News Type" label always differs, so the previous outer
# merge on every column was only an expensive way to concatenate; pd.concat
# states the intent directly and keeps every row from both frames.
merged_data=pd.concat([true_data,fake_data],ignore_index=True)
# Shuffle so real and fake rows are interleaved. The fixed random_state makes
# the order reproducible across kernel restarts.
merged_data=merged_data.sample(frac=1,random_state=42).reset_index(drop=True)
merged_data

Unnamed: 0,title,text,subject,date,News Type
0,Obama Oversees Record Stock Market Results De...,For more than seven years now the right-wing l...,News,"April 20, 2016",Fake
1,NBA CRYBABY COACH Worries About President-Elec...,"It turns out the hypocrite, liberal, Golden S...",politics,"Nov 20, 2016",Fake
2,"In Trump/Clinton face-off on Monday, winning o...",WASHINGTON (Reuters) - When Donald Trump and H...,politicsNews,"September 23, 2016",Real
3,KEITH SCOTT’S BROTHER Tells Charlotte Reporter...,The first Black (and half White) President has...,politics,"Sep 23, 2016",Fake
4,"As Trump meets biotech CEOs, farm advisers fre...",CHICAGO (Reuters) - U.S. President-elect Donal...,politicsNews,"January 12, 2017",Real
...,...,...,...,...,...
44893,U.S. tax reform poses more risks for state and...,CHICAGO/NEW YORK (Reuters) - State and local g...,politicsNews,"November 15, 2017",Real
44894,Man with Palestinian flag smashes Jewish resta...,AMSTERDAM (Reuters) - A man with a Palestinian...,worldnews,"December 7, 2017",Real
44895,Trump Went ‘BALLISTIC’ When Sessions Recused ...,Donald Trump reportedly flew into a fit of rag...,News,"March 4, 2017",Fake
44896,John McCain’s Opponent Sees Donations Pour In...,This is why John McCain really regretted blami...,News,"June 18, 2016",Fake


<h2>Modeling</h2>