## Importing libraries

In [28]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPool2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
import re 
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import itertools
import os
import shutil
import torch
import random
import glob
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
%matplotlib inline

## Importing data

In [3]:
# Directory holding the two CSV exports. Set the NEWS_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset the
# original author's local path is used, so existing behaviour is unchanged.
DATA_DIR = os.environ.get(
    'NEWS_DATA_DIR',
    r'C:\Users\RezaHardMan\Documents\Python projects\datasets\news detection',
)
fake_df = pd.read_csv(os.path.join(DATA_DIR, 'fake.csv'))
real_df = pd.read_csv(os.path.join(DATA_DIR, 'True.csv'))

## Preparing data

In [4]:
# Remove the columns that are not used further down. Re-binding the names
# (instead of mutating in place) together with errors='ignore' lets this cell
# be re-run without a KeyError once the columns are already gone.
UNUSED_COLUMNS = ['date', 'subject']
fake_df = fake_df.drop(columns=UNUSED_COLUMNS, errors='ignore')
real_df = real_df.drop(columns=UNUSED_COLUMNS, errors='ignore')

In [5]:
# Attach the target label to every row: 0 for fake articles, 1 for real ones.
for frame, label in ((fake_df, 0), (real_df, 1)):
    frame['class'] = label

In [6]:
# Stack both sources into a single frame and renumber the rows from 0.
news_df = pd.concat(
    (fake_df, real_df),
    sort=False,
    ignore_index=True,
)
news_df

Unnamed: 0,title,text,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,0
...,...,...,...
44893,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,1
44894,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",1
44895,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,1
44896,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,1


In [7]:
# Merge the headline into the article body so the model sees one text field.
# A space is inserted between the two: without it the last word of the title
# and the first word of the body fuse into a single bogus token.
# The guard keeps the cell safe to re-run after 'title' has been dropped.
if 'title' in news_df.columns:
    news_df = news_df.assign(
        text=news_df['title'] + ' ' + news_df['text']
    ).drop(columns='title')

In [8]:
# Features are the article texts, targets the 0/1 class labels.
inputs, targets = news_df['text'], news_df['class']

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    inputs,
    targets,
    test_size=0.20,
    random_state=18,
)

In [9]:
# I did not write this part myself.
# Patterns are compiled once and written as raw strings. The original plain
# string literals relied on invalid escape sequences such as '\S', which newer
# Python versions warn about.
_URL_RE = re.compile(r'https?://\S+|www\.\S+')
_NON_WORD_RE = re.compile(r'\W')
_SPACE_RUN_RE = re.compile(r' +')


def normalize(data):
    """Return a cleaned, lower-cased copy of every text in ``data``.

    Each text is lower-cased, stripped of URLs (``http://``, ``https://`` and
    ``www.`` forms), has every non-word character replaced by a space, and
    finally has runs of spaces collapsed and leading/trailing spaces removed.

    Parameters
    ----------
    data : iterable of str
        The texts to clean. The input is not modified.

    Returns
    -------
    list of str
        One cleaned string per input text, in the same order.
    """
    normalized = []
    for text in data:
        # get rid of urls
        text = _URL_RE.sub('', text.lower())
        # Every non-word character (punctuation, tabs, newlines, ...) becomes
        # a space, so no separate newline handling is needed afterwards.
        text = _NON_WORD_RE.sub(' ', text)
        # Collapse space runs, then drop the single space that may remain at
        # either end.
        text = _SPACE_RUN_RE.sub(' ', text).strip(' ')
        normalized.append(text)
    return normalized

# Clean both splits with the same routine before tokenising.
X_train, X_test = normalize(X_train), normalize(X_test)

In [10]:
# Vocabulary cap handed to the tokenizer (and reused as the embedding input size).
max_vocab = 10_000

# The word index is learned from the training texts only.
tokenizer = Tokenizer(num_words=max_vocab)
tokenizer.fit_on_texts(X_train)

In [11]:
# Replace every text with its sequence of integer word indices.
X_train, X_test = (
    tokenizer.texts_to_sequences(split) for split in (X_train, X_test)
)

In [29]:
# Length every sequence is padded/truncated to, and the number of target classes.
MAX_LEN = 256
NUM_CLASSES = 2

# Bring every sequence to MAX_LEN; padding zeros are appended at the end.
X_train = tf.keras.preprocessing.sequence.pad_sequences(X_train, padding='post', maxlen=MAX_LEN)
X_test = tf.keras.preprocessing.sequence.pad_sequences(X_test, padding='post', maxlen=MAX_LEN)

# Convert labels to one hot encoding matrix.
# The ndim guard makes the cell re-runnable: encoding labels that are already
# one-hot would add a spurious third axis and break training.
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train, NUM_CLASSES)
if np.ndim(y_test) == 1:
    y_test = to_categorical(y_test, NUM_CLASSES)

## Building and training the model

In [30]:
# Layer stack: embedding -> two stacked bidirectional LSTMs -> dense head
# ending in a 2-way softmax.
keras_layers = tf.keras.layers

model = tf.keras.Sequential()
model.add(keras_layers.Embedding(max_vocab, 32))
# The first recurrent layer returns the full sequence so the second can consume it.
model.add(keras_layers.Bidirectional(keras_layers.LSTM(64, return_sequences=True)))
model.add(keras_layers.Bidirectional(keras_layers.LSTM(16)))
model.add(keras_layers.Dense(64, activation='relu'))
model.add(keras_layers.Dropout(0.5))
model.add(keras_layers.Dense(units=2, activation='softmax'))


In [31]:
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

# Import from tensorflow.keras so the callbacks come from the same Keras
# implementation as the model, which is built with tf.keras.
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau

# Two checkpoints that both fire when validation accuracy improves: the first
# writes a separate file per improving epoch (epoch number and accuracy in the
# name), the second keeps overwriting one fixed file.
filepath1 = "weights-improvement-{epoch:02d}-{val_accuracy:.2f}.hdf5"
filepath2 = "best_weights.hdf5"
checkpoint1 = ModelCheckpoint(filepath1, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
checkpoint2 = ModelCheckpoint(filepath2, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint1, checkpoint2]

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, None, 32)          320000    
                                                                 
 bidirectional_6 (Bidirectio  (None, None, 128)        49664     
 nal)                                                            
                                                                 
 bidirectional_7 (Bidirectio  (None, 32)               18560     
 nal)                                                            
                                                                 
 dense_8 (Dense)             (None, 64)                2112      
                                                                 
 dropout_7 (Dropout)         (None, 64)                0         
                                                                 
 dense_9 (Dense)             (None, 2)                

In [32]:
# Train for 10 epochs, holding back 10% of the training data for validation;
# the checkpoint callbacks run at the end of every epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=16,
    validation_split=0.1,
    shuffle=True,
    verbose=1,
    callbacks=callbacks_list,
)

Epoch 1/10
Epoch 00001: val_accuracy improved from -inf to 0.98357, saving model to weights-improvement-01-0.98.hdf5

Epoch 00001: val_accuracy improved from -inf to 0.98357, saving model to best_weights.hdf5
Epoch 2/10
Epoch 00002: val_accuracy improved from 0.98357 to 0.99053, saving model to weights-improvement-02-0.99.hdf5

Epoch 00002: val_accuracy improved from 0.98357 to 0.99053, saving model to best_weights.hdf5
Epoch 3/10
Epoch 00003: val_accuracy did not improve from 0.99053

Epoch 00003: val_accuracy did not improve from 0.99053
Epoch 4/10
Epoch 00004: val_accuracy improved from 0.99053 to 0.99193, saving model to weights-improvement-04-0.99.hdf5

Epoch 00004: val_accuracy improved from 0.99053 to 0.99193, saving model to best_weights.hdf5
Epoch 5/10
Epoch 00005: val_accuracy did not improve from 0.99193

Epoch 00005: val_accuracy did not improve from 0.99193
Epoch 6/10
Epoch 00006: val_accuracy did not improve from 0.99193

Epoch 00006: val_accuracy did not improve from 0.9

## Testing model on our test data

In [33]:
# Displays [loss, accuracy] measured on the held-out test split.
model.evaluate(x=X_test, y=y_test)



[0.04422454163432121, 0.9924275875091553]

In [None]:
# Very good :) About 99.2% accuracy on the held-out test set.