### Importing the Necessary Libraries

In [30]:
import numpy as np
import pandas as pd
import keras
import tensorflow as tf
from keras.layers import Dense
from keras.models import Sequential
import matplotlib.pyplot as plt
%matplotlib inline

### Reading the Dataset

In [2]:
# Load both CSV files from the working directory: True.csv into df_true and
# Fake.csv into df_fake.
df_true, df_fake = (pd.read_csv(csv_name) for csv_name in ("True.csv", "Fake.csv"))

# Preview the first rows of the True.csv frame as the cell output.
df_true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


### Concatenating the true and fake datasets 

In [3]:
# Attach the binary target: 1 for rows loaded from True.csv, 0 for rows from Fake.csv.
df_true['category'] = 1
df_fake['category'] = 0

# Stack the two frames into one. ignore_index=True gives the result a fresh
# 0..n-1 index; without it the row labels of df_true and df_fake are both kept,
# so the same label appears twice and label-based access on df is ambiguous.
df = pd.concat([df_true, df_fake], ignore_index=True)

### Importing Libraries for Deep Learning

In [4]:
from keras.models import Model
from keras.layers import Dense, Input, Dropout, LSTM, Activation
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.initializers import glorot_uniform
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import Callback

In [8]:
# Hyperparameters shared by the tokenizing and model cells.

# Passed to Tokenizer(num_words=...) and used as the Embedding input size.
vocab_size = 100_000

# Per-field settings. max_length_title is the maxlen given to pad_sequences;
# the other three are defined here but not referenced in the cells shown.
embedding_dim_title, max_length_title = 128, 40
embedding_dim_text, max_length_text = 500, 500

# pad_sequences pads and truncates at the end of each sequence.
padding_type = trunc_type = 'post'

# Fraction of rows intended for the test split.
test_ratio = 0.2

# Embedding output size; also reused as the LSTM and hidden Dense width.
embedding_dim = 500

### Tokenizing the Words (Mapping Words to Integer Sequences)

In [25]:
# Build the model input from title, body and subject.
# - Spaces between the fields keep the last word of one field from fusing with
#   the first word of the next.
# - The result is kept in its own Series instead of overwriting df['text'], so
#   re-running this cell does not prepend the title a second time.
# - fillna('') prevents a single missing field from turning the whole row into NaN.
combined_text = (
    df['title'].fillna('') + ' ' + df['text'].fillna('') + ' ' + df['subject'].fillna('')
)

# Hold out test_ratio of the rows; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    combined_text, df['category'], test_size=test_ratio, random_state=42
)

# Fit the vocabulary on the training split ONLY. Fitting again on the test
# split would update the word counts and rebuild word_index, so test tokens
# would map to different ids than the ones the model is trained on (and it
# would leak test vocabulary into preprocessing).
t = Tokenizer(num_words=vocab_size)
t.fit_on_texts(X_train)

# Both splits are padded/truncated with identical settings.
# NOTE(review): maxlen is max_length_title (40) although the input is
# title + body + subject, so with 'post' truncation only the first 40 tokens
# of each row are kept; max_length_text is defined but unused. Confirm intent.
pad_kwargs = dict(maxlen=max_length_title, padding=padding_type, truncating=trunc_type)

train_sequences = t.texts_to_sequences(X_train)
train_padded = pad_sequences(train_sequences, **pad_kwargs)

test_sequences = t.texts_to_sequences(X_test)
test_padded = pad_sequences(test_sequences, **pad_kwargs)

# Labels as plain NumPy arrays for Keras (pad_sequences already returns arrays).
y_train = np.array(y_train)
y_test = np.array(y_test)

### Custom Callback for Early Stopping

In [26]:
class AccuracyHistory(tf.keras.callbacks.Callback):
    """Record training accuracy per epoch and stop once it exceeds 0.95.

    Subclasses the tf.keras Callback because the model it is attached to is a
    tf.keras model; mixing standalone-keras callbacks with tf.keras models is
    not supported by all TensorFlow versions.

    Attributes:
        acc: list of the accuracy values seen at the end of each epoch.
    """

    def on_train_begin(self, logs=None):
        """Keras training-start hook: start with an empty accuracy record."""
        self.acc = []

    def on_epoch_end(self, batch, logs=None):
        """Keras epoch-end hook: store the accuracy and stop above 0.95.

        Args:
            batch: index passed by Keras to this hook (the epoch number; the
                parameter name is kept for backward compatibility). Unused.
            logs: dict of metric values for the finished epoch, or None.
        """
        logs = logs or {}
        # The metric is logged as 'acc' or 'accuracy' depending on how it was
        # named in model.compile(); accept either.
        acc = logs.get('acc', logs.get('accuracy'))
        if acc is None:
            # Metric not reported: nothing to compare, so keep training
            # instead of failing on `None > 0.95`.
            return
        self.acc.append(acc)
        if acc > 0.95:
            print(f'Accuracy reached {acc*100:0.2f}. Stopping the training')
            self.model.stop_training = True


history = AccuracyHistory()


### Creating the Model

In [None]:
# Binary classifier over padded token-id sequences:
# embedding -> bidirectional LSTM -> ReLU dense layer -> single sigmoid output.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(embedding_dim)),
    tf.keras.layers.Dense(embedding_dim, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# The metric is registered under the name 'acc', which is the key the
# early-stopping callback looks up in the epoch logs.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc'],
)

### Training the Model

In [29]:
# Train for at most 5 epochs in mini-batches of 32; the `history` callback is
# attached so it can end training early. The returned History object is the
# cell output.
model.fit(
    x=train_padded,
    y=y_train,
    batch_size=32,
    epochs=5,
    callbacks=[history],
)

Train on 35918 samples
Epoch 1/5
Epoch 2/5


<tensorflow.python.keras.callbacks.History at 0x10cd8499ba8>

### Saving Model

In [31]:
# Write the trained model to an HDF5 file in the working directory.
model.save(filepath='modelLSTM09892.h5')