# Importing Libraries

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
import keras
import numpy as np
from keras.layers import Input, Lambda, Dense,Dropout,SpatialDropout1D,Conv1D,GlobalMaxPooling1D
from keras.models import Model
import keras.backend as K
from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.engine.topology import Layer
from keras import initializers, regularizers, constraints
from keras.layers import *
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


# Loading data

In [2]:
# Labelled headline corpus; the file is resolved relative to the notebook's working directory.
DATASET_CSV = "fake_real_dataset.csv"
dataset = pd.read_csv(DATASET_CSV)

In [3]:
# Preview the first five rows to confirm the expected columns were loaded.
dataset.head(n=5)

Unnamed: 0.1,Unnamed: 0,news_url,title,label,merged
0,699,bustle.com,are rachel bryan still together in the bachelo...,1,bustle.com are rachel bryan still together in ...
1,15378,people.com,all about the crib kim kardashian west and bey...,1,people.com all about the crib kim kardashian w...
2,10359,bbc.co.uk,royal wedding meghan markle s givenchy dress i...,1,bbc.co.uk royal wedding meghan markle s givenc...
3,10551,people.com,anna wintour calls scarlett johansson wearing ...,1,people.com anna wintour calls scarlett johanss...
4,15248,usmagazine.com,stassi schroeder apologizes for ntroversial me...,1,usmagazine.com stassi schroeder apologizes for...


In [5]:
# Model inputs are the headline strings; targets are the values of the `label` column.
title_column, label_column = dataset["title"], dataset["label"]
X = title_column.values
Y = label_column.values

In [18]:
# Hold out 20% of the headlines for evaluation.
# A fixed random_state makes the partition identical on every Restart & Run All;
# without it each run draws a different split, so metrics are not comparable
# between runs.
SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=SEED)

In [19]:
#new data
# Separate fake / real GossipCop files; only their headline columns are kept here.
fake = pd.read_csv("gossipcop_fake.csv")
real = pd.read_csv("gossipcop_real.csv")
fake_t, real_t = fake["title"], real["title"]


# Making Model

In [15]:
# Feature column that feeds the 'sentence' input through the Universal Sentence
# Encoder hub module; trainable=False keeps the encoder weights frozen.
USE_MODULE_URL = "https://tfhub.dev/google/universal-sentence-encoder-large/3"
embedding_feature = hub.text_embedding_column(
    key='sentence',
    module_spec=USE_MODULE_URL,
    trainable=False)

In [21]:
#creating custom layer
# The Keras path needs a callable encoder. A hub feature column (what
# `embedding_feature` holds) is an input spec for estimators and cannot be applied
# to a tensor, so the encoder is loaded here as its own frozen hub.Module.
_use_encoder = hub.Module(
    "https://tfhub.dev/google/universal-sentence-encoder-large/3",
    trainable=False)

def UniversalEmbedding(x):
    """Embed a batch of raw sentences with the Universal Sentence Encoder.

    Intended to be wrapped in a Keras ``Lambda`` layer.

    Args:
        x: tensor of shape ``(batch, 1)`` holding one sentence per row.

    Returns:
        The encoder's ``"default"`` output: one sentence vector per row.
    """
    # Squeeze only axis 1: a bare tf.squeeze would also drop the batch axis when
    # the batch holds a single sentence, handing the encoder a scalar.
    sentences = tf.squeeze(tf.cast(x, tf.string), axis=[1])
    return _use_encoder(sentences, signature="default", as_dict=True)["default"]
#creating an Attention layer
class Attention(Layer):
    """Attention pooling over the step axis of a 3-D tensor.

    Each timestep is scored with a learned vector ``W`` (plus an optional
    per-step bias), the scores go through ``tanh`` and ``exp``, are normalised
    across the steps, and the layer returns the weighted sum of the timesteps.

    Input shape:  ``(batch, step_dim, features)``
    Output shape: ``(batch, features)``

    Arguments:
        step_dim: number of timesteps (size of axis 1) of the input.
        W_regularizer: regularizer for the scoring vector ``W``.
        b_regularizer: regularizer for the per-step bias.
        W_constraint: constraint for the scoring vector ``W``.
        b_constraint: constraint for the per-step bias.
        bias: whether to add a learned bias to every step's score.
        **kwargs: forwarded to ``Layer.__init__`` (e.g. ``name``).
    """

    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        super(Attention, self).__init__(**kwargs)
        # Set after the base constructor so it cannot be overwritten by the
        # base class's own attribute initialisation.
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        # Filled in by build() once the input's feature size is known.
        self.features_dim = 0

    def build(self, input_shape):
        """Create the scoring vector ``W`` and, if enabled, the per-step bias."""
        assert len(input_shape) == 3
        # call() reshapes the scores to (-1, step_dim); a different step count
        # would either fail there or silently regroup scores across samples.
        if input_shape[1] is not None and input_shape[1] != self.step_dim:
            raise ValueError(
                'Attention was created with step_dim={} but the input has {} '
                'steps'.format(self.step_dim, input_shape[1]))

        # Keyword arguments work with both the current and the legacy
        # add_weight signatures.
        self.W = self.add_weight(name='{}_W'.format(self.name),
                                 shape=(input_shape[-1],),
                                 initializer=self.init,
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight(name='{}_b'.format(self.name),
                                     shape=(input_shape[1],),
                                     initializer='zero',
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None
        self.built = True

    def compute_mask(self, input, input_mask=None):
        """Return no mask: call() sums over the step axis, so none is propagated."""
        return None

    def call(self, x, mask=None):
        """Pool ``x`` over its step axis using the learned attention weights."""
        features_dim = self.features_dim
        step_dim = self.step_dim
        # One scalar score per (sample, step): flatten to 2-D for the dot
        # product with W, then fold back to (batch, step_dim).
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))
        if self.bias:
            eij += self.b
        eij = K.tanh(eij)
        a = K.exp(eij)
        if mask is not None:
            # Masked steps get zero weight before normalisation.
            a *= K.cast(mask, K.floatx())
        # epsilon guards against division by zero when every step is masked.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Output is ``(batch, features)``: the step axis is summed out."""
        return input_shape[0],  self.features_dim

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised and re-created."""
        config = {
            'step_dim': self.step_dim,
            'W_regularizer': regularizers.serialize(self.W_regularizer),
            'b_regularizer': regularizers.serialize(self.b_regularizer),
            'W_constraint': constraints.serialize(self.W_constraint),
            'b_constraint': constraints.serialize(self.b_constraint),
            'bias': self.bias,
        }
        base_config = super(Attention, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [None]:
#making the model using the functional keras api
# Each sample is one raw headline string.
input_text = Input(shape=(1,), dtype=tf.string)
# Bound to `embedding`, not `embedding_feature`: that name already holds the hub
# feature column passed to the DNNClassifier, and the next layer reads `embedding`.
embedding = Lambda(UniversalEmbedding, output_shape=(512,))(input_text)
drop_1 = Dropout(0.2)(embedding)
# NOTE(review): a Conv1D -> GlobalMaxPooling1D -> Attention head was commented out
# here. Attention.build asserts a 3-D input, while the Lambda above is declared
# to emit one 512-d vector per sample, so it would need a per-token encoder.
dense = Dense(128, activation='relu')(drop_1)
drop_2 = Dropout(0.5)(dense)
# A single output unit must use sigmoid: softmax over one unit always yields 1.0,
# so the prediction would be constant under binary cross-entropy.
pred = Dense(1, activation='sigmoid')(drop_2)
model = Model(inputs=[input_text], outputs=pred)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [17]:
# Train the Keras model inside an explicit TF session registered with the Keras
# backend. Variables and lookup tables are initialised before fitting
# (presumably the tables belong to the hub text module; verify). Only the
# weights are written to disk.
with tf.Session() as sess:
    K.set_session(sess)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    history = model.fit(
        x=X, y=Y,
        batch_size=32, epochs=1,
        validation_split=0.2)
    model.save_weights('./model.h5')

Train on 17712 samples, validate on 4428 samples
Epoch 1/1


In [20]:
#using tf estimator
def _ordered_input_fn(sentences, labels):
    """Build an unshuffled input_fn over the given sentences and labels.

    Batch size and epoch count are left at numpy_input_fn's defaults.
    """
    return tf.estimator.inputs.numpy_input_fn(
        {'sentence': sentences}, labels, shuffle=False)

# Training input: shuffled batches of 256 drawn from the training split with no
# epoch limit (num_epochs=None), so the number of steps is set by the caller.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'sentence': X_train}, y=y_train,
    batch_size=256, num_epochs=None, shuffle=True)

# Evaluation over the whole training split.
predict_train_input_fn = _ordered_input_fn(X_train, y_train)

# Evaluation over the held-out split.
# NOTE(review): the validation and test input functions both read X_test / y_test.
predict_val_input_fn = _ordered_input_fn(X_test, y_test)
predict_test_input_fn = _ordered_input_fn(X_test, y_test)

In [23]:
# Binary feed-forward classifier on top of the sentence-embedding feature column:
# two ReLU hidden layers (512 then 128 units), 10% dropout, trained with Adagrad.
adagrad = tf.train.AdagradOptimizer(learning_rate=0.005)
dnn = tf.estimator.DNNClassifier(
    feature_columns=[embedding_feature],
    hidden_units=[512, 128],
    activation_fn=tf.nn.relu,
    dropout=0.1,
    n_classes=2,
    optimizer=adagrad)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\anubh\\AppData\\Local\\Temp\\tmpv7o4_dcu', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002CA4E219668>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [None]:

# Silence TensorFlow's INFO/WARNING logs so each round prints only the lines below.
tf.logging.set_verbosity(tf.logging.ERROR)
import time

TOTAL_STEPS = 1500
STEP_SIZE = 100
# Estimator.train(steps=N) is incremental: each call adds N more steps.
# range(0, TOTAL_STEPS, STEP_SIZE) therefore gives TOTAL_STEPS / STEP_SIZE rounds
# and exactly TOTAL_STEPS training steps. The previous `TOTAL_STEPS+1` bound ran
# one extra round, training 1600 steps.
for step in range(0, TOTAL_STEPS, STEP_SIZE):
    print()
    print('-'*100)
    # `step` is the number of steps completed before this round starts.
    print('Training for step =', step)
    start_time = time.time()
    dnn.train(input_fn=train_input_fn, steps=STEP_SIZE)
    elapsed_time = time.time() - start_time
    print('Train Time (s):', elapsed_time)
    # Evaluate on both splits after every round to track over- and under-fitting.
    print('Eval Metrics (Train):', dnn.evaluate(input_fn=predict_train_input_fn))
    print('Eval Metrics (Validation):', dnn.evaluate(input_fn=predict_val_input_fn))


----------------------------------------------------------------------------------------------------
Training for step = 0
